Skip to content

Commit

Permalink
feat: Added retry logic to remote api request
Browse files Browse the repository at this point in the history
  • Loading branch information
d.anchikov committed Jul 6, 2022
1 parent 472b37d commit c694629
Show file tree
Hide file tree
Showing 12 changed files with 536 additions and 63 deletions.
2 changes: 1 addition & 1 deletion api/handler/triggers.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func getTriggerFromRequest(request *http.Request) (*dto.Trigger, *api.ErrorRespo
return nil, api.ErrorInvalidRequest(err)
case remote.ErrRemoteTriggerResponse:
response := api.ErrorRemoteServerUnavailable(err)
middleware.GetLoggerEntry(request).Error("%s : %s : %s", response.StatusText, response.ErrorText, err)
middleware.GetLoggerEntry(request).Errorf("%s : %s : %s", response.StatusText, response.ErrorText, err)
return nil, response
default:
return nil, api.ErrorInternalServer(err)
Expand Down
5 changes: 5 additions & 0 deletions clock/clock.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,8 @@ func NewSystemClock() *SystemClock {
func (t *SystemClock) Now() time.Time {
return time.Now().UTC()
}

// Sleep pauses the current goroutine for at least the passed duration
func (t *SystemClock) Sleep(duration time.Duration) {
time.Sleep(duration)
}
7 changes: 4 additions & 3 deletions cmd/checker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,10 @@ func getDefault() config {
Pprof: cmd.ProfilerConfig{Enabled: false},
},
Remote: cmd.RemoteConfig{
CheckInterval: "60s",
Timeout: "60s",
MetricsTTL: "7d",
CheckInterval: "60s",
Timeout: "60s",
MetricsTTL: "168h",
HealthCheckRetrySeconds: "1 1",
},
}
}
40 changes: 33 additions & 7 deletions cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package cmd
import (
"fmt"
"io/ioutil"
"strconv"
"strings"
"time"

"github.com/moira-alert/moira/metrics"

Expand Down Expand Up @@ -113,6 +115,12 @@ type RemoteConfig struct {
MetricsTTL string `yaml:"metrics_ttl"`
// Timeout for remote requests
Timeout string `yaml:"timeout"`
// Retry seconds for remote requests divided by spaces
RetrySeconds string `yaml:"retry_seconds"`
// HealthCheckTimeout is timeout for remote api health check requests
HealthCheckTimeout string `yaml:"health_check_timeout"`
// Retry seconds for remote api health check requests divided by spaces
HealthCheckRetrySeconds string `yaml:"health_check_retry_seconds"`
// Username for basic auth
User string `yaml:"user"`
// Password for basic auth
Expand All @@ -129,16 +137,34 @@ type ImageStoreConfig struct {
// GetRemoteSourceSettings returns remote config parsed from moira config files
func (config *RemoteConfig) GetRemoteSourceSettings() *remoteSource.Config {
return &remoteSource.Config{
URL: config.URL,
CheckInterval: to.Duration(config.CheckInterval),
MetricsTTL: to.Duration(config.MetricsTTL),
Timeout: to.Duration(config.Timeout),
User: config.User,
Password: config.Password,
Enabled: config.Enabled,
URL: config.URL,
CheckInterval: to.Duration(config.CheckInterval),
MetricsTTL: to.Duration(config.MetricsTTL),
Timeout: to.Duration(config.Timeout),
RetrySeconds: ParseRetrySeconds(config.RetrySeconds),
HealthCheckTimeout: to.Duration(config.HealthCheckTimeout),
HealthCheckRetrySeconds: ParseRetrySeconds(config.HealthCheckRetrySeconds),
User: config.User,
Password: config.Password,
Enabled: config.Enabled,
}
}

// ParseRetrySeconds parses config value string into array of integers
func ParseRetrySeconds(retrySecondsString string) []time.Duration {
secondsStringList := strings.Fields(retrySecondsString)
retrySecondsIntList := make([]time.Duration, len(secondsStringList))

for index, secondsString := range secondsStringList {
secondsInt, err := strconv.Atoi(secondsString)
if err != nil {
panic(err)
}
retrySecondsIntList[index] = time.Second * time.Duration(secondsInt)
}
return retrySecondsIntList
}

// ReadConfig parses config file by the given path into Moira-used type
func ReadConfig(configFileName string, config interface{}) error {
configYaml, err := ioutil.ReadFile(configFileName)
Expand Down
1 change: 1 addition & 0 deletions interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,4 +224,5 @@ type PlotTheme interface {
// Clock is an interface to work with Time.
type Clock interface {
Now() time.Time
Sleep(duration time.Duration)
}
5 changes: 4 additions & 1 deletion local/checker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@ remote:
enabled: true
url: "http://graphite:80/render"
check_interval: 60s
metrics_ttl: 168h
timeout: 60s
metrics_ttl: 7d
retry_seconds: 1 1 1
health_check_timeout: 6s
health_check_retry_seconds: 1 1 1
checker:
nodata_check_interval: 60s
check_interval: 10s
Expand Down
17 changes: 10 additions & 7 deletions metric_source/remote/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@ import "time"

// Config represents config from remote storage
type Config struct {
URL string
CheckInterval time.Duration
MetricsTTL time.Duration
Timeout time.Duration
User string
Password string
Enabled bool
URL string
CheckInterval time.Duration
MetricsTTL time.Duration
Timeout time.Duration
RetrySeconds []time.Duration
HealthCheckTimeout time.Duration
HealthCheckRetrySeconds []time.Duration
User string
Password string
Enabled bool
}

// isEnabled checks that remote config is enabled (url is defined and enabled flag is set)
Expand Down
37 changes: 27 additions & 10 deletions metric_source/remote/remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"net/http"
"time"

"github.com/moira-alert/moira/clock"

"github.com/moira-alert/moira"
metricSource "github.com/moira-alert/moira/metric_source"
)
Expand All @@ -23,17 +25,30 @@ func (err ErrRemoteTriggerResponse) Error() string {
return err.InternalError.Error()
}

// ErrRemoteUnavailable is a custom error when remote trigger check fails
type ErrRemoteUnavailable struct {
InternalError error
Target string
}

// Error is a representation of Error interface method
func (err ErrRemoteUnavailable) Error() string {
return err.InternalError.Error()
}

// Remote is implementation of MetricSource interface, which implements fetch metrics method from remote graphite installation
type Remote struct {
config *Config
client *http.Client
clock moira.Clock
}

// Create configures remote metric source
func Create(config *Config) metricSource.MetricSource {
return &Remote{
config: config,
client: &http.Client{Timeout: config.Timeout},
clock: clock.NewSystemClock(),
}
}

Expand All @@ -50,9 +65,15 @@ func (remote *Remote) Fetch(target string, from, until int64, allowRealTimeAlert
Target: target,
}
}
body, err := remote.makeRequest(req)
body, isRemoteAvailable, err := remote.makeRequestWithRetries(req, remote.config.Timeout, remote.config.RetrySeconds)
if err != nil {
return nil, ErrRemoteTriggerResponse{
if isRemoteAvailable {
return nil, ErrRemoteTriggerResponse{
InternalError: err,
Target: target,
}
}
return nil, ErrRemoteUnavailable{
InternalError: err,
Target: target,
}
Expand Down Expand Up @@ -83,18 +104,14 @@ func (remote *Remote) IsConfigured() (bool, error) {

// IsRemoteAvailable checks if graphite API is available and returns 200 response
func (remote *Remote) IsRemoteAvailable() (bool, error) {
maxRetries := 3
until := time.Now().Unix()
from := until - 600 //nolint
req, err := remote.prepareRequest(from, until, "NonExistingTarget")
if err != nil {
return false, err
}
for attempt := 0; attempt < maxRetries; attempt++ {
_, err = remote.makeRequest(req)
if err == nil {
return true, nil
}
}
return false, err
_, isRemoteAvailable, err := remote.makeRequestWithRetries(
req, remote.config.HealthCheckTimeout, remote.config.HealthCheckRetrySeconds,
)
return isRemoteAvailable, err
}
Loading

0 comments on commit c694629

Please sign in to comment.