Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Added retry logic to remote api request #760

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/handler/triggers.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func getTriggerFromRequest(request *http.Request) (*dto.Trigger, *api.ErrorRespo
return nil, api.ErrorInvalidRequest(err)
case remote.ErrRemoteTriggerResponse:
response := api.ErrorRemoteServerUnavailable(err)
middleware.GetLoggerEntry(request).Error("%s : %s : %s", response.StatusText, response.ErrorText, err)
middleware.GetLoggerEntry(request).Errorf("%s : %s : %s", response.StatusText, response.ErrorText, err)
return nil, response
default:
return nil, api.ErrorInternalServer(err)
Expand Down
5 changes: 5 additions & 0 deletions clock/clock.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,8 @@ func NewSystemClock() *SystemClock {
func (t *SystemClock) Now() time.Time {
return time.Now().UTC()
}

// Sleep pauses the current goroutine for at least the passed duration
func (t *SystemClock) Sleep(duration time.Duration) {
time.Sleep(duration)
}
8 changes: 5 additions & 3 deletions cmd/checker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,11 @@ func getDefault() config {
Pprof: cmd.ProfilerConfig{Enabled: false},
},
Remote: cmd.RemoteConfig{
CheckInterval: "60s",
Timeout: "60s",
MetricsTTL: "7d",
CheckInterval: "60s",
Timeout: "60s",
MetricsTTL: "168h",
HealthCheckTimeout: "60s",
HealthCheckRetrySeconds: "1 1",
},
}
}
40 changes: 33 additions & 7 deletions cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package cmd
import (
"fmt"
"io/ioutil"
"strconv"
"strings"
"time"

"github.com/moira-alert/moira/metrics"

Expand Down Expand Up @@ -113,6 +115,12 @@ type RemoteConfig struct {
MetricsTTL string `yaml:"metrics_ttl"`
// Timeout for remote requests
Timeout string `yaml:"timeout"`
// Retry seconds for remote requests divided by spaces
RetrySeconds string `yaml:"retry_seconds"`
// HealthCheckTimeout is timeout for remote api health check requests
HealthCheckTimeout string `yaml:"health_check_timeout"`
// Retry seconds for remote api health check requests divided by spaces
HealthCheckRetrySeconds string `yaml:"health_check_retry_seconds"`
// Username for basic auth
User string `yaml:"user"`
// Password for basic auth
Expand All @@ -129,16 +137,34 @@ type ImageStoreConfig struct {
// GetRemoteSourceSettings returns remote config parsed from moira config files
func (config *RemoteConfig) GetRemoteSourceSettings() *remoteSource.Config {
return &remoteSource.Config{
URL: config.URL,
CheckInterval: to.Duration(config.CheckInterval),
MetricsTTL: to.Duration(config.MetricsTTL),
Timeout: to.Duration(config.Timeout),
User: config.User,
Password: config.Password,
Enabled: config.Enabled,
URL: config.URL,
CheckInterval: to.Duration(config.CheckInterval),
MetricsTTL: to.Duration(config.MetricsTTL),
Timeout: to.Duration(config.Timeout),
RetrySeconds: ParseRetrySeconds(config.RetrySeconds),
dmitryanchikov marked this conversation as resolved.
Show resolved Hide resolved
HealthCheckTimeout: to.Duration(config.HealthCheckTimeout),
HealthCheckRetrySeconds: ParseRetrySeconds(config.HealthCheckRetrySeconds),
User: config.User,
Password: config.Password,
Enabled: config.Enabled,
}
}

// ParseRetrySeconds parses config value string into array of integers
func ParseRetrySeconds(retrySecondsString string) []time.Duration {
secondsStringList := strings.Fields(retrySecondsString)
retrySecondsIntList := make([]time.Duration, len(secondsStringList))

for index, secondsString := range secondsStringList {
secondsInt, err := strconv.Atoi(secondsString)
if err != nil {
panic(err)
}
retrySecondsIntList[index] = time.Second * time.Duration(secondsInt)
}
return retrySecondsIntList
}

// ReadConfig parses config file by the given path into Moira-used type
func ReadConfig(configFileName string, config interface{}) error {
configYaml, err := ioutil.ReadFile(configFileName)
Expand Down
1 change: 1 addition & 0 deletions interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,4 +227,5 @@ type PlotTheme interface {
// Clock is an interface to work with Time.
type Clock interface {
Now() time.Time
Sleep(duration time.Duration)
}
5 changes: 4 additions & 1 deletion local/checker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@ remote:
enabled: true
url: "http://graphite:80/render"
check_interval: 60s
metrics_ttl: 168h
dmitryanchikov marked this conversation as resolved.
Show resolved Hide resolved
timeout: 60s
metrics_ttl: 7d
retry_seconds: 1 1 1
health_check_timeout: 6s
health_check_retry_seconds: 1 1 1
checker:
nodata_check_interval: 60s
check_interval: 10s
Expand Down
17 changes: 10 additions & 7 deletions metric_source/remote/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@ import "time"

// Config represents config from remote storage
type Config struct {
URL string
CheckInterval time.Duration
MetricsTTL time.Duration
Timeout time.Duration
User string
Password string
Enabled bool
URL string
CheckInterval time.Duration
MetricsTTL time.Duration
Timeout time.Duration
RetrySeconds []time.Duration
HealthCheckTimeout time.Duration
HealthCheckRetrySeconds []time.Duration
User string
Password string
Enabled bool
}

// isEnabled checks that remote config is enabled (url is defined and enabled flag is set)
Expand Down
37 changes: 27 additions & 10 deletions metric_source/remote/remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"net/http"
"time"

"github.com/moira-alert/moira/clock"

"github.com/moira-alert/moira"
metricSource "github.com/moira-alert/moira/metric_source"
)
Expand All @@ -23,17 +25,30 @@ func (err ErrRemoteTriggerResponse) Error() string {
return err.InternalError.Error()
}

// ErrRemoteUnavailable is a custom error when remote trigger check fails
type ErrRemoteUnavailable struct {
InternalError error
Target string
}

// Error is a representation of Error interface method
func (err ErrRemoteUnavailable) Error() string {
return err.InternalError.Error()
}

// Remote is implementation of MetricSource interface, which implements fetch metrics method from remote graphite installation
type Remote struct {
config *Config
client *http.Client
clock moira.Clock
}

// Create configures remote metric source
func Create(config *Config) metricSource.MetricSource {
return &Remote{
config: config,
client: &http.Client{Timeout: config.Timeout},
clock: clock.NewSystemClock(),
}
}

Expand All @@ -50,9 +65,15 @@ func (remote *Remote) Fetch(target string, from, until int64, allowRealTimeAlert
Target: target,
}
}
body, err := remote.makeRequest(req)
body, isRemoteAvailable, err := remote.makeRequestWithRetries(req, remote.config.Timeout, remote.config.RetrySeconds)
if err != nil {
return nil, ErrRemoteTriggerResponse{
if isRemoteAvailable {
return nil, ErrRemoteTriggerResponse{
InternalError: err,
Target: target,
}
}
return nil, ErrRemoteUnavailable{
InternalError: err,
Target: target,
}
Expand Down Expand Up @@ -83,18 +104,14 @@ func (remote *Remote) IsConfigured() (bool, error) {

// IsRemoteAvailable checks if graphite API is available and returns 200 response
func (remote *Remote) IsRemoteAvailable() (bool, error) {
maxRetries := 3
until := time.Now().Unix()
from := until - 600 //nolint
req, err := remote.prepareRequest(from, until, "NonExistingTarget")
if err != nil {
return false, err
}
for attempt := 0; attempt < maxRetries; attempt++ {
_, err = remote.makeRequest(req)
if err == nil {
return true, nil
}
}
return false, err
_, isRemoteAvailable, err := remote.makeRequestWithRetries(
req, remote.config.HealthCheckTimeout, remote.config.HealthCheckRetrySeconds,
)
return isRemoteAvailable, err
}
Loading