Skip to content

Commit

Permalink
feat: Added handling of Remote Unavailable error
Browse files Browse the repository at this point in the history
  • Loading branch information
d.anchikov committed Jul 6, 2022
1 parent bcc4b92 commit c5b979e
Show file tree
Hide file tree
Showing 2 changed files with 181 additions and 35 deletions.
39 changes: 24 additions & 15 deletions checker/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,18 +97,17 @@ func (triggerChecker *TriggerChecker) handleFetchError(checkData moira.CheckData
checkData.UpdateScore()
return triggerChecker.database.SetTriggerLastCheck(triggerChecker.triggerID, &checkData, triggerChecker.trigger.IsRemote)
}
case remote.ErrRemoteTriggerResponse:
timeSinceLastSuccessfulCheck := checkData.Timestamp - checkData.LastSuccessfulCheckTimestamp
if timeSinceLastSuccessfulCheck >= triggerChecker.ttl {
checkData.State = moira.StateEXCEPTION
checkData.Message = fmt.Sprintf("Remote server unavailable. Trigger is not checked for %d seconds", timeSinceLastSuccessfulCheck)
checkData, err = triggerChecker.compareTriggerStates(checkData)
}
case local.ErrUnknownFunction, local.ErrEvalExpr, remote.ErrRemoteTriggerResponse, remote.ErrRemoteUnavailable:
triggerChecker.logger.Warning(formatTriggerCheckException(triggerChecker.triggerID, err))
case local.ErrUnknownFunction, local.ErrEvalExpr:
checkData.State = moira.StateEXCEPTION
checkData.Message = err.Error()
triggerChecker.logger.Warning(formatTriggerCheckException(triggerChecker.triggerID, err))
if _, ok := err.(remote.ErrRemoteUnavailable); ok {
timeSinceLastSuccessfulCheck := checkData.Timestamp - checkData.LastSuccessfulCheckTimestamp
checkData.Message = fmt.Sprintf(
"Remote server unavailable. Trigger is not checked for %d seconds",
timeSinceLastSuccessfulCheck,
)
}
default:
return triggerChecker.handleUndefinedError(checkData, err)
}
Expand Down Expand Up @@ -235,8 +234,12 @@ func (triggerChecker *TriggerChecker) preparePatternMetrics(fetchedMetrics conve
}

// check is the function that handles check on prepared metrics.
func (triggerChecker *TriggerChecker) check(metrics map[string]map[string]metricSource.MetricData,
aloneMetrics map[string]metricSource.MetricData, checkData moira.CheckData, logger moira.Logger) (moira.CheckData, error) {
func (triggerChecker *TriggerChecker) check(
metrics map[string]map[string]metricSource.MetricData,
aloneMetrics map[string]metricSource.MetricData,
checkData moira.CheckData,
logger moira.Logger,
) (moira.CheckData, error) {
if len(metrics) == 0 { // Case when trigger have only alone metrics
if metrics == nil {
metrics = make(map[string]map[string]metricSource.MetricData, 1)
Expand Down Expand Up @@ -264,8 +267,11 @@ func (triggerChecker *TriggerChecker) check(metrics map[string]map[string]metric
}

// checkTargets is a Function that takes a
func (triggerChecker *TriggerChecker) checkTargets(metricName string, metrics map[string]metricSource.MetricData,
logger moira.Logger) (lastState moira.MetricState, needToDeleteMetric bool, err error) {
func (triggerChecker *TriggerChecker) checkTargets(
metricName string,
metrics map[string]metricSource.MetricData,
logger moira.Logger,
) (lastState moira.MetricState, needToDeleteMetric bool, err error) {
lastState, metricStates, err := triggerChecker.getMetricStepsStates(metricName, metrics, logger)
if err != nil {
return lastState, needToDeleteMetric, err
Expand Down Expand Up @@ -308,8 +314,11 @@ func (triggerChecker *TriggerChecker) checkForNoData(metricLastState moira.Metri
)
}

func (triggerChecker *TriggerChecker) getMetricStepsStates(metricName string, metrics map[string]metricSource.MetricData,
logger moira.Logger) (last moira.MetricState, current []moira.MetricState, err error) {
func (triggerChecker *TriggerChecker) getMetricStepsStates(
metricName string,
metrics map[string]metricSource.MetricData,
logger moira.Logger,
) (last moira.MetricState, current []moira.MetricState, err error) {
var startTime int64
var stepTime int64

Expand Down
177 changes: 157 additions & 20 deletions checker/check_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import (
"testing"
"time"

"github.com/moira-alert/moira/metric_source/remote"

"github.com/golang/mock/gomock"
"github.com/moira-alert/moira"
"github.com/moira-alert/moira/checker/metrics/conversion"
Expand Down Expand Up @@ -619,6 +621,7 @@ func TestCheck(t *testing.T) {
source := mock_metric_source.NewMockMetricSource(mockCtrl)
fetchResult := mock_metric_source.NewMockFetchResult(mockCtrl)
logger, _ := logging.GetLogger("Test")
logger.Level("fatal") // nolint: errcheck
defer mockCtrl.Finish()

var retention int64 = 10
Expand All @@ -627,8 +630,6 @@ func TestCheck(t *testing.T) {
var errValue float64 = 20
pattern := "super.puper.pattern" //nolint
metric := "super.puper.metric" //nolint
message := "ooops, metric error"
metricErr := fmt.Errorf(message)
messageException := `Unknown graphite function: "WrongFunction"`
unknownFunctionExc := local.ErrorUnknownFunction(fmt.Errorf(messageException))

Expand Down Expand Up @@ -667,31 +668,56 @@ func TestCheck(t *testing.T) {
},
}

Convey("Fetch error", func() {
lastCheck := moira.CheckData{
Convey("trigger should switch to EXCEPTION", func() {
expectedLastCheck := moira.CheckData{
Metrics: triggerChecker.lastCheck.Metrics,
State: moira.StateEXCEPTION,
Timestamp: triggerChecker.until,
EventTimestamp: triggerChecker.until,
Score: int64(100000),
Message: metricErr.Error(),
MetricsToTargetRelation: map[string]string{},
}

gomock.InOrder(
source.EXPECT().Fetch(pattern, triggerChecker.from, triggerChecker.until, true).Return(nil, metricErr),
dataBase.EXPECT().PushNotificationEvent(&moira.NotificationEvent{
IsTriggerEvent: true,
TriggerID: triggerChecker.triggerID,
State: moira.StateEXCEPTION,
OldState: moira.StateOK,
Timestamp: int64(67),
Metric: triggerChecker.trigger.Name,
}, true).Return(nil),
dataBase.EXPECT().SetTriggerLastCheck(triggerChecker.triggerID, &lastCheck, triggerChecker.trigger.IsRemote).Return(nil),
)
err := triggerChecker.Check()
So(err, ShouldBeNil)
expectedNotification := &moira.NotificationEvent{
IsTriggerEvent: true,
TriggerID: triggerChecker.triggerID,
State: moira.StateEXCEPTION,
OldState: moira.StateOK,
Timestamp: int64(67),
Metric: triggerChecker.trigger.Name,
}
Convey("on undefined fetching error", func() {
fetchErr := fmt.Errorf("some error")
expectedLastCheck.Message = fetchErr.Error()
gomock.InOrder(
source.EXPECT().Fetch(pattern, triggerChecker.from, triggerChecker.until, true).Return(nil, fetchErr),
dataBase.EXPECT().PushNotificationEvent(expectedNotification, true).Return(nil),
dataBase.EXPECT().SetTriggerLastCheck(triggerChecker.triggerID, &expectedLastCheck, triggerChecker.trigger.IsRemote).Return(nil),
)
err := triggerChecker.Check()
So(err, ShouldBeNil)
})
Convey("on ErrRemoteUnavailable fetching error", func() {
fetchErr := remote.ErrRemoteUnavailable{InternalError: fmt.Errorf("some error")}
expectedLastCheck.Message = "Remote server unavailable. Trigger is not checked for 67 seconds"
gomock.InOrder(
source.EXPECT().Fetch(pattern, triggerChecker.from, triggerChecker.until, true).Return(nil, fetchErr),
dataBase.EXPECT().PushNotificationEvent(expectedNotification, true).Return(nil),
dataBase.EXPECT().SetTriggerLastCheck(triggerChecker.triggerID, &expectedLastCheck, triggerChecker.trigger.IsRemote).Return(nil),
)
err := triggerChecker.Check()
So(err, ShouldBeNil)
})
Convey("on ErrRemoteTriggerResponse fetching error", func() {
fetchErr := remote.ErrRemoteTriggerResponse{InternalError: fmt.Errorf("some error")}
expectedLastCheck.Message = fetchErr.Error()
gomock.InOrder(
source.EXPECT().Fetch(pattern, triggerChecker.from, triggerChecker.until, true).Return(nil, fetchErr),
dataBase.EXPECT().PushNotificationEvent(expectedNotification, true).Return(nil),
dataBase.EXPECT().SetTriggerLastCheck(triggerChecker.triggerID, &expectedLastCheck, triggerChecker.trigger.IsRemote).Return(nil),
)
err := triggerChecker.Check()
So(err, ShouldBeNil)
})
})

Convey("Switch trigger to EXCEPTION and back", func() {
Expand Down Expand Up @@ -1497,3 +1523,114 @@ func TestTriggerChecker_handlePrepareError(t *testing.T) {
})
})
}

func TestTriggerChecker_handleFetchError(t *testing.T) {
Convey("Test handleFetchError", t, func() {
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
dataBase := mock_moira_alert.NewMockDatabase(mockCtrl)
logger, _ := logging.GetLogger("Test")
logger.Level("error") // nolint: errcheck

trigger := &moira.Trigger{}
triggerChecker := TriggerChecker{
triggerID: "test trigger",
trigger: trigger,
database: dataBase,
logger: logger,
ttlState: moira.TTLStateNODATA,
}
checkData := moira.CheckData{}

Convey("with ErrTriggerHasEmptyTargets", func() {
err := ErrTriggerHasEmptyTargets{}
checkData.Timestamp = int64(15)
triggerChecker.lastCheck = &moira.CheckData{
State: moira.StateOK,
EventTimestamp: 10,
}
expectedCheckData := moira.CheckData{
Score: 1000,
State: moira.StateNODATA,
Message: err.Error(),
Timestamp: int64(15),
}
dataBase.EXPECT().SetTriggerLastCheck("test trigger", &expectedCheckData, false)
actualErr := triggerChecker.handleFetchError(checkData, err)
So(actualErr, ShouldBeNil)
})
Convey("with ErrTriggerHasOnlyWildcards", func() {
err := ErrTriggerHasOnlyWildcards{}
checkData.Timestamp = int64(15)
triggerChecker.lastCheck = &moira.CheckData{
State: moira.StateOK,
EventTimestamp: 10,
}
expectedCheckData := moira.CheckData{
Score: 1000,
State: moira.StateNODATA,
Message: err.Error(),
Timestamp: int64(15),
}
dataBase.EXPECT().SetTriggerLastCheck("test trigger", &expectedCheckData, false)
actualErr := triggerChecker.handleFetchError(checkData, err)
So(actualErr, ShouldBeNil)
})
Convey("with ErrRemoteUnavailable", func() {
err := remote.ErrRemoteUnavailable{InternalError: fmt.Errorf("some error")}
checkData.Timestamp = int64(25)
checkData.LastSuccessfulCheckTimestamp = int64(5)
triggerChecker.lastCheck = &moira.CheckData{
State: moira.StateOK,
EventTimestamp: 10,
}
expectedCheckData := moira.CheckData{
Score: 100000,
State: moira.StateEXCEPTION,
Message: "Remote server unavailable. Trigger is not checked for 20 seconds",
Timestamp: int64(25),
EventTimestamp: int64(25),
LastSuccessfulCheckTimestamp: int64(5),
}
dataBase.EXPECT().PushNotificationEvent(&moira.NotificationEvent{
IsTriggerEvent: true,
TriggerID: triggerChecker.triggerID,
State: moira.StateEXCEPTION,
OldState: getEventOldState(moira.StateOK, "", false),
Timestamp: 25,
Metric: triggerChecker.trigger.Name,
MessageEventInfo: nil,
}, true)
dataBase.EXPECT().SetTriggerLastCheck("test trigger", &expectedCheckData, false)
actualErr := triggerChecker.handleFetchError(checkData, err)
So(actualErr, ShouldBeNil)
})
Convey("with ErrRemoteTriggerResponse", func() {
err := remote.ErrRemoteTriggerResponse{InternalError: fmt.Errorf("some error")}
checkData.Timestamp = int64(10)
triggerChecker.lastCheck = &moira.CheckData{
State: moira.StateOK,
EventTimestamp: 10,
}
expectedCheckData := moira.CheckData{
Score: 100000,
State: moira.StateEXCEPTION,
Message: err.Error(),
Timestamp: 10,
EventTimestamp: 10,
}
dataBase.EXPECT().PushNotificationEvent(&moira.NotificationEvent{
IsTriggerEvent: true,
TriggerID: triggerChecker.triggerID,
State: moira.StateEXCEPTION,
OldState: getEventOldState(moira.StateOK, "", false),
Timestamp: 10,
Metric: triggerChecker.trigger.Name,
MessageEventInfo: nil,
}, true)
dataBase.EXPECT().SetTriggerLastCheck("test trigger", &expectedCheckData, false)
actualErr := triggerChecker.handleFetchError(checkData, err)
So(actualErr, ShouldBeNil)
})
})
}

0 comments on commit c5b979e

Please sign in to comment.