engine: Retry should be stateful and add RetryReset
Make the retry meta param a bit more sane now that we can persist it between graph switches. This also unblocks us from pausing during retry loops.
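
For context, the gist of the change in a standalone form: keep the per-resource retry counter in an engine-level map keyed by a stable resource UID, so a graph switch does not silently refill the retry budget. This is only a minimal sketch with made-up names (metaState, engineSketch, workerStart), not the engine's real types:

    package main

    import "fmt"

    // metaState mirrors the spirit of engine.MetaState: counters that must
    // outlive any single graph/vertex lifetime.
    type metaState struct {
    	checkApplyRetry int16 // remaining retries for CheckApply
    }

    // engineSketch stands in for the engine: it owns the map that persists.
    type engineSketch struct {
    	metas map[string]*metaState // keyed by a stable per-resource uid
    }

    // workerStart is what a Worker-like function might do on (re)start: only
    // build fresh state if none exists yet, or if an explicit reset was requested.
    func (e *engineSketch) workerStart(uid string, retry int16, reset bool) *metaState {
    	if _, exists := e.metas[uid]; !exists || reset {
    		e.metas[uid] = &metaState{checkApplyRetry: retry}
    	}
    	return e.metas[uid] // pre-existing state survives a graph switch
    }

    func main() {
    	e := &engineSketch{metas: make(map[string]*metaState)}

    	st := e.workerStart("file[/tmp/hello]", 3, false)
    	st.checkApplyRetry-- // pretend one failed CheckApply used up a retry

    	// simulate a graph switch: the worker starts again for the "same" resource
    	st2 := e.workerStart("file[/tmp/hello]", 3, false)
    	fmt.Println(st2.checkApplyRetry) // prints 2, not 3: the count persisted
    }

The real diff below does the same thing with engine.MetaState, obj.metas and engine.PtrUID.
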
@@ -258,7 +258,9 @@ func (obj *Engine) Worker(vertex pgraph.Vertex) error {
 
 	// initialize or reinitialize the meta state for this resource uid
 	if _, exists := obj.metas[engine.PtrUID(res)]; !exists || res.MetaParams().Reset {
-		obj.metas[engine.PtrUID(res)] = &engine.MetaState{}
+		obj.metas[engine.PtrUID(res)] = &engine.MetaState{
+			CheckApplyRetry: res.MetaParams().Retry, // lookup the retry value
+		}
 	}
 
 	//defer close(obj.state[vertex].stopped) // done signal

@@ -492,7 +494,7 @@ Loop:
 
 		// retry...
 		var err error
-		var retry = res.MetaParams().Retry // lookup the retry value
+		//var retry = res.MetaParams().Retry // lookup the retry value
 		var delay uint64
 	RetryLoop:
 		for { // retry loop

@@ -542,21 +544,28 @@ Loop:
 			if obj.Debug {
 				obj.Logf("Process(%s): Return(%s)", vertex, engineUtil.CleanError(err))
 			}
+			if err == nil && res.MetaParams().RetryReset { // reset it on success!
+				obj.metas[engine.PtrUID(res)].CheckApplyRetry = res.MetaParams().Retry // lookup the retry value
+			}
 			if err == nil {
 				break RetryLoop
 			}
 			// we've got an error...
 			delay = res.MetaParams().Delay
 
-			if retry < 0 { // infinite retries
+			if obj.metas[engine.PtrUID(res)].CheckApplyRetry < 0 { // infinite retries
 				continue
 			}
-			if retry > 0 { // don't decrement past 0
-				retry--
-				obj.state[vertex].init.Logf("retrying CheckApply after %.4f seconds (%d left)", float64(delay)/1000, retry)
+			if obj.metas[engine.PtrUID(res)].CheckApplyRetry > 0 { // don't decrement past 0
+				obj.metas[engine.PtrUID(res)].CheckApplyRetry--
+				obj.state[vertex].init.Logf(
+					"retrying CheckApply after %.4f seconds (%d left)",
+					float64(delay)/1000,
+					obj.metas[engine.PtrUID(res)].CheckApplyRetry,
+				)
 				continue
 			}
-			//if retry == 0 { // optional
+			//if obj.metas[engine.PtrUID(res)].CheckApplyRetry == 0 { // optional
 			//	err = errwrap.Wrapf(err, "permanent process error")
 			//}
 

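To make the semantics of the loop above easier to follow, here is a rough, self-contained model (names like retryState and runWithRetry are invented for illustration): -1 retries forever, a positive count is decremented per failure, and with RetryReset enabled a success refills the count to the configured value.

    package main

    import (
    	"errors"
    	"fmt"
    	"time"
    )

    // retryState is a stand-in for the persistent per-resource counter.
    type retryState struct {
    	remaining int16 // current CheckApply retry budget
    }

    // runWithRetry keeps calling fn until it succeeds or the budget runs out.
    // configured models the retry meta param, retryReset models the new
    // RetryReset flag, and delayMs models the delay meta param.
    func runWithRetry(st *retryState, configured int16, retryReset bool, delayMs uint64, fn func() error) error {
    	for {
    		err := fn()
    		if err == nil && retryReset {
    			st.remaining = configured // reset it on success!
    		}
    		if err == nil {
    			return nil
    		}
    		// we've got an error...
    		if st.remaining < 0 { // infinite retries
    			time.Sleep(time.Duration(delayMs) * time.Millisecond)
    			continue
    		}
    		if st.remaining > 0 { // don't decrement past 0
    			st.remaining--
    			fmt.Printf("retrying after %.4f seconds (%d left)\n", float64(delayMs)/1000, st.remaining)
    			time.Sleep(time.Duration(delayMs) * time.Millisecond)
    			continue
    		}
    		return err // budget exhausted: permanent error
    	}
    }

    func main() {
    	st := &retryState{remaining: 2} // as if retry was set to 2
    	calls := 0
    	err := runWithRetry(st, 2, true, 10, func() error {
    		calls++
    		if calls < 3 {
    			return errors.New("transient failure")
    		}
    		return nil
    	})
    	fmt.Println(err, st.remaining) // <nil> 2: success refilled the budget
    }

Note that in the engine the delay handling lives earlier in the retry loop; the sketch just inlines a sleep to stay self-contained.
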
@@ -65,9 +65,16 @@ type MetaParams struct {
 	// reason to want to do something differently for the Watch errors.
 
 	// Retry is the number of times to retry on error. Use -1 for infinite.
+	// This value is used for both Watch and CheckApply.
 	Retry int16 `yaml:"retry"`
 
-	// Delay is the number of milliseconds to wait between retries.
+	// RetryReset resets the retry count for CheckApply if it succeeds. This
+	// value is currently different from the count used for Watch.
+	// TODO: Consider resetting retry count for watch if it sends an event?
+	RetryReset bool `yaml:"retryreset"`
+
+	// Delay is the number of milliseconds to wait between retries. This
+	// value is used for both Watch and CheckApply.
 	Delay uint64 `yaml:"delay"`
 
 	// Poll is the number of seconds between poll intervals. Use 0 to Watch.

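Since the new field gets a yaml tag like the existing ones, here is a hedged sketch of what the user-facing YAML could look like, decoded with gopkg.in/yaml.v2 purely for illustration (the stand-in struct below is not the real MetaParams):

    package main

    import (
    	"fmt"

    	yaml "gopkg.in/yaml.v2"
    )

    // metaParams is a stripped-down stand-in with the same yaml tags as above.
    type metaParams struct {
    	Retry      int16  `yaml:"retry"`
    	RetryReset bool   `yaml:"retryreset"`
    	Delay      uint64 `yaml:"delay"`
    }

    func main() {
    	data := []byte(`
    retry: 3
    retryreset: true
    delay: 1000
    `)
    	var mp metaParams
    	if err := yaml.Unmarshal(data, &mp); err != nil {
    		panic(err)
    	}
    	fmt.Printf("%+v\n", mp) // {Retry:3 RetryReset:true Delay:1000}
    }
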
@@ -227,4 +234,7 @@ func (obj *MetaParams) UnmarshalYAML(unmarshal func(interface{}) error) error {
 // changed a parameter (field) of the resource. This doesn't mean we don't want
 // to ever reset these counts. For that, flip on the reset meta param.
 type MetaState struct {
+
+	// CheckApplyRetry is the current retry count for CheckApply.
+	CheckApplyRetry int16
 }