From 656b09d1be1a8ac3705c2293a21f312d0ea12f23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=96=E7=95=8C?= Date: Sun, 15 Mar 2026 13:03:31 +0800 Subject: [PATCH] ccm,ocm: never treat external usage endpoint failures as over-limit --- service/ccm/credential_external.go | 47 +++++------------------------- service/ocm/credential_external.go | 47 +++++------------------------- 2 files changed, 16 insertions(+), 78 deletions(-) diff --git a/service/ccm/credential_external.go b/service/ccm/credential_external.go index a9876eaf5..9a3ca1e16 100644 --- a/service/ccm/credential_external.go +++ b/service/ccm/credential_external.go @@ -549,10 +549,8 @@ func (c *externalCredential) pollUsage(ctx context.Context) { response, err := c.doPollUsageRequest(ctx) if err != nil { - if !c.isPollBackoffAtCap() { - c.logger.Error("poll usage for ", c.tag, ": ", err) - } - c.incrementPollFailures() + c.logger.Debug("poll usage for ", c.tag, ": ", err) + c.clearPollFailures() return } defer response.Body.Close() @@ -560,16 +558,7 @@ func (c *externalCredential) pollUsage(ctx context.Context) { if response.StatusCode != http.StatusOK { body, _ := io.ReadAll(response.Body) c.logger.Debug("poll usage for ", c.tag, ": status ", response.StatusCode, " ", string(body)) - // 404 means the remote does not have a status endpoint yet; - // usage will be updated passively from response headers. - if response.StatusCode == http.StatusNotFound { - c.stateAccess.Lock() - c.state.consecutivePollFailures = 0 - c.checkTransitionLocked() - c.stateAccess.Unlock() - } else { - c.incrementPollFailures() - } + c.clearPollFailures() return } @@ -581,7 +570,7 @@ func (c *externalCredential) pollUsage(ctx context.Context) { err = json.NewDecoder(response.Body).Decode(&statusResponse) if err != nil { c.logger.Debug("poll usage for ", c.tag, ": decode: ", err) - c.incrementPollFailures() + c.clearPollFailures() return } @@ -625,34 +614,14 @@ func (c *externalCredential) markUsagePollAttempted() { } func (c *externalCredential) pollBackoff(baseInterval time.Duration) time.Duration { - c.stateAccess.RLock() - failures := c.state.consecutivePollFailures - c.stateAccess.RUnlock() - if failures <= 0 { - return baseInterval - } - backoff := failedPollRetryInterval * time.Duration(1<<(failures-1)) - if backoff > httpRetryMaxBackoff { - return httpRetryMaxBackoff - } - return backoff + return baseInterval } -func (c *externalCredential) isPollBackoffAtCap() bool { - c.stateAccess.RLock() - defer c.stateAccess.RUnlock() - failures := c.state.consecutivePollFailures - return failures > 0 && failedPollRetryInterval*time.Duration(1<<(failures-1)) >= httpRetryMaxBackoff -} - -func (c *externalCredential) incrementPollFailures() { +func (c *externalCredential) clearPollFailures() { c.stateAccess.Lock() - c.state.consecutivePollFailures++ - shouldInterrupt := c.checkTransitionLocked() + c.state.consecutivePollFailures = 0 + c.checkTransitionLocked() c.stateAccess.Unlock() - if shouldInterrupt { - c.interruptConnections() - } } func (c *externalCredential) usageTrackerOrNil() *AggregatedUsage { diff --git a/service/ocm/credential_external.go b/service/ocm/credential_external.go index bcfe6d234..d924ae4ee 100644 --- a/service/ocm/credential_external.go +++ b/service/ocm/credential_external.go @@ -588,10 +588,8 @@ func (c *externalCredential) pollUsage(ctx context.Context) { response, err := c.doPollUsageRequest(ctx) if err != nil { - if !c.isPollBackoffAtCap() { - c.logger.Error("poll usage for ", c.tag, ": ", err) - } - c.incrementPollFailures() + c.logger.Debug("poll usage for ", c.tag, ": ", err) + c.clearPollFailures() return } defer response.Body.Close() @@ -599,16 +597,7 @@ func (c *externalCredential) pollUsage(ctx context.Context) { if response.StatusCode != http.StatusOK { body, _ := io.ReadAll(response.Body) c.logger.Debug("poll usage for ", c.tag, ": status ", response.StatusCode, " ", string(body)) - // 404 means the remote does not have a status endpoint yet; - // usage will be updated passively from response headers. - if response.StatusCode == http.StatusNotFound { - c.stateAccess.Lock() - c.state.consecutivePollFailures = 0 - c.checkTransitionLocked() - c.stateAccess.Unlock() - } else { - c.incrementPollFailures() - } + c.clearPollFailures() return } @@ -620,7 +609,7 @@ func (c *externalCredential) pollUsage(ctx context.Context) { err = json.NewDecoder(response.Body).Decode(&statusResponse) if err != nil { c.logger.Debug("poll usage for ", c.tag, ": decode: ", err) - c.incrementPollFailures() + c.clearPollFailures() return } @@ -664,34 +653,14 @@ func (c *externalCredential) markUsagePollAttempted() { } func (c *externalCredential) pollBackoff(baseInterval time.Duration) time.Duration { - c.stateAccess.RLock() - failures := c.state.consecutivePollFailures - c.stateAccess.RUnlock() - if failures <= 0 { - return baseInterval - } - backoff := failedPollRetryInterval * time.Duration(1<<(failures-1)) - if backoff > httpRetryMaxBackoff { - return httpRetryMaxBackoff - } - return backoff + return baseInterval } -func (c *externalCredential) isPollBackoffAtCap() bool { - c.stateAccess.RLock() - defer c.stateAccess.RUnlock() - failures := c.state.consecutivePollFailures - return failures > 0 && failedPollRetryInterval*time.Duration(1<<(failures-1)) >= httpRetryMaxBackoff -} - -func (c *externalCredential) incrementPollFailures() { +func (c *externalCredential) clearPollFailures() { c.stateAccess.Lock() - c.state.consecutivePollFailures++ - shouldInterrupt := c.checkTransitionLocked() + c.state.consecutivePollFailures = 0 + c.checkTransitionLocked() c.stateAccess.Unlock() - if shouldInterrupt { - c.interruptConnections() - } } func (c *externalCredential) usageTrackerOrNil() *AggregatedUsage {