Skip to content

Commit

Permalink
Making poller history a dynamic config (#7344)
Browse files Browse the repository at this point in the history
## What changed?
<!-- Describe what has changed in this PR -->
- PollerHistoryTTL is now a dynamic config setting (`matching.PollerHistoryTTL`, default 5 minutes) instead of a hard-coded constant.

## Why?
<!-- Tell your future self why have you made these changes -->
- Some Versioning-3.1 tests only work when this TTL is set to a low value.
Because the TTL was hard-coded, those tests had to be skipped and were
excluded from CI runs, leaving the behavior they cover unverified.

## How did you test it?
<!-- How have you verified this change? Tested locally? Added a unit
test? Checked in staging env? -->
- Re-enabled the previously skipped tests; they now override PollerHistoryTTL to 500ms through dynamic config.

## Potential risks
<!-- Assuming the worst case, what can be broken when deploying this
change to production? -->

## Documentation
<!-- Have you made sure this change doesn't falsify anything currently
stated in `docs/`? If significant
new behavior is added, have you described that in `docs/`? -->

## Is hotfix candidate?
<!-- Is this PR a hotfix candidate or does it require a notification to
be sent to the broader community? (Yes/No) -->
  • Loading branch information
Shivs11 authored Feb 14, 2025
1 parent d33f3f4 commit 9669f3a
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 16 deletions.
6 changes: 6 additions & 0 deletions common/dynamicconfig/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,12 @@ to exceed this number will fail with a FailedPrecondition error.`,
`MatchingDeletedRuleRetentionTime is the length of time that deleted Version Assignment Rules and
Deleted Redirect Rules will be kept in the DB (with DeleteTimestamp). After this time, the tombstones are deleted at the next time update of versioning data for the task queue.`,
)
PollerHistoryTTL = NewNamespaceDurationSetting(
"matching.PollerHistoryTTL",
5*time.Minute,
`PollerHistoryTTL is the time to live for poller histories in the pollerHistory cache of a physical task queue. Poller histories are fetched when
requiring a list of pollers that polled a given task queue.`,
)
ReachabilityBuildIdVisibilityGracePeriod = NewNamespaceDurationSetting(
"matching.wv.ReachabilityBuildIdVisibilityGracePeriod",
3*time.Minute,
Expand Down
7 changes: 7 additions & 0 deletions service/matching/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ type (
RedirectRuleLimitPerQueue dynamicconfig.IntPropertyFnWithNamespaceFilter
RedirectRuleMaxUpstreamBuildIDsPerQueue dynamicconfig.IntPropertyFnWithNamespaceFilter
DeletedRuleRetentionTime dynamicconfig.DurationPropertyFnWithNamespaceFilter
PollerHistoryTTL dynamicconfig.DurationPropertyFnWithNamespaceFilter
ReachabilityBuildIdVisibilityGracePeriod dynamicconfig.DurationPropertyFnWithNamespaceFilter
ReachabilityCacheOpenWFsTTL dynamicconfig.DurationPropertyFn
ReachabilityCacheClosedWFsTTL dynamicconfig.DurationPropertyFn
Expand Down Expand Up @@ -169,6 +170,8 @@ type (
BreakdownMetricsByPartition func() bool
BreakdownMetricsByBuildID func() bool

PollerHistoryTTL func() time.Duration

loadCause loadCause
}

Expand Down Expand Up @@ -246,6 +249,7 @@ func NewConfig(
RedirectRuleLimitPerQueue: dynamicconfig.RedirectRuleLimitPerQueue.Get(dc),
RedirectRuleMaxUpstreamBuildIDsPerQueue: dynamicconfig.RedirectRuleMaxUpstreamBuildIDsPerQueue.Get(dc),
DeletedRuleRetentionTime: dynamicconfig.MatchingDeletedRuleRetentionTime.Get(dc),
PollerHistoryTTL: dynamicconfig.PollerHistoryTTL.Get(dc),
ReachabilityBuildIdVisibilityGracePeriod: dynamicconfig.ReachabilityBuildIdVisibilityGracePeriod.Get(dc),
ReachabilityCacheOpenWFsTTL: dynamicconfig.ReachabilityCacheOpenWFsTTL.Get(dc),
ReachabilityCacheClosedWFsTTL: dynamicconfig.ReachabilityCacheClosedWFsTTL.Get(dc),
Expand Down Expand Up @@ -363,5 +367,8 @@ func newTaskQueueConfig(tq *tqid.TaskQueue, config *Config, ns namespace.Name) *
TaskQueueInfoByBuildIdTTL: func() time.Duration {
return config.TaskQueueInfoByBuildIdTTL(ns.String(), taskQueueName, taskType)
},
PollerHistoryTTL: func() time.Duration {
return config.PollerHistoryTTL(ns.String())
},
}
}
3 changes: 2 additions & 1 deletion service/matching/physical_task_queue_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ func newPhysicalTaskQueueManager(
tasksAddedInIntervals: newTaskTracker(clock.NewRealTimeSource()),
tasksDispatchedInIntervals: newTaskTracker(clock.NewRealTimeSource()),
}
pqMgr.pollerHistory = newPollerHistory()

pqMgr.pollerHistory = newPollerHistory(partitionMgr.config.PollerHistoryTTL())

pqMgr.liveness = newLiveness(
clock.NewRealTimeSource(),
Expand Down
3 changes: 1 addition & 2 deletions service/matching/poller_history.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ import (

const (
pollerHistoryInitMaxSize = 1000
pollerHistoryTTL = 5 * time.Minute
)

type (
Expand All @@ -52,7 +51,7 @@ type pollerHistory struct {
history cache.Cache
}

func newPollerHistory() *pollerHistory {
func newPollerHistory(pollerHistoryTTL time.Duration) *pollerHistory {
opts := &cache.Options{
TTL: pollerHistoryTTL,
Pin: false,
Expand Down
1 change: 0 additions & 1 deletion service/worker/workerdeployment/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,6 @@ func (d *ClientImpl) ListWorkerDeployments(
pageSize = d.visibilityMaxPageSize(namespaceEntry.Name().String())
}

// todo (Shivam): closed workflows should be filtered out.
persistenceResp, err := d.visibilityManager.ListWorkflowExecutions(
ctx,
&manager.ListWorkflowExecutionsRequestV2{
Expand Down
23 changes: 14 additions & 9 deletions tests/worker_deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,8 @@ func (s *WorkerDeploymentSuite) TestSetWorkerDeploymentRampingVersion_Unversione

// Should see that the ramping version of the task queues in the current version is unversioned
func (s *WorkerDeploymentSuite) TestSetWorkerDeploymentRampingVersion_Unversioned_VersionedCurrent() {
s.T().Skip("skipping this test since it's flaking on Cassandra. TODO (Shivam): Fix this.")

ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
defer cancel()
tv := testvars.New(s)
Expand Down Expand Up @@ -1031,7 +1033,7 @@ func (s *WorkerDeploymentSuite) verifyTaskQueueVersioningInfo(ctx context.Contex
}

func (s *WorkerDeploymentSuite) TestDeleteWorkerDeployment_ValidDelete() {
s.T().Skip("skipping this test for now until I make TTL of pollerHistoryTTL configurable by dynamic config.")
s.OverrideDynamicConfig(dynamicconfig.PollerHistoryTTL, 500*time.Millisecond)

ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
Expand Down Expand Up @@ -1075,7 +1077,7 @@ func (s *WorkerDeploymentSuite) TestDeleteWorkerDeployment_ValidDelete() {
})
assert.NoError(t, err)
assert.Empty(t, resp.Pollers)
}, 10*time.Second, time.Second)
}, 5*time.Second, time.Second)

// delete succeeds
s.tryDeleteVersion(ctx, tv1, true)
Expand Down Expand Up @@ -1116,13 +1118,16 @@ func (s *WorkerDeploymentSuite) TestDeleteWorkerDeployment_ValidDelete() {
}, time.Second*5, time.Millisecond*200)

// ListDeployments should not show the closed/deleted Worker Deployment
listResp, err := s.FrontendClient().ListWorkerDeployments(ctx, &workflowservice.ListWorkerDeploymentsRequest{
Namespace: s.Namespace().String(),
})
s.Nil(err)
for _, dInfo := range listResp.GetWorkerDeployments() {
s.NotEqual(tv1.DeploymentSeries(), dInfo.GetName())
}
s.EventuallyWithT(func(t *assert.CollectT) {
a := assert.New(t)
listResp, err := s.FrontendClient().ListWorkerDeployments(ctx, &workflowservice.ListWorkerDeploymentsRequest{
Namespace: s.Namespace().String(),
})
a.Nil(err)
for _, dInfo := range listResp.GetWorkerDeployments() {
a.NotEqual(tv1.DeploymentSeries(), dInfo.GetName())
}
}, time.Second*5, time.Millisecond*200)
}

func (s *WorkerDeploymentSuite) TestDeleteWorkerDeployment_Idempotent() {
Expand Down
6 changes: 3 additions & 3 deletions tests/worker_deployment_version_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,7 @@ func (s *DeploymentVersionSuite) TestVersionScavenger_DeleteOnAdd() {
}

func (s *DeploymentVersionSuite) TestDeleteVersion_ValidDelete() {
s.T().Skip("skipping this test for now until I make TTL of pollerHistoryTTL configurable by dynamic config.")
s.OverrideDynamicConfig(dynamicconfig.PollerHistoryTTL, 500*time.Millisecond)

ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
Expand Down Expand Up @@ -718,7 +718,7 @@ func (s *DeploymentVersionSuite) TestDeleteVersion_ValidDelete() {
}

func (s *DeploymentVersionSuite) TestDeleteVersion_ValidDelete_SkipDrainage() {
s.T().Skip("skipping this test for now until I make TTL of pollerHistoryTTL configurable by dynamic config.")
s.OverrideDynamicConfig(dynamicconfig.PollerHistoryTTL, 500*time.Millisecond)

ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
Expand All @@ -736,7 +736,7 @@ func (s *DeploymentVersionSuite) TestDeleteVersion_ValidDelete_SkipDrainage() {
})
assert.NoError(t, err)
assert.Empty(t, resp.Pollers)
}, 10*time.Second, time.Second)
}, 5*time.Second, time.Second)

// skipDrainage=true will make delete succeed
s.tryDeleteVersion(ctx, tv1, true, true)
Expand Down

0 comments on commit 9669f3a

Please sign in to comment.