Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Making poller history a dynamic config #7344

Merged
merged 6 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions common/dynamicconfig/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,12 @@ to exceed this number will fail with a FailedPrecondition error.`,
`MatchingDeletedRuleRetentionTime is the length of time that deleted Version Assignment Rules and
Deleted Redirect Rules will be kept in the DB (with DeleteTimestamp). After this time, the tombstones are deleted at the next time update of versioning data for the task queue.`,
)
PollerHistoryTTL = NewNamespaceDurationSetting(
"matching.PollerHistoryTTL",
5*time.Minute,
`PollerHistoryTTL is the time to live for poller histories in the pollerHistory cache of a physical task queue. Poller histories are fetched when
requiring a list of pollers that polled a given task queue.`,
)
ReachabilityBuildIdVisibilityGracePeriod = NewNamespaceDurationSetting(
"matching.wv.ReachabilityBuildIdVisibilityGracePeriod",
3*time.Minute,
Expand Down
7 changes: 7 additions & 0 deletions service/matching/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ type (
RedirectRuleLimitPerQueue dynamicconfig.IntPropertyFnWithNamespaceFilter
RedirectRuleMaxUpstreamBuildIDsPerQueue dynamicconfig.IntPropertyFnWithNamespaceFilter
DeletedRuleRetentionTime dynamicconfig.DurationPropertyFnWithNamespaceFilter
PollerHistoryTTL dynamicconfig.DurationPropertyFnWithNamespaceFilter
ReachabilityBuildIdVisibilityGracePeriod dynamicconfig.DurationPropertyFnWithNamespaceFilter
ReachabilityCacheOpenWFsTTL dynamicconfig.DurationPropertyFn
ReachabilityCacheClosedWFsTTL dynamicconfig.DurationPropertyFn
Expand Down Expand Up @@ -169,6 +170,8 @@ type (
BreakdownMetricsByPartition func() bool
BreakdownMetricsByBuildID func() bool

PollerHistoryTTL func() time.Duration

loadCause loadCause
}

Expand Down Expand Up @@ -246,6 +249,7 @@ func NewConfig(
RedirectRuleLimitPerQueue: dynamicconfig.RedirectRuleLimitPerQueue.Get(dc),
RedirectRuleMaxUpstreamBuildIDsPerQueue: dynamicconfig.RedirectRuleMaxUpstreamBuildIDsPerQueue.Get(dc),
DeletedRuleRetentionTime: dynamicconfig.MatchingDeletedRuleRetentionTime.Get(dc),
PollerHistoryTTL: dynamicconfig.PollerHistoryTTL.Get(dc),
ReachabilityBuildIdVisibilityGracePeriod: dynamicconfig.ReachabilityBuildIdVisibilityGracePeriod.Get(dc),
ReachabilityCacheOpenWFsTTL: dynamicconfig.ReachabilityCacheOpenWFsTTL.Get(dc),
ReachabilityCacheClosedWFsTTL: dynamicconfig.ReachabilityCacheClosedWFsTTL.Get(dc),
Expand Down Expand Up @@ -363,5 +367,8 @@ func newTaskQueueConfig(tq *tqid.TaskQueue, config *Config, ns namespace.Name) *
TaskQueueInfoByBuildIdTTL: func() time.Duration {
return config.TaskQueueInfoByBuildIdTTL(ns.String(), taskQueueName, taskType)
},
PollerHistoryTTL: func() time.Duration {
return config.PollerHistoryTTL(ns.String())
},
}
}
3 changes: 2 additions & 1 deletion service/matching/physical_task_queue_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ func newPhysicalTaskQueueManager(
tasksAddedInIntervals: newTaskTracker(clock.NewRealTimeSource()),
tasksDispatchedInIntervals: newTaskTracker(clock.NewRealTimeSource()),
}
pqMgr.pollerHistory = newPollerHistory()

pqMgr.pollerHistory = newPollerHistory(partitionMgr.config.PollerHistoryTTL())

pqMgr.liveness = newLiveness(
clock.NewRealTimeSource(),
Expand Down
3 changes: 1 addition & 2 deletions service/matching/poller_history.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ import (

const (
pollerHistoryInitMaxSize = 1000
pollerHistoryTTL = 5 * time.Minute
)

type (
Expand All @@ -52,7 +51,7 @@ type pollerHistory struct {
history cache.Cache
}

func newPollerHistory() *pollerHistory {
func newPollerHistory(pollerHistoryTTL time.Duration) *pollerHistory {
opts := &cache.Options{
TTL: pollerHistoryTTL,
Pin: false,
Expand Down
1 change: 0 additions & 1 deletion service/worker/workerdeployment/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,6 @@ func (d *ClientImpl) ListWorkerDeployments(
pageSize = d.visibilityMaxPageSize(namespaceEntry.Name().String())
}

// todo (Shivam): closed workflows should be filtered out.
persistenceResp, err := d.visibilityManager.ListWorkflowExecutions(
ctx,
&manager.ListWorkflowExecutionsRequestV2{
Expand Down
23 changes: 14 additions & 9 deletions tests/worker_deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,8 @@ func (s *WorkerDeploymentSuite) TestSetWorkerDeploymentRampingVersion_Unversione

// Should see that the ramping version of the task queues in the current version is unversioned
func (s *WorkerDeploymentSuite) TestSetWorkerDeploymentRampingVersion_Unversioned_VersionedCurrent() {
s.T().Skip("skipping this test since it's flaking on Cassandra. TODO (Shivam): Fix this.")

ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
defer cancel()
tv := testvars.New(s)
Expand Down Expand Up @@ -1031,7 +1033,7 @@ func (s *WorkerDeploymentSuite) verifyTaskQueueVersioningInfo(ctx context.Contex
}

func (s *WorkerDeploymentSuite) TestDeleteWorkerDeployment_ValidDelete() {
s.T().Skip("skipping this test for now until I make TTL of pollerHistoryTTL configurable by dynamic config.")
s.OverrideDynamicConfig(dynamicconfig.PollerHistoryTTL, 500*time.Millisecond)

ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
Expand Down Expand Up @@ -1075,7 +1077,7 @@ func (s *WorkerDeploymentSuite) TestDeleteWorkerDeployment_ValidDelete() {
})
assert.NoError(t, err)
assert.Empty(t, resp.Pollers)
}, 10*time.Second, time.Second)
}, 5*time.Second, time.Second)

// delete succeeds
s.tryDeleteVersion(ctx, tv1, true)
Expand Down Expand Up @@ -1116,13 +1118,16 @@ func (s *WorkerDeploymentSuite) TestDeleteWorkerDeployment_ValidDelete() {
}, time.Second*5, time.Millisecond*200)

// ListDeployments should not show the closed/deleted Worker Deployment
listResp, err := s.FrontendClient().ListWorkerDeployments(ctx, &workflowservice.ListWorkerDeploymentsRequest{
Namespace: s.Namespace().String(),
})
s.Nil(err)
for _, dInfo := range listResp.GetWorkerDeployments() {
s.NotEqual(tv1.DeploymentSeries(), dInfo.GetName())
}
s.EventuallyWithT(func(t *assert.CollectT) {
a := assert.New(t)
listResp, err := s.FrontendClient().ListWorkerDeployments(ctx, &workflowservice.ListWorkerDeploymentsRequest{
Namespace: s.Namespace().String(),
})
a.Nil(err)
for _, dInfo := range listResp.GetWorkerDeployments() {
a.NotEqual(tv1.DeploymentSeries(), dInfo.GetName())
}
}, time.Second*5, time.Millisecond*200)
Shivs11 marked this conversation as resolved.
Show resolved Hide resolved
}

func (s *WorkerDeploymentSuite) TestDeleteWorkerDeployment_Idempotent() {
Expand Down
6 changes: 3 additions & 3 deletions tests/worker_deployment_version_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,7 @@ func (s *DeploymentVersionSuite) TestVersionScavenger_DeleteOnAdd() {
}

func (s *DeploymentVersionSuite) TestDeleteVersion_ValidDelete() {
s.T().Skip("skipping this test for now until I make TTL of pollerHistoryTTL configurable by dynamic config.")
s.OverrideDynamicConfig(dynamicconfig.PollerHistoryTTL, 500*time.Millisecond)

ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
Expand Down Expand Up @@ -718,7 +718,7 @@ func (s *DeploymentVersionSuite) TestDeleteVersion_ValidDelete() {
}

func (s *DeploymentVersionSuite) TestDeleteVersion_ValidDelete_SkipDrainage() {
s.T().Skip("skipping this test for now until I make TTL of pollerHistoryTTL configurable by dynamic config.")
s.OverrideDynamicConfig(dynamicconfig.PollerHistoryTTL, 500*time.Millisecond)

ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
Expand All @@ -736,7 +736,7 @@ func (s *DeploymentVersionSuite) TestDeleteVersion_ValidDelete_SkipDrainage() {
})
assert.NoError(t, err)
assert.Empty(t, resp.Pollers)
}, 10*time.Second, time.Second)
}, 5*time.Second, time.Second)

// skipDrainage=true will make delete succeed
s.tryDeleteVersion(ctx, tv1, true, true)
Expand Down
Loading