Update the e2e tests to reflect the new defaults (#2222)
* Update the e2e tests to reflect the new defaults
johscheuer authored Feb 25, 2025
1 parent a739598 commit bf8922c
Showing 17 changed files with 133 additions and 71 deletions.
38 changes: 10 additions & 28 deletions controllers/add_process_groups.go
@@ -23,8 +23,7 @@ package controllers
import (
"context"
"fmt"
"strings"

"github.com/FoundationDB/fdb-kubernetes-operator/v2/pkg/fdbstatus"
"github.com/go-logr/logr"

corev1 "k8s.io/api/core/v1"
@@ -48,32 +47,11 @@ func (a addProcessGroups) reconcile(ctx context.Context, r *FoundationDBClusterR
return &requeue{curError: err}
}

exclusions := map[fdbv1beta2.ProcessGroupID]fdbv1beta2.None{}
if cluster.UseLocalitiesForExclusion() {
if status == nil {
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}

status, err = adminClient.GetStatus()
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
}

prefix := fdbv1beta2.FDBLocalityExclusionPrefix + ":"
for _, excludedServer := range status.Cluster.DatabaseConfiguration.ExcludedServers {
if excludedServer.Locality == "" {
continue
}

processGroupID, found := strings.CutPrefix(excludedServer.Locality, prefix)
if !found {
continue
}
exclusions[fdbv1beta2.ProcessGroupID(processGroupID)] = fdbv1beta2.None{}
}
// Fetch the excluded localities from the provided machine-readable status. If the status is not available, e.g. because
// the cluster is unavailable, return an empty map and continue with adding new process groups if required.
exclusions, getLocalitiesErr := fdbstatus.GetExcludedLocalitiesFromStatus(logger, cluster, status, r.getAdminClient)
if getLocalitiesErr != nil {
logger.Error(getLocalitiesErr, "Error getting exclusion list")
}

hasNewProcessGroups := false
@@ -109,5 +87,9 @@ func (a addProcessGroups) reconcile(ctx context.Context, r *FoundationDBClusterR
}
}

if getLocalitiesErr != nil {
return &requeue{curError: getLocalitiesErr, delayedRequeue: true}
}

return nil
}
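The inline exclusion lookup removed above now lives behind `fdbstatus.GetExcludedLocalitiesFromStatus`. As a rough sketch of what that helper plausibly does: the signature is inferred from the call site, the body mirrors the deleted code, and the import paths plus the admin-client function type are assumptions rather than the actual implementation in `pkg/fdbstatus`.

```go
// Sketch reconstructed from the code this commit removes; the real helper may differ.
package fdbstatus

import (
	"strings"

	fdbv1beta2 "github.com/FoundationDB/fdb-kubernetes-operator/v2/api/v1beta2"
	"github.com/FoundationDB/fdb-kubernetes-operator/v2/pkg/fdbadminclient"
	"github.com/go-logr/logr"
)

// getAdminClientFunc is assumed to match the reconciler's getAdminClient method used at the call site.
type getAdminClientFunc func(logr.Logger, *fdbv1beta2.FoundationDBCluster) (fdbadminclient.AdminClient, error)

// GetExcludedLocalitiesFromStatus returns the process group IDs that are excluded via localities.
// If the status is nil it fetches a fresh machine-readable status from the admin client.
func GetExcludedLocalitiesFromStatus(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, getAdminClient getAdminClientFunc) (map[fdbv1beta2.ProcessGroupID]fdbv1beta2.None, error) {
	exclusions := map[fdbv1beta2.ProcessGroupID]fdbv1beta2.None{}
	if !cluster.UseLocalitiesForExclusion() {
		return exclusions, nil
	}

	// Fetch the machine-readable status lazily if the caller did not provide it.
	if status == nil {
		adminClient, err := getAdminClient(logger, cluster)
		if err != nil {
			return exclusions, err
		}

		status, err = adminClient.GetStatus()
		if err != nil {
			return exclusions, err
		}
	}

	// Locality-based exclusions are stored as "<FDBLocalityExclusionPrefix>:<process group ID>".
	prefix := fdbv1beta2.FDBLocalityExclusionPrefix + ":"
	for _, excludedServer := range status.Cluster.DatabaseConfiguration.ExcludedServers {
		processGroupID, found := strings.CutPrefix(excludedServer.Locality, prefix)
		if !found {
			continue
		}

		exclusions[fdbv1beta2.ProcessGroupID(processGroupID)] = fdbv1beta2.None{}
	}

	return exclusions, nil
}
```

Keeping the lookup error non-fatal at the call site (logged first, requeued with a delay only after new process groups were added) means an unavailable cluster no longer blocks adding replacement process groups.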
5 changes: 3 additions & 2 deletions controllers/add_services.go
@@ -107,8 +107,8 @@ func recreateService(ctx context.Context, r *FoundationDBClusterReconciler, curr
if err != nil {
return err
}
err = r.Create(ctx, newService)
return err

return r.Create(ctx, newService)
}

// updateServices updates selected safe fields on a service based on a new
@@ -134,5 +134,6 @@ func updateService(ctx context.Context, logger logr.Logger, cluster *fdbv1beta2.
logger.Info("Updating service")
return r.Update(ctx, currentService)
}

return nil
}
16 changes: 11 additions & 5 deletions e2e/Makefile
@@ -34,8 +34,8 @@ CLOUD_PROVIDER?=
UPGRADE_VERSIONS?="$(FDB_VERSION):$(NEXT_FDB_VERSION)"
# These are feature flags for the operator tests. Set a flag if you want to run the operator tests with a specific
# feature enabled, e.g. DNS.
FEATURE_DNS?=false
FEATURE_LOCALITIES?=false
FEATURE_DNS?=
FEATURE_LOCALITIES?=
# Allows to specify the tag for a specific FDB version. Format is 7.1.57:7.1.57-testing,7.3.35:7.3.35-debugging
FDB_VERSION_TAG_MAPPING?=
# If the FEATURE_SERVER_SIDE_APPLY environment variable is not defined the test suite will randomly enable (1) or disable (0)
@@ -145,6 +145,14 @@ foundationdb-nightly-tests: GINKGO_LABEL_FILTER=--ginkgo.label-filter="foundatio
# Run the actual foundationdb-nightly tests.
foundationdb-nightly-tests: run

ifdef FEATURE_LOCALITIES
FEATURE_LOCALITIES_FLAG=--feature-localities=$(FEATURE_LOCALITIES)
endif

ifdef FEATURE_DNS
FEATURE_DNS_FLAG=--feature-dns=$(FEATURE_DNS)
endif

%.run: %
@sleep $$(shuf -i 1-10 -n 1)
go test -timeout=$(TIMEOUT) $(VERBOSE) ./$< \
@@ -167,14 +175,12 @@ foundationdb-nightly-tests: run
--enable-chaos-tests=$(ENABLE_CHAOS_TESTS) \
--upgrade-versions=$(UPGRADE_VERSIONS) \
--feature-unified-image=$(FEATURE_UNIFIED_IMAGE) \
--feature-localities=$(FEATURE_LOCALITIES) \
--feature-dns=$(FEATURE_DNS) \
--cloud-provider=$(CLOUD_PROVIDER) \
--dump-operator-state=$(DUMP_OPERATOR_STATE) \
--cluster-name=$(CLUSTER_NAME) \
--storage-engine=$(STORAGE_ENGINE) \
--fdb-version-tag-mapping=$(FDB_VERSION_TAG_MAPPING) \
--unified-fdb-image=$(UNIFIED_FDB_IMAGE) \
--feature-server-side-apply=$(FEATURE_SERVER_SIDE_APPLY) \
--seaweedfs-image=$(SEAWEEDFS_IMAGE) \
--seaweedfs-image=$(SEAWEEDFS_IMAGE) $(FEATURE_LOCALITIES_FLAG) $(FEATURE_DNS_FLAG) \
| grep -v 'constructing many client instances from the same exec auth config can cause performance problems during cert rotation' &> $(BASE_DIR)/../logs/$<.log
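The new `ifdef` blocks mean `--feature-localities` and `--feature-dns` are only passed to `go test` when the corresponding Makefile variable is set; otherwise the test suite falls back to its own default. Below is a minimal, hypothetical sketch of the Go side of that pattern; the flag name and the randomization fallback are assumptions modeled on the `FEATURE_SERVER_SIDE_APPLY` comment above, not the actual fixture code.

```go
package main

import (
	"flag"
	"fmt"
	"math/rand"
	"strconv"
)

// optionalBoolFlag registers a string flag and returns a getter that yields nil
// when the flag was not passed, and an explicit true/false otherwise.
func optionalBoolFlag(name, usage string) func() *bool {
	value := flag.String(name, "", usage)
	return func() *bool {
		if *value == "" {
			return nil
		}

		parsed, err := strconv.ParseBool(*value)
		if err != nil {
			return nil
		}

		return &parsed
	}
}

func main() {
	featureDNS := optionalBoolFlag("feature-dns", "enable the DNS feature in the cluster file")
	flag.Parse()

	useDNS := featureDNS()
	if useDNS == nil {
		// When the flag is unset, the suite can pick a default on its own, e.g.
		// randomly enabling or disabling the feature, mirroring the documented
		// FEATURE_SERVER_SIDE_APPLY behaviour.
		random := rand.Intn(2) == 1
		useDNS = &random
	}

	fmt.Println("use DNS:", *useDNS)
}
```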
4 changes: 2 additions & 2 deletions e2e/fixtures/cluster_config.go
@@ -77,9 +77,9 @@ type ClusterConfig struct {
// UseMaintenanceMode if enabled the FoundationDBCluster resource will enable the maintenance mode.
UseMaintenanceMode bool
// UseLocalityBasedExclusions if enabled the FoundationDBCluster resource will enable the locality based exclusions.
UseLocalityBasedExclusions bool
UseLocalityBasedExclusions *bool
// UseDNS if enabled the FoundationDBCluster resource will enable the DNS feature.
UseDNS bool
UseDNS *bool
// If enabled the cluster will be setup with the unified image.
UseUnifiedImage *bool
// SimulateCustomFaultDomainEnv will simulate the use case that a user has set a custom environment variable to
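With both fields switched from `bool` to `*bool`, an unset value in the test config no longer silently means `false`. A small usage sketch follows, under the assumption that a nil pointer is passed through to the generated spec and the framework/operator default (the "new defaults" from the commit title) then applies; `exampleConfig` is a hypothetical helper and the import path for the fixtures package is assumed.

```go
import (
	"github.com/FoundationDB/fdb-kubernetes-operator/v2/e2e/fixtures"
	"k8s.io/utils/pointer"
)

// exampleConfig illustrates the tri-state semantics of the pointer fields.
func exampleConfig() *fixtures.ClusterConfig {
	// Leaving UseDNS and UseLocalityBasedExclusions nil keeps the generated
	// FoundationDBCluster fields unset, so the defaults apply.
	config := &fixtures.ClusterConfig{}

	// Tests that depend on a fixed behaviour can still pin a value explicitly.
	config.UseDNS = pointer.Bool(true)
	config.UseLocalityBasedExclusions = pointer.Bool(false)

	return config
}
```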
3 changes: 1 addition & 2 deletions e2e/fixtures/factory.go
@@ -169,8 +169,7 @@ func (factory *Factory) CreateFdbCluster(
config *ClusterConfig,
options ...ClusterOption,
) *FdbCluster {
spec := factory.GenerateFDBClusterSpec(config)
return factory.CreateFdbClusterFromSpec(spec, config, options...)
return factory.CreateFdbClusterFromSpec(factory.GenerateFDBClusterSpec(config), config, options...)
}

// CreateFdbClusterFromSpec creates a FDB cluster. This method can be used in combination with the GenerateFDBClusterSpec method.
5 changes: 3 additions & 2 deletions e2e/fixtures/fdb_cluster.go
@@ -676,14 +676,15 @@ func (fdbCluster *FdbCluster) SetTransactionServerPerPod(

// ReplacePod replaces the provided Pod if it's part of the FoundationDBCluster.
func (fdbCluster *FdbCluster) ReplacePod(pod corev1.Pod, waitForReconcile bool) {
cluster := fdbCluster.GetCluster()
fdbCluster.cluster.Spec.ProcessGroupsToRemove = []fdbv1beta2.ProcessGroupID{GetProcessGroupID(pod)}
fdbCluster.UpdateClusterSpec()

if !waitForReconcile {
return
}

gomega.Expect(fdbCluster.WaitForReconciliation(SoftReconcileOption(true))).NotTo(gomega.HaveOccurred())
gomega.Expect(fdbCluster.WaitForReconciliation(SoftReconcileOption(true), MinimumGenerationOption(cluster.Generation+1))).NotTo(gomega.HaveOccurred())
}

// ReplacePods replaces the provided Pods in the current FoundationDBCluster.
@@ -1451,7 +1452,7 @@ func (fdbCluster *FdbCluster) EnsureTeamTrackersAreHealthy() {
}

return true
}).WithTimeout(1 * time.Minute).WithPolling(1 * time.Second).MustPassRepeatedly(5).Should(gomega.BeTrue())
}).WithTimeout(2 * time.Minute).WithPolling(1 * time.Second).MustPassRepeatedly(5).Should(gomega.BeTrue())
}

// EnsureTeamTrackersHaveMinReplicas will check if the machine-readable status suggest that the team trackers min_replicas
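`ReplacePod` now records the cluster generation before the spec update and waits for `Generation+1`. The reasoning: every spec update bumps `metadata.generation`, so requiring the reconciled generation to reach the pre-update value plus one guarantees the wait cannot be satisfied by a reconciliation of the old spec. A hedged sketch of such a wait is shown below; it is not the fixture's actual implementation and assumes `Status.Generations.Reconciled` tracks the last fully reconciled generation.

```go
import (
	"fmt"
	"time"

	fdbv1beta2 "github.com/FoundationDB/fdb-kubernetes-operator/v2/api/v1beta2"
)

// waitForMinimumGeneration polls the cluster until the operator reports it has
// reconciled at least the given generation, or the timeout expires.
func waitForMinimumGeneration(get func() *fdbv1beta2.FoundationDBCluster, minimumGeneration int64, timeout, interval time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		cluster := get()
		if cluster.Status.Generations.Reconciled >= minimumGeneration {
			return nil
		}

		time.Sleep(interval)
	}

	return fmt.Errorf("cluster did not reach reconciled generation %d within %s", minimumGeneration, timeout)
}
```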
4 changes: 2 additions & 2 deletions e2e/fixtures/fdb_cluster_specs.go
@@ -99,10 +99,10 @@ func (factory *Factory) createFDBClusterSpec(
IgnoreLogGroupsForUpgrade: []fdbv1beta2.LogGroup{
"fdb-kubernetes-operator",
},
UseLocalitiesForExclusion: pointer.Bool(config.UseLocalityBasedExclusions),
UseLocalitiesForExclusion: config.UseLocalityBasedExclusions,
},
Routing: fdbv1beta2.RoutingConfig{
UseDNSInClusterFile: pointer.Bool(config.UseDNS),
UseDNSInClusterFile: config.UseDNS,
HeadlessService: pointer.Bool(
true,
), // to make switching between hostname <-> IP smooth
4 changes: 2 additions & 2 deletions e2e/fixtures/fdb_operator_client.go
@@ -486,8 +486,8 @@ spec:
# We are setting low values here as the e2e tests are taking down processes multiple times
# and having a high wait time between recoveries will increase the reliability of the cluster but also
# increase the time our e2e tests take.
- --minimum-recovery-time-for-inclusion=1.0
- --minimum-recovery-time-for-exclusion=1.0
- --minimum-recovery-time-for-inclusion=30.0
- --minimum-recovery-time-for-exclusion=30.0
- --cluster-label-key-for-node-trigger=foundationdb.org/fdb-cluster-name
- --enable-node-index
- --replace-on-security-context-change
24 changes: 21 additions & 3 deletions e2e/test_operator/operator_test.go
@@ -665,11 +665,18 @@ var _ = Describe("Operator", Label("e2e", "pr"), func() {
})

When("changing the public IP source", func() {
It("should change the public IP source and create/delete services", func() {
BeforeEach(func() {
if fdbCluster.GetCluster().UseDNSInClusterFile() {
Skip("using DNS and public IP from service is not tested")
}

log.Printf("set public IP source to %s", fdbv1beta2.PublicIPSourceService)
Expect(
fdbCluster.SetPublicIPSource(fdbv1beta2.PublicIPSourceService),
).ShouldNot(HaveOccurred())
})

It("should change the public IP source and create/delete services", func() {
Eventually(func() bool {
pods := fdbCluster.GetPods()
svcList := fdbCluster.GetServices()
@@ -2196,7 +2203,7 @@ var _ = Describe("Operator", Label("e2e", "pr"), func() {
_, _, err := fdbCluster.RunFdbCliCommandInOperatorWithoutRetry(cmd, true, 20)

return err
}).WithTimeout(2 * time.Minute).ShouldNot(HaveOccurred())
}).WithTimeout(5 * time.Minute).WithPolling(15 * time.Second).ShouldNot(HaveOccurred())

command := fmt.Sprintf("maintenance on %s %s", pickedProcessGroup.FaultDomain, "3600")
_, _ = fdbCluster.RunFdbCliCommandInOperator(command, false, 20)
@@ -2667,21 +2674,32 @@ var _ = Describe("Operator", Label("e2e", "pr"), func() {
fdbCluster.UpdateClusterSpecWithSpec(spec)

fdbCluster.ReplacePod(pickedPod, false)
var pickedProcessGroup *fdbv1beta2.ProcessGroupStatus
Expect(fdbCluster.WaitUntilWithForceReconcile(1, 900, func(cluster *fdbv1beta2.FoundationDBCluster) bool {
for _, processGroup := range cluster.Status.ProcessGroups {
if processGroup.ProcessGroupID != pickedProcessGroupID {
continue
}

initialExclusionTimestamp = processGroup.ExclusionTimestamp
pickedProcessGroup = processGroup
break
}

log.Println("initialExclusionTimestamp", initialExclusionTimestamp)
return initialExclusionTimestamp != nil
})).NotTo(HaveOccurred(), "process group is missing the exclusion timestamp")

var excludedServer fdbv1beta2.ExcludedServers
if fdbCluster.GetCluster().UseLocalitiesForExclusion() {
Expect(pickedProcessGroup).NotTo(BeNil())
excludedServer = fdbv1beta2.ExcludedServers{Locality: pickedProcessGroup.GetExclusionString()}
} else {
excludedServer = fdbv1beta2.ExcludedServers{Address: pickedPod.Status.PodIP}
}

// Ensure that the IP is excluded
Expect(fdbCluster.GetStatus().Cluster.DatabaseConfiguration.ExcludedServers).To(ContainElements(fdbv1beta2.ExcludedServers{Address: pickedPod.Status.PodIP}))
Expect(fdbCluster.GetStatus().Cluster.DatabaseConfiguration.ExcludedServers).To(ContainElements(excludedServer))
})

AfterEach(func() {
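The assertion above now accepts either exclusion format, depending on `UseLocalitiesForExclusion()`. For reference, a sketch of the two `ExcludedServers` shapes follows; the locality format mirrors the prefix handling removed from `controllers/add_process_groups.go` in this commit, and the concrete address and process group ID are made up.

```go
// exampleExclusions is purely illustrative and not part of the test suite.
func exampleExclusions() []fdbv1beta2.ExcludedServers {
	return []fdbv1beta2.ExcludedServers{
		// Address-based exclusion, used when locality-based exclusions are disabled.
		{Address: "10.1.2.3"},
		// Locality-based exclusion in the form "<FDBLocalityExclusionPrefix>:<process group ID>",
		// which is what ProcessGroupStatus.GetExclusionString() is expected to return.
		{Locality: fdbv1beta2.FDBLocalityExclusionPrefix + ":storage-1"},
	}
}
```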
20 changes: 19 additions & 1 deletion e2e/test_operator_ha/operator_ha_test.go
@@ -125,21 +125,39 @@ var _ = Describe("Operator HA tests", Label("e2e", "pr"), func() {
}

log.Println("deleting coordinator pod:", pod.Name, "with addresses", pod.Status.PodIPs)
// Set the Pod as unschedulable to ensure that it is not recreated. Otherwise the Pods might be recreated
// fast enough to not result in a new connection string.
fdbCluster.GetPrimary().SetPodAsUnschedulable(pod)
factory.DeletePod(&pod)
}
})

AfterEach(func() {
Expect(fdbCluster.GetPrimary().ClearBuggifyNoSchedule(true)).To(Succeed())
})

It("should change the coordinators", func() {
primary := fdbCluster.GetPrimary()

lastForceReconcile := time.Now()
Eventually(func(g Gomega) string {
// Ensure that the coordinators are changed in a timely manner for the test case.
if time.Since(lastForceReconcile) > 1*time.Minute {
for _, cluster := range fdbCluster.GetAllClusters() {
cluster.ForceReconcile()
}

lastForceReconcile = time.Now()
}

status := primary.GetStatus()

// Make sure we have the same count of coordinators again and the deleted coordinators were replaced.
coordinators := fdbstatus.GetCoordinatorsFromStatus(status)
g.Expect(coordinators).To(HaveLen(len(initialCoordinators)))

return status.Cluster.ConnectionString
}).WithTimeout(5 * time.Minute).WithPolling(2 * time.Second).ShouldNot(Equal(initialConnectionString))
}).WithTimeout(10 * time.Minute).WithPolling(2 * time.Second).ShouldNot(Equal(initialConnectionString))

// Make sure the new connection string is propagated in time to all FoundationDBCluster resources.
for _, cluster := range fdbCluster.GetAllClusters() {
1 change: 0 additions & 1 deletion e2e/test_operator_ha_upgrades/operator_ha_upgrade_test.go
@@ -557,7 +557,6 @@ var _ = Describe("Operator HA Upgrades", Label("e2e", "pr"), func() {
}

clusterConfig := fixtures.DefaultClusterConfigWithHaMode(fixtures.HaFourZoneSingleSat, false)
clusterConfig.UseLocalityBasedExclusions = true

clusterSetupWithTestConfig(
testConfig{
@@ -174,6 +174,7 @@ var _ = Describe("Operator with three data hall", Label("e2e", "pr"), func() {
spec := fdbCluster.GetCluster().Spec.DeepCopy()
spec.AutomationOptions.UseLocalitiesForExclusion = pointer.Bool(false)
fdbCluster.UpdateClusterSpecWithSpec(spec)
Expect(fdbCluster.GetCluster().UseLocalitiesForExclusion()).To(BeFalse())
})

It("should remove the targeted Pod", func() {
7 changes: 7 additions & 0 deletions e2e/test_operator_upgrades/operator_upgrades_test.go
@@ -554,8 +554,15 @@ var _ = Describe("Operator Upgrades", Label("e2e", "pr"), func() {
factory.SetFinalizerForPod(&podMarkedForRemoval, []string{"foundationdb.org/test"})
// Don't wait for reconciliation as the cluster will never reconcile.
fdbCluster.ReplacePod(podMarkedForRemoval, false)

timeSinceLastForceReconcile := time.Now()
// Make sure the process group is marked for removal
Eventually(func() *int64 {
if time.Since(timeSinceLastForceReconcile) > 1*time.Minute {
fdbCluster.ForceReconcile()
timeSinceLastForceReconcile = time.Now()
}

cluster := fdbCluster.GetCluster()

for _, processGroup := range cluster.Status.ProcessGroups {