From e1a6c88d2e7a4eb49d8a98f7b162ceaade580cef Mon Sep 17 00:00:00 2001
From: Vladimir Iliakov
Date: Mon, 2 Feb 2026 13:30:59 +0100
Subject: [PATCH 1/3] STAC-24228: Implementing retry for K8s update operations

---
 .opencode/agents/code-reviewer.md | 393 ++++++++++++++++
 .opencode/config.yaml | 44 ++
 AGENTS.md | 171 +++++++
 ARCHITECTURE.md | 2 +-
 README.md | 52 ++-
 flake.nix | 2 +-
 internal/clients/k8s/client.go | 427 ++++++++++--------
 .../config/testdata/validConfigMapConfig.yaml | 2 +-
 .../config/testdata/validConfigMapOnly.yaml | 2 +-
 .../config/testdata/validSecretConfig.yaml | 2 +-
 10 files changed, 911 insertions(+), 186 deletions(-)
 create mode 100644 .opencode/agents/code-reviewer.md
 create mode 100644 .opencode/config.yaml
 create mode 100644 AGENTS.md

diff --git a/.opencode/agents/code-reviewer.md b/.opencode/agents/code-reviewer.md
new file mode 100644
index 0000000..a9b78fb
--- /dev/null
+++ b/.opencode/agents/code-reviewer.md
@@ -0,0 +1,393 @@
+# Code Reviewer Agent
+
+You are a code reviewer for the **SUSE Observability Backup CLI**, a Go CLI application for managing backups and restores for SUSE Observability on Kubernetes.
+
+## Your Role
+
+Review Go code changes to ensure compliance with:
+1. Project architecture and layered design
+2. Code style guidelines
+3. Go best practices for CLI applications
+4. Cobra CLI patterns
+5. Kubernetes client-go patterns
+
+## Project Context
+
+- **Module**: `github.com/stackvista/stackstate-backup-cli`
+- **Go Version**: 1.25.3
+- **CLI Framework**: Cobra (`github.com/spf13/cobra`)
+- **Binary Name**: `sts-backup`
+
+## Architecture Overview
+
+This project uses a **5-layer architecture** with strict dependency rules:
+
+```
+Layer 4: cmd/                    # CLI commands (thin layer)
+Layer 3: internal/app/           # Dependency injection container
+Layer 2: internal/orchestration/ # Multi-service workflows
+Layer 1: internal/clients/       # Service clients
+Layer 0: internal/foundation/    # Core utilities (config, logger, output)
+```
+
+### Dependency Rules (CRITICAL)
+
+| Layer | Can Import | Cannot Import |
+|-------|------------|---------------|
+| `cmd/` | `internal/app/*` (preferred), all `internal/*` | - |
+| `internal/app/` | All `internal/*` packages | - |
+| `internal/orchestration/` | `clients/*`, `foundation/*` | Other `orchestration/*` packages |
+| `internal/clients/` | Only `foundation/*` | Other `clients/*`, `orchestration/*` |
+| `internal/foundation/` | Only stdlib and external utilities | Any `internal/*` packages |
+
+## Code Style Checklist
+
+### 1. Import Organization
+
+Imports MUST be organized in three groups separated by blank lines:
+
+```go
+import (
+    // Standard library
+    "context"
+    "fmt"
+
+    // External dependencies
+    "github.com/spf13/cobra"
+    appsv1 "k8s.io/api/apps/v1"
+
+    // Internal packages
+    "github.com/stackvista/stackstate-backup-cli/internal/app"
+    es "github.com/stackvista/stackstate-backup-cli/internal/clients/elasticsearch"
+)
+```
+
+**Review for**: Mixed import groups, missing blank lines between groups.
+
+### 2. Error Handling
+
+Errors MUST be wrapped with context using `%w`:
+
+```go
+// GOOD
+return fmt.Errorf("failed to get service %s: %w", name, err)
+
+// BAD
+return err
+return fmt.Errorf("failed: %v", err) // loses error chain
+```
+
+**Review for**: Unwrapped errors, errors without context, use of `%v` instead of `%w`.
+
+### 3. 
Naming Conventions + +- **Files**: lowercase with underscores (`client_test.go`, `port_forward.go`) +- **Packages**: short, lowercase, single-word (`k8s`, `config`, `scale`) +- **Constants**: PascalCase (exported), camelCase (unexported) +- **Interfaces**: Defined in consumer package, verified with `var _ Interface = (*Client)(nil)` + +**Review for**: Inconsistent naming, overly long package names. + +### 4. Function Length + +- **Max lines**: 100 +- **Max statements**: 60 +- **Exception**: Table-driven tests (use `//nolint:funlen // Table-driven test`) + +**Review for**: Functions exceeding limits without valid nolint comment. + +### 5. Line Length + +- **Max**: 250 characters + +### 6. Dependency Injection Pattern + +Commands MUST use `app.Context` for dependencies, NOT create clients directly: + +```go +// GOOD +func runRestore(appCtx *app.Context) error { + appCtx.K8sClient // Use injected client + appCtx.ESClient + appCtx.Config + appCtx.Logger + appCtx.Formatter +} + +// BAD - Direct client creation in command +func runRestore(globalFlags *config.CLIGlobalFlags) error { + k8sClient, _ := k8s.NewClient(globalFlags.Kubeconfig, globalFlags.Debug) + esClient, _ := elasticsearch.NewClient("http://localhost:9200") +} +``` + +**Review for**: Commands creating clients directly instead of using `app.Context`. + +### 7. Command Runner Pattern + +Commands should use the common runner with `cmdutils.Run`: + +```go +func listCmd(globalFlags *config.CLIGlobalFlags) *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List available snapshots", + Run: func(_ *cobra.Command, _ []string) { + cmdutils.Run(globalFlags, runListSnapshots, cmdutils.MinioIsRequired) + }, + } +} +``` + +**Review for**: Commands not using `cmdutils.Run`, inconsistent command structure. + +### 8. Port-Forward Lifecycle + +Port-forwards MUST be cleaned up with defer: + +```go +pf, err := portforward.SetupPortForward(...) +if err != nil { + return err +} +defer close(pf.StopChan) // REQUIRED - prevents resource leak +``` + +**Review for**: Missing defer cleanup for port-forwards. + +### 9. Scale Operations with Locks + +Restore operations MUST use `scale.ScaleDownWithLock` and `scale.ScaleUpAndReleaseLock`: + +```go +scaledApps, err := scale.ScaleDownWithLock(scale.ScaleDownWithLockParams{ + K8sClient: appCtx.K8sClient, + Namespace: appCtx.Namespace, + LabelSelector: selector, + Datastore: config.DatastoreStackgraph, + AllSelectors: appCtx.Config.GetAllScaleDownSelectors(), + Log: appCtx.Logger, +}) +defer scale.ScaleUpAndReleaseLock(...) +``` + +**Review for**: Restore operations not using lock pattern. + +### 10. Logging Standards + +Use the correct logging level and method: + +```go +log.Infof("Starting operation...") // General info (no emoji prefix) +log.Debugf("Detail: %v", detail) // Debug output (has emoji prefix) +log.Warningf("Non-fatal issue: %v", warning) // Warnings (has emoji prefix) +log.Errorf("Operation failed: %v", err) // Errors (has emoji prefix) +log.Successf("Operation completed") // Success messages (has emoji prefix) +``` + +**Review for**: Incorrect log levels, inconsistent logging patterns. + +### 11. Testing Standards + +Tests MUST follow table-driven pattern with testify: + +```go +func TestClient_Scale(t *testing.T) { + tests := []struct { + name string + expectError bool + }{ + {name: "success", expectError: false}, + {name: "failure", expectError: true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeClient := fake.NewSimpleClientset() + // ... 
test logic + require.NoError(t, err) + assert.Equal(t, expected, actual) + }) + } +} +``` + +**Review for**: Non-table-driven tests, missing subtests, not using testify assertions. + +### 12. Interface Definitions + +Interfaces should be defined in the consumer package with compile-time verification: + +```go +// Define interface where it's used +type K8sClientInterface interface { + GetDeployments(namespace string, selector string) ([]appsv1.Deployment, error) +} + +// Compile-time verification in the implementing package +var _ Interface = (*Client)(nil) +``` + +**Review for**: Interfaces defined in wrong package, missing compile-time checks. + +### 13. Configuration Usage + +Never hard-code configuration values: + +```go +// GOOD +serviceName := appCtx.Config.Elasticsearch.Service.Name + +// BAD +serviceName := "elasticsearch-master" +``` + +**Review for**: Hard-coded service names, ports, or other configuration. + +## Architecture Violations to Flag + +### Critical Violations + +1. **Foundation importing internal packages** + ```go + // BAD: internal/foundation/config/loader.go + import "github.com/stackvista/stackstate-backup-cli/internal/clients/k8s" + ``` + +2. **Clients importing other clients** + ```go + // BAD: internal/clients/elasticsearch/client.go + import "github.com/stackvista/stackstate-backup-cli/internal/clients/k8s" + ``` + +3. **Orchestration importing other orchestration packages** + ```go + // BAD: internal/orchestration/scale/scale.go + import "github.com/stackvista/stackstate-backup-cli/internal/orchestration/portforward" + ``` + +4. **Business logic in commands** + - Commands should be thin wrappers + - Complex logic belongs in `orchestration/` or `clients/` + +### Warning-Level Issues + +1. Commands not using `app.Context` +2. Missing error wrapping +3. Inconsistent logging levels +4. Missing test coverage for new functionality + +## Golang CLI Best Practices + +### Cobra Command Structure + +```go +func newCmd(globalFlags *config.CLIGlobalFlags) *cobra.Command { + cmd := &cobra.Command{ + Use: "command-name", + Short: "Short description (one line)", + Long: `Longer description with more details.`, + Example: ` sts-backup service command-name --flag value`, + Run: func(cmd *cobra.Command, args []string) { + cmdutils.Run(globalFlags, runCommand) + }, + } + + // Add flags + cmd.Flags().StringVarP(&localFlag, "flag", "f", "default", "Flag description") + + return cmd +} +``` + +### Context Handling + +Always accept and propagate `context.Context` for cancellation: + +```go +func (c *Client) DoOperation(ctx context.Context, param string) error { + // Use ctx for HTTP requests, K8s operations, etc. +} +``` + +### Graceful Shutdown + +Long-running operations should handle signals: + +```go +ctx, cancel := context.WithCancel(context.Background()) +defer cancel() + +sigCh := make(chan os.Signal, 1) +signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) +``` + +## Review Output Format + +When reviewing code, always start with this header to confirm you're using project-specific guidelines: + +> 📋 **Reviewing against SUSE Observability Backup CLI guidelines** (code-reviewer agent) + +Then provide feedback in this format: + +### Summary +Brief overview of the changes and overall assessment. 
+ +### Architecture Compliance +- [ ] Layer dependencies are correct +- [ ] No forbidden imports +- [ ] Business logic in appropriate layer + +### Code Style +- [ ] Import organization +- [ ] Error handling with wrapping +- [ ] Naming conventions +- [ ] Function length + +### Best Practices +- [ ] Dependency injection via `app.Context` +- [ ] Port-forward cleanup with defer +- [ ] Scale operations with locks +- [ ] Proper logging levels + +### Testing +- [ ] Table-driven tests +- [ ] Uses testify assertions +- [ ] Adequate coverage + +### Issues Found +List specific issues with file:line references and suggested fixes. + +### Recommendations +Optional improvements that are not blocking. + +## Commands for Verification + +Run these commands to verify code quality: + +```bash +# Build +go build -o sts-backup . + +# Run all tests +go test ./... + +# Lint +golangci-lint run --config=.golangci.yml ./... + +# Verify architecture - foundation has no internal imports +go list -f '{{.ImportPath}}: {{join .Imports "\n"}}' ./internal/foundation/... | grep 'stackvista.*internal' + +# Verify architecture - clients only import foundation +go list -f '{{.ImportPath}}: {{join .Imports "\n"}}' ./internal/clients/... | grep 'stackvista.*internal' | grep -v foundation + +# Verify architecture - orchestration doesn't import other orchestration +go list -f '{{.ImportPath}}: {{join .Imports "\n"}}' ./internal/orchestration/... | grep 'stackvista.*orchestration' +``` + +## Key Files Reference + +- `ARCHITECTURE.md` - Detailed architecture documentation +- `AGENTS.md` - AI agent guidelines +- `.golangci.yml` - Linter configuration +- `internal/app/app.go` - Dependency injection container +- `cmd/cmdutils/runner.go` - Command runner utilities diff --git a/.opencode/config.yaml b/.opencode/config.yaml new file mode 100644 index 0000000..b6a511d --- /dev/null +++ b/.opencode/config.yaml @@ -0,0 +1,44 @@ +# OpenCode Configuration for SUSE Observability Backup CLI + +## Project Information + +name: stackstate-backup-cli +description: Go CLI for managing backups/restores for SUSE Observability on Kubernetes + +## Agents + +agents: + code-reviewer: + path: .opencode/agents/code-reviewer.md + description: | + Code reviewer agent for Go CLI. Reviews code for: + - Project architecture compliance (5-layer design) + - Go best practices for CLI applications + - Cobra command patterns + - Kubernetes client-go patterns + - Code style and formatting + triggers: + - review + - /review + - code review + +## Key Documentation + +docs: + - ARCHITECTURE.md + - AGENTS.md + - README.md + +## Build Commands + +commands: + build: go build -o sts-backup . + test: go test ./... + lint: golangci-lint run --config=.golangci.yml ./... + verify-arch: | + # Verify foundation has no internal imports + go list -f '{{.ImportPath}}: {{join .Imports "\n"}}' ./internal/foundation/... | grep 'stackvista.*internal' || true + # Verify clients only import foundation + go list -f '{{.ImportPath}}: {{join .Imports "\n"}}' ./internal/clients/... | grep 'stackvista.*internal' | grep -v foundation || true + # Verify orchestration doesn't import other orchestration + go list -f '{{.ImportPath}}: {{join .Imports "\n"}}' ./internal/orchestration/... | grep 'stackvista.*orchestration' || true diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..bb93f2b --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,171 @@ +# AGENTS.md - AI Coding Agent Guidelines + +Go CLI for managing backups/restores for SUSE Observability on Kubernetes. 
+ +> **Important**: Read [ARCHITECTURE.md](ARCHITECTURE.md) for detailed architecture, design patterns, and extension guides. + +## Build, Lint, and Test Commands + +```bash +# Build +go build -o sts-backup . + +# Run all tests +go test ./... + +# Run a specific test function +go test -v -run TestClient_ScaleDownDeployments ./internal/clients/k8s/... + +# Run tests in a specific package +go test -v ./internal/clients/elasticsearch/... + +# Lint (required before committing) +golangci-lint run --config=.golangci.yml ./... +``` + +## Code Style Guidelines + +### Imports + +Organize in three groups: standard library, external deps, internal packages. + +```go +import ( + "context" + "fmt" + + "github.com/spf13/cobra" + appsv1 "k8s.io/api/apps/v1" + + "github.com/stackvista/stackstate-backup-cli/internal/app" + es "github.com/stackvista/stackstate-backup-cli/internal/clients/elasticsearch" +) +``` + +### Formatting + +- Use `gofmt` and `goimports` +- Max line length: 250 chars, max function: 100 lines/60 statements + +### Types + +```go +// Client represents an Elasticsearch client (comment starts with type name) +type Client struct { + es *elasticsearch.Client +} + +type Config struct { + Elasticsearch ElasticsearchConfig `yaml:"elasticsearch" validate:"required"` +} +``` + +### Naming + +- **Files**: lowercase with underscores (`client_test.go`) +- **Packages**: short, lowercase, single-word (`k8s`, `config`) +- **Constants**: PascalCase exported, camelCase unexported + +### Error Handling + +Always wrap errors: `return fmt.Errorf("failed to get service %s: %w", name, err)` + +### Interfaces + +Define in consumer package with compile-time check: `var _ Interface = (*Client)(nil)` + +### Testing + +Table-driven tests with `testify/assert` and `require`, use `fake.NewSimpleClientset()` for K8s: + +```go +func TestClient_Scale(t *testing.T) { + tests := []struct { + name string + expectError bool + }{ + {name: "success", expectError: false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeClient := fake.NewSimpleClientset() + require.NoError(t, err) + assert.Equal(t, expected, actual) + }) + } +} +``` + +## Architecture (see ARCHITECTURE.md for details) + +### Layered Structure + +``` +cmd/ # Layer 4: CLI commands (thin) +internal/ + app/ # Layer 3: Dependency injection + orchestration/ # Layer 2: Multi-service workflows + clients/ # Layer 1: Service clients (k8s, elasticsearch, s3) + foundation/ # Layer 0: Core utilities (config, logger, output) +``` + +### Dependency Rules + +- **cmd/** imports `internal/app/*` (preferred) +- **orchestration/** imports `clients/*` and `foundation/*`, NOT other orchestration +- **clients/** imports only `foundation/*` +- **foundation/** imports only stdlib + +## Common Pitfalls to Avoid + +**Don't import clients from other clients** - move logic to `internal/orchestration/` + +**Don't put business logic in commands** - extract to orchestration or client packages + +**Don't import foundation packages from each other** - keep them independent + +**Don't hard-code configuration** - use `config.Elasticsearch.Service.Name` + +**Don't create clients directly in commands** - use `app.Context`: + +```go +// GOOD +func runRestore(appCtx *app.Context) error { + appCtx.K8sClient // Kubernetes client + appCtx.ESClient // Elasticsearch client + appCtx.Config // Configuration + appCtx.Logger // Structured logger +} +``` + +## Key Patterns + +**Port-Forward Lifecycle** - Always defer cleanup: + +```go +pf, err := portforward.SetupPortForward(...) 
+defer close(pf.StopChan) +``` + +**Scale with Locks** - For restore operations use `scale.ScaleDownWithLock()` from orchestration. + +**Restore Lock** - Stackgraph and Settings are mutually exclusive (both modify HBase data). + +## Linter Exceptions + +Use sparingly: `//nolint:funlen // Table-driven test` or `//nolint:unparam` + +## Logging + +```go +log.Infof("Starting...") // General info +log.Debugf("Detail: %v", d) // Debug details +log.Warningf("Issue: %v", w) // Warnings +log.Errorf("Failed: %v", e) // Errors +log.Successf("Done") // Success messages +``` + +## Configuration + +- Loaded from Kubernetes ConfigMap and Secret +- Precedence: CLI flags > Environment > Secret > ConfigMap > Defaults diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 6055fe7..a809f66 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -1,6 +1,6 @@ # Repository Architecture -This document describes the overall architecture and package organization of the StackState Backup CLI. +This document describes the overall architecture and package organization of the SUSE Observability Backup CLI. ## Design Philosophy diff --git a/README.md b/README.md index ce256d4..8950270 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# StackState Backup CLI +# SUSE Observability Backup CLI A command-line tool for managing backups and restores for SUSE Observability platform running on Kubernetes. @@ -453,6 +453,56 @@ go test ./... golangci-lint run --config=.golangci.yml ./... ``` +### Using OpenCode for Development + +We're exploring AI-assisted development with [OpenCode](https://github.com/opencode-ai/opencode). If you'd like to try it for your tasks, here's how to get started. + +#### Quick Start + +1. Install OpenCode following their [installation guide](https://github.com/opencode-ai/opencode#installation) +2. Run `opencode` in the repository root +3. Start asking questions or requesting changes + +#### What Works Well + +OpenCode can help with: +- **Understanding the codebase**: Ask about architecture, patterns, or how specific features work +- **Implementing new features**: Describe what you need, and it will follow project conventions +- **Writing tests**: Request table-driven tests following our testify patterns +- **Code reviews**: Ask it to review changes against project guidelines + +#### Example: Adding a New Command + +Here's an example workflow for implementing a new feature: + +``` +You: I need to add a "prune" command to elasticsearch that deletes snapshots older than N days. + It should follow the existing patterns in this codebase. + +OpenCode will: +1. Explore existing commands to understand patterns +2. Create cmd/elasticsearch/prune.go following the command runner pattern +3. Add any needed client methods to internal/clients/elasticsearch/ +4. Generate table-driven tests +5. Run linting to verify compliance +``` + +#### Tips + +- **Reference the docs**: Mention `ARCHITECTURE.md` or `AGENTS.md` if you want it to follow specific guidelines +- **Ask for reviews**: After implementing, ask OpenCode to review the code against project standards +- **Iterate**: If something doesn't look right, ask for adjustments + +#### Code Review Agent + +This repository includes a code review agent (`.opencode/agents/code-reviewer.md`) that understands our architecture and coding standards. Use it to validate changes before submitting PRs. + +#### Contributing to OpenCode Configuration + +The OpenCode configuration files in `.opencode/` are not set in stone. 
If you find ways to improve the agents or add new ones, feel free to update them. Better prompts, additional review checks, or new specialized agents are all welcome contributions. + +> **Note**: We're still experimenting with AI-assisted development. Share your experiences with the team - what works, what doesn't, and any tips you discover. + ## License Copyright (c) 2025 SUSE diff --git a/flake.nix b/flake.nix index f1658f0..a903d4f 100644 --- a/flake.nix +++ b/flake.nix @@ -1,5 +1,5 @@ { - description = "StackState CLI"; + description = "SUSE Observability CLI"; nixConfig.bash-prompt = "STS CLI 2 $ "; diff --git a/internal/clients/k8s/client.go b/internal/clients/k8s/client.go index 497f7c3..dc8d59a 100644 --- a/internal/clients/k8s/client.go +++ b/internal/clients/k8s/client.go @@ -19,6 +19,7 @@ import ( "k8s.io/client-go/tools/clientcmd" "k8s.io/client-go/tools/portforward" "k8s.io/client-go/transport/spdy" + "k8s.io/client-go/util/retry" ) // Client wraps the Kubernetes clientset @@ -172,162 +173,204 @@ type RestoreLockInfo struct { StartedAt string } -// ScalableResource abstracts operations on resources that can be scaled -type ScalableResource interface { - GetName() string - GetReplicas() int32 - SetReplicas(replicas int32) - GetAnnotations() map[string]string - SetAnnotations(annotations map[string]string) - Update(ctx context.Context, client kubernetes.Interface, namespace string) error -} +// DeploymentUpdateFunc is a function that modifies a deployment. +// It receives a fresh copy of the deployment and should apply the desired changes. +type DeploymentUpdateFunc func(dep *appsv1.Deployment) -// deploymentAdapter adapts appsv1.Deployment to ScalableResource interface -type deploymentAdapter struct { - deployment *appsv1.Deployment -} +// StatefulSetUpdateFunc is a function that modifies a statefulset. +// It receives a fresh copy of the statefulset and should apply the desired changes. +type StatefulSetUpdateFunc func(sts *appsv1.StatefulSet) -func (d *deploymentAdapter) GetName() string { - return d.deployment.Name +// updateDeploymentWithRetry fetches a fresh copy of the deployment and applies the update function, +// retrying on conflict errors (when resource version has changed). +func updateDeploymentWithRetry(ctx context.Context, client kubernetes.Interface, namespace, name string, updateFn DeploymentUpdateFunc) error { + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + // Get fresh copy + dep, err := client.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return err + } + // Apply changes + updateFn(dep) + // Update + _, err = client.AppsV1().Deployments(namespace).Update(ctx, dep, metav1.UpdateOptions{}) + return err + }) } -func (d *deploymentAdapter) GetReplicas() int32 { - if d.deployment.Spec.Replicas == nil { - return 0 - } - return *d.deployment.Spec.Replicas +// updateStatefulSetWithRetry fetches a fresh copy of the statefulset and applies the update function, +// retrying on conflict errors (when resource version has changed). 
+func updateStatefulSetWithRetry(ctx context.Context, client kubernetes.Interface, namespace, name string, updateFn StatefulSetUpdateFunc) error { + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + // Get fresh copy + sts, err := client.AppsV1().StatefulSets(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return err + } + // Apply changes + updateFn(sts) + // Update + _, err = client.AppsV1().StatefulSets(namespace).Update(ctx, sts, metav1.UpdateOptions{}) + return err + }) } -func (d *deploymentAdapter) SetReplicas(replicas int32) { - d.deployment.Spec.Replicas = &replicas -} +// scaleDownDeployment scales a single deployment to 0 replicas with retry on conflict. +// Returns the original replica count. +// +//nolint:dupl // Deployment and StatefulSet are different K8s types requiring separate implementations +func scaleDownDeployment(ctx context.Context, client kubernetes.Interface, namespace, name string) (int32, error) { + var originalReplicas int32 -func (d *deploymentAdapter) GetAnnotations() map[string]string { - return d.deployment.Annotations -} + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + dep, err := client.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return err + } -func (d *deploymentAdapter) SetAnnotations(annotations map[string]string) { - d.deployment.Annotations = annotations -} + if dep.Spec.Replicas != nil { + originalReplicas = *dep.Spec.Replicas + } -func (d *deploymentAdapter) Update(ctx context.Context, client kubernetes.Interface, namespace string) error { - _, err := client.AppsV1().Deployments(namespace).Update(ctx, d.deployment, metav1.UpdateOptions{}) - return err -} + // Only update if not already at 0 + if originalReplicas > 0 { + if dep.Annotations == nil { + dep.Annotations = make(map[string]string) + } + dep.Annotations[PreRestoreReplicasAnnotation] = fmt.Sprintf("%d", originalReplicas) + zero := int32(0) + dep.Spec.Replicas = &zero -// statefulSetAdapter adapts appsv1.StatefulSet to ScalableResource interface -type statefulSetAdapter struct { - statefulSet *appsv1.StatefulSet -} + _, err = client.AppsV1().Deployments(namespace).Update(ctx, dep, metav1.UpdateOptions{}) + return err + } + return nil + }) -func (s *statefulSetAdapter) GetName() string { - return s.statefulSet.Name + return originalReplicas, err } -func (s *statefulSetAdapter) GetReplicas() int32 { - if s.statefulSet.Spec.Replicas == nil { - return 0 - } - return *s.statefulSet.Spec.Replicas -} +// scaleDownStatefulSet scales a single statefulset to 0 replicas with retry on conflict. +// Returns the original replica count. 
+// +//nolint:dupl // Deployment and StatefulSet are different K8s types requiring separate implementations +func scaleDownStatefulSet(ctx context.Context, client kubernetes.Interface, namespace, name string) (int32, error) { + var originalReplicas int32 -func (s *statefulSetAdapter) SetReplicas(replicas int32) { - s.statefulSet.Spec.Replicas = &replicas -} + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + sts, err := client.AppsV1().StatefulSets(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return err + } -func (s *statefulSetAdapter) GetAnnotations() map[string]string { - return s.statefulSet.Annotations -} + if sts.Spec.Replicas != nil { + originalReplicas = *sts.Spec.Replicas + } -func (s *statefulSetAdapter) SetAnnotations(annotations map[string]string) { - s.statefulSet.Annotations = annotations -} + // Only update if not already at 0 + if originalReplicas > 0 { + if sts.Annotations == nil { + sts.Annotations = make(map[string]string) + } + sts.Annotations[PreRestoreReplicasAnnotation] = fmt.Sprintf("%d", originalReplicas) + zero := int32(0) + sts.Spec.Replicas = &zero -func (s *statefulSetAdapter) Update(ctx context.Context, client kubernetes.Interface, namespace string) error { - _, err := client.AppsV1().StatefulSets(namespace).Update(ctx, s.statefulSet, metav1.UpdateOptions{}) - return err + _, err = client.AppsV1().StatefulSets(namespace).Update(ctx, sts, metav1.UpdateOptions{}) + return err + } + return nil + }) + + return originalReplicas, err } -// scaleDownResources is a generic function that scales down resources to 0 replicas -func scaleDownResources(ctx context.Context, client kubernetes.Interface, namespace string, resources []ScalableResource) ([]AppsScale, error) { - if len(resources) == 0 { - return []AppsScale{}, nil - } +// scaleUpDeploymentFromAnnotation scales a deployment back to its original replica count with retry on conflict. +// Returns (replica count, found annotation, error). If no annotation was found, returns (0, false, nil). 
+// +//nolint:dupl // Deployment and StatefulSet are different K8s types requiring separate implementations +func scaleUpDeploymentFromAnnotation(ctx context.Context, client kubernetes.Interface, namespace, name string) (int32, bool, error) { + var scaledTo int32 + var found bool - var scaledResources []AppsScale + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + dep, err := client.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return err + } - for _, resource := range resources { - originalReplicas := resource.GetReplicas() + if dep.Annotations == nil { + found = false + return nil + } - // Store original replica count - scaledResources = append(scaledResources, AppsScale{ - Name: resource.GetName(), - Replicas: originalReplicas, - }) + replicasStr, exists := dep.Annotations[PreRestoreReplicasAnnotation] + if !exists { + found = false + return nil + } - // Scale to 0 if not already at 0 - if originalReplicas > 0 { - // Add annotation with original replica count - annotations := resource.GetAnnotations() - if annotations == nil { - annotations = make(map[string]string) - } - annotations[PreRestoreReplicasAnnotation] = fmt.Sprintf("%d", originalReplicas) - resource.SetAnnotations(annotations) - resource.SetReplicas(0) + var originalReplicas int32 + if _, err := fmt.Sscanf(replicasStr, "%d", &originalReplicas); err != nil { + return fmt.Errorf("failed to parse replicas annotation: %w", err) + } - if err := resource.Update(ctx, client, namespace); err != nil { - return scaledResources, fmt.Errorf("failed to scale down resource %s: %w", resource.GetName(), err) - } + dep.Spec.Replicas = &originalReplicas + delete(dep.Annotations, PreRestoreReplicasAnnotation) + + _, err = client.AppsV1().Deployments(namespace).Update(ctx, dep, metav1.UpdateOptions{}) + if err == nil { + scaledTo = originalReplicas + found = true } - } + return err + }) - return scaledResources, nil + return scaledTo, found, err } -// scaleUpResourcesFromAnnotations is a generic function that scales up resources based on annotations -func scaleUpResourcesFromAnnotations(ctx context.Context, client kubernetes.Interface, namespace string, resources []ScalableResource) ([]AppsScale, error) { - if len(resources) == 0 { - return []AppsScale{}, nil - } +// scaleUpStatefulSetFromAnnotation scales a statefulset back to its original replica count with retry on conflict. +// Returns (replica count, found annotation, error). If no annotation was found, returns (0, false, nil). 
+// +//nolint:dupl // Deployment and StatefulSet are different K8s types requiring separate implementations +func scaleUpStatefulSetFromAnnotation(ctx context.Context, client kubernetes.Interface, namespace, name string) (int32, bool, error) { + var scaledTo int32 + var found bool - var scaledResources []AppsScale + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + sts, err := client.AppsV1().StatefulSets(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return err + } - for _, resource := range resources { - annotations := resource.GetAnnotations() - if annotations == nil { - continue + if sts.Annotations == nil { + found = false + return nil } - replicasStr, exists := annotations[PreRestoreReplicasAnnotation] + replicasStr, exists := sts.Annotations[PreRestoreReplicasAnnotation] if !exists { - continue + found = false + return nil } var originalReplicas int32 if _, err := fmt.Sscanf(replicasStr, "%d", &originalReplicas); err != nil { - return scaledResources, fmt.Errorf("failed to parse replicas annotation for resource %s: %w", resource.GetName(), err) + return fmt.Errorf("failed to parse replicas annotation: %w", err) } - // Scale up to original replica count - resource.SetReplicas(originalReplicas) + sts.Spec.Replicas = &originalReplicas + delete(sts.Annotations, PreRestoreReplicasAnnotation) - // Remove the annotation - delete(annotations, PreRestoreReplicasAnnotation) - resource.SetAnnotations(annotations) - - if err := resource.Update(ctx, client, namespace); err != nil { - return scaledResources, fmt.Errorf("failed to scale up resource %s: %w", resource.GetName(), err) + _, err = client.AppsV1().StatefulSets(namespace).Update(ctx, sts, metav1.UpdateOptions{}) + if err == nil { + scaledTo = originalReplicas + found = true } + return err + }) - // Record scaled resource - scaledResources = append(scaledResources, AppsScale{ - Name: resource.GetName(), - Replicas: originalReplicas, - }) - } - - return scaledResources, nil + return scaledTo, found, err } // ScaleDownDeployments scales down deployments matching a label selector to 0 replicas @@ -343,13 +386,19 @@ func (c *Client) ScaleDownDeployments(namespace, labelSelector string) ([]AppsSc return nil, fmt.Errorf("failed to list deployments: %w", err) } - // Convert to ScalableResource slice - resources := make([]ScalableResource, len(deployments.Items)) - for i := range deployments.Items { - resources[i] = &deploymentAdapter{deployment: &deployments.Items[i]} + var scaledResources []AppsScale + for _, dep := range deployments.Items { + originalReplicas, err := scaleDownDeployment(ctx, c.clientset, namespace, dep.Name) + if err != nil { + return scaledResources, fmt.Errorf("failed to scale down deployment %s: %w", dep.Name, err) + } + scaledResources = append(scaledResources, AppsScale{ + Name: dep.Name, + Replicas: originalReplicas, + }) } - return scaleDownResources(ctx, c.clientset, namespace, resources) + return scaledResources, nil } // ScaleUpDeploymentsFromAnnotations scales up deployments that have the pre-restore-replicas annotation @@ -365,13 +414,26 @@ func (c *Client) ScaleUpDeploymentsFromAnnotations(namespace, labelSelector stri return nil, fmt.Errorf("failed to list deployments: %w", err) } - // Convert to ScalableResource slice - resources := make([]ScalableResource, len(deployments.Items)) - for i := range deployments.Items { - resources[i] = &deploymentAdapter{deployment: &deployments.Items[i]} + var scaledResources []AppsScale + for _, dep := range deployments.Items { + // Check if 
this deployment has the annotation (to avoid unnecessary API calls) + if dep.Annotations == nil || dep.Annotations[PreRestoreReplicasAnnotation] == "" { + continue + } + + scaledTo, found, err := scaleUpDeploymentFromAnnotation(ctx, c.clientset, namespace, dep.Name) + if err != nil { + return scaledResources, fmt.Errorf("failed to scale up deployment %s: %w", dep.Name, err) + } + if found { + scaledResources = append(scaledResources, AppsScale{ + Name: dep.Name, + Replicas: scaledTo, + }) + } } - return scaleUpResourcesFromAnnotations(ctx, c.clientset, namespace, resources) + return scaledResources, nil } // ScaleDownStatefulSets scales down statefulsets matching a label selector to 0 replicas @@ -387,13 +449,19 @@ func (c *Client) ScaleDownStatefulSets(namespace, labelSelector string) ([]AppsS return nil, fmt.Errorf("failed to list statefulsets: %w", err) } - // Convert to ScalableResource slice - resources := make([]ScalableResource, len(statefulSets.Items)) - for i := range statefulSets.Items { - resources[i] = &statefulSetAdapter{statefulSet: &statefulSets.Items[i]} + var scaledResources []AppsScale + for _, sts := range statefulSets.Items { + originalReplicas, err := scaleDownStatefulSet(ctx, c.clientset, namespace, sts.Name) + if err != nil { + return scaledResources, fmt.Errorf("failed to scale down statefulset %s: %w", sts.Name, err) + } + scaledResources = append(scaledResources, AppsScale{ + Name: sts.Name, + Replicas: originalReplicas, + }) } - return scaleDownResources(ctx, c.clientset, namespace, resources) + return scaledResources, nil } // ScaleUpStatefulSetsFromAnnotations scales up statefulsets that have the pre-restore-replicas annotation @@ -409,13 +477,26 @@ func (c *Client) ScaleUpStatefulSetsFromAnnotations(namespace, labelSelector str return nil, fmt.Errorf("failed to list statefulsets: %w", err) } - // Convert to ScalableResource slice - resources := make([]ScalableResource, len(statefulSets.Items)) - for i := range statefulSets.Items { - resources[i] = &statefulSetAdapter{statefulSet: &statefulSets.Items[i]} + var scaledResources []AppsScale + for _, sts := range statefulSets.Items { + // Check if this statefulset has the annotation (to avoid unnecessary API calls) + if sts.Annotations == nil || sts.Annotations[PreRestoreReplicasAnnotation] == "" { + continue + } + + scaledTo, found, err := scaleUpStatefulSetFromAnnotation(ctx, c.clientset, namespace, sts.Name) + if err != nil { + return scaledResources, fmt.Errorf("failed to scale up statefulset %s: %w", sts.Name, err) + } + if found { + scaledResources = append(scaledResources, AppsScale{ + Name: sts.Name, + Replicas: scaledTo, + }) + } } - return scaleUpResourcesFromAnnotations(ctx, c.clientset, namespace, resources) + return scaledResources, nil } // NewTestClient creates a k8s Client for testing with a fake clientset. 
@@ -488,15 +569,15 @@ func (c *Client) SetRestoreLock(namespace, labelSelector, datastore, startedAt s return fmt.Errorf("failed to list deployments: %w", err) } - for i := range deployments.Items { - dep := &deployments.Items[i] - if dep.Annotations == nil { - dep.Annotations = make(map[string]string) - } - dep.Annotations[RestoreInProgressAnnotation] = datastore - dep.Annotations[RestoreStartedAtAnnotation] = startedAt - - if _, err := c.clientset.AppsV1().Deployments(namespace).Update(ctx, dep, metav1.UpdateOptions{}); err != nil { + for _, dep := range deployments.Items { + err := updateDeploymentWithRetry(ctx, c.clientset, namespace, dep.Name, func(d *appsv1.Deployment) { + if d.Annotations == nil { + d.Annotations = make(map[string]string) + } + d.Annotations[RestoreInProgressAnnotation] = datastore + d.Annotations[RestoreStartedAtAnnotation] = startedAt + }) + if err != nil { return fmt.Errorf("failed to set restore lock on deployment %s: %w", dep.Name, err) } } @@ -509,15 +590,15 @@ func (c *Client) SetRestoreLock(namespace, labelSelector, datastore, startedAt s return fmt.Errorf("failed to list statefulsets: %w", err) } - for i := range statefulSets.Items { - sts := &statefulSets.Items[i] - if sts.Annotations == nil { - sts.Annotations = make(map[string]string) - } - sts.Annotations[RestoreInProgressAnnotation] = datastore - sts.Annotations[RestoreStartedAtAnnotation] = startedAt - - if _, err := c.clientset.AppsV1().StatefulSets(namespace).Update(ctx, sts, metav1.UpdateOptions{}); err != nil { + for _, sts := range statefulSets.Items { + err := updateStatefulSetWithRetry(ctx, c.clientset, namespace, sts.Name, func(s *appsv1.StatefulSet) { + if s.Annotations == nil { + s.Annotations = make(map[string]string) + } + s.Annotations[RestoreInProgressAnnotation] = datastore + s.Annotations[RestoreStartedAtAnnotation] = startedAt + }) + if err != nil { return fmt.Errorf("failed to set restore lock on statefulset %s: %w", sts.Name, err) } } @@ -554,24 +635,17 @@ func (c *Client) ClearRestoreLock(namespace, labelSelector string) error { return fmt.Errorf("failed to list deployments: %w", err) } - for i := range deployments.Items { - if !hasRestoreLockAnnotations(deployments.Items[i].Annotations) { + for _, dep := range deployments.Items { + if !hasRestoreLockAnnotations(dep.Annotations) { continue } - // Refetch to get latest version (may have been modified by scale-up) - dep, err := c.clientset.AppsV1().Deployments(namespace).Get(ctx, deployments.Items[i].Name, metav1.GetOptions{}) + err := updateDeploymentWithRetry(ctx, c.clientset, namespace, dep.Name, func(d *appsv1.Deployment) { + if d.Annotations != nil { + removeRestoreLockAnnotations(d.Annotations) + } + }) if err != nil { - return fmt.Errorf("failed to get deployment %s: %w", deployments.Items[i].Name, err) - } - - if dep.Annotations == nil { - continue - } - - removeRestoreLockAnnotations(dep.Annotations) - - if _, err := c.clientset.AppsV1().Deployments(namespace).Update(ctx, dep, metav1.UpdateOptions{}); err != nil { return fmt.Errorf("failed to clear restore lock on deployment %s: %w", dep.Name, err) } } @@ -584,24 +658,17 @@ func (c *Client) ClearRestoreLock(namespace, labelSelector string) error { return fmt.Errorf("failed to list statefulsets: %w", err) } - for i := range statefulSets.Items { - if !hasRestoreLockAnnotations(statefulSets.Items[i].Annotations) { + for _, sts := range statefulSets.Items { + if !hasRestoreLockAnnotations(sts.Annotations) { continue } - // Refetch to get latest version (may have been modified by 
scale-up) - sts, err := c.clientset.AppsV1().StatefulSets(namespace).Get(ctx, statefulSets.Items[i].Name, metav1.GetOptions{}) + err := updateStatefulSetWithRetry(ctx, c.clientset, namespace, sts.Name, func(s *appsv1.StatefulSet) { + if s.Annotations != nil { + removeRestoreLockAnnotations(s.Annotations) + } + }) if err != nil { - return fmt.Errorf("failed to get statefulset %s: %w", statefulSets.Items[i].Name, err) - } - - if sts.Annotations == nil { - continue - } - - removeRestoreLockAnnotations(sts.Annotations) - - if _, err := c.clientset.AppsV1().StatefulSets(namespace).Update(ctx, sts, metav1.UpdateOptions{}); err != nil { return fmt.Errorf("failed to clear restore lock on statefulset %s: %w", sts.Name, err) } } diff --git a/internal/foundation/config/testdata/validConfigMapConfig.yaml b/internal/foundation/config/testdata/validConfigMapConfig.yaml index 02c68a3..ab505db 100644 --- a/internal/foundation/config/testdata/validConfigMapConfig.yaml +++ b/internal/foundation/config/testdata/validConfigMapConfig.yaml @@ -1,4 +1,4 @@ -# Valid ConfigMap Configuration for StackState Backup CLI +# Valid ConfigMap Configuration for SUSE Observability Backup CLI # This file contains the main configuration for Elasticsearch backup and restore operations. # It is typically stored in a Kubernetes ConfigMap. diff --git a/internal/foundation/config/testdata/validConfigMapOnly.yaml b/internal/foundation/config/testdata/validConfigMapOnly.yaml index c6d11ae..fd61761 100644 --- a/internal/foundation/config/testdata/validConfigMapOnly.yaml +++ b/internal/foundation/config/testdata/validConfigMapOnly.yaml @@ -1,4 +1,4 @@ -# Valid ConfigMap-Only Configuration for StackState Backup CLI +# Valid ConfigMap-Only Configuration for SUSE Observability Backup CLI # This file contains a complete configuration including credentials in the ConfigMap. # Use this for tests that don't involve Secret overrides. # In production, credentials should always be stored in Secrets, not ConfigMaps. diff --git a/internal/foundation/config/testdata/validSecretConfig.yaml b/internal/foundation/config/testdata/validSecretConfig.yaml index e004e52..8eea0eb 100644 --- a/internal/foundation/config/testdata/validSecretConfig.yaml +++ b/internal/foundation/config/testdata/validSecretConfig.yaml @@ -1,4 +1,4 @@ -# Valid Secret Configuration for StackState Backup CLI +# Valid Secret Configuration for SUSE Observability Backup CLI # This file contains sensitive credentials for S3/Minio access. # It is typically stored in a Kubernetes Secret and overrides values from the ConfigMap. # From 237e1391b0335cc6790e49f065f5d35c00fbcf50 Mon Sep 17 00:00:00 2001 From: Vladimir Iliakov Date: Tue, 3 Feb 2026 11:53:41 +0100 Subject: [PATCH 2/3] STAC-24228: Opencode adjustments --- .opencode/config.yaml | 44 ------------------------------------------- AGENTS.md | 5 +++++ 2 files changed, 5 insertions(+), 44 deletions(-) delete mode 100644 .opencode/config.yaml diff --git a/.opencode/config.yaml b/.opencode/config.yaml deleted file mode 100644 index b6a511d..0000000 --- a/.opencode/config.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# OpenCode Configuration for SUSE Observability Backup CLI - -## Project Information - -name: stackstate-backup-cli -description: Go CLI for managing backups/restores for SUSE Observability on Kubernetes - -## Agents - -agents: - code-reviewer: - path: .opencode/agents/code-reviewer.md - description: | - Code reviewer agent for Go CLI. 
Reviews code for: - - Project architecture compliance (5-layer design) - - Go best practices for CLI applications - - Cobra command patterns - - Kubernetes client-go patterns - - Code style and formatting - triggers: - - review - - /review - - code review - -## Key Documentation - -docs: - - ARCHITECTURE.md - - AGENTS.md - - README.md - -## Build Commands - -commands: - build: go build -o sts-backup . - test: go test ./... - lint: golangci-lint run --config=.golangci.yml ./... - verify-arch: | - # Verify foundation has no internal imports - go list -f '{{.ImportPath}}: {{join .Imports "\n"}}' ./internal/foundation/... | grep 'stackvista.*internal' || true - # Verify clients only import foundation - go list -f '{{.ImportPath}}: {{join .Imports "\n"}}' ./internal/clients/... | grep 'stackvista.*internal' | grep -v foundation || true - # Verify orchestration doesn't import other orchestration - go list -f '{{.ImportPath}}: {{join .Imports "\n"}}' ./internal/orchestration/... | grep 'stackvista.*orchestration' || true diff --git a/AGENTS.md b/AGENTS.md index bb93f2b..5e5826f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -21,6 +21,11 @@ go test -v ./internal/clients/elasticsearch/... # Lint (required before committing) golangci-lint run --config=.golangci.yml ./... + +# Verify architecture dependencies +go list -f '{{.ImportPath}}: {{join .Imports "\n"}}' ./internal/foundation/... | grep 'stackvista.*internal' || true +go list -f '{{.ImportPath}}: {{join .Imports "\n"}}' ./internal/clients/... | grep 'stackvista.*internal' | grep -v foundation || true +go list -f '{{.ImportPath}}: {{join .Imports "\n"}}' ./internal/orchestration/... | grep 'stackvista.*orchestration' || true ``` ## Code Style Guidelines From 5922bd22e1ab85483c03acb3ab35f64bb846aa50 Mon Sep 17 00:00:00 2001 From: Vladimir Iliakov Date: Tue, 3 Feb 2026 13:10:24 +0100 Subject: [PATCH 3/3] STAC-24228: Address comment --- internal/clients/k8s/client.go | 112 ++++++++++++--------------------- 1 file changed, 40 insertions(+), 72 deletions(-) diff --git a/internal/clients/k8s/client.go b/internal/clients/k8s/client.go index dc8d59a..b710dbd 100644 --- a/internal/clients/k8s/client.go +++ b/internal/clients/k8s/client.go @@ -175,11 +175,11 @@ type RestoreLockInfo struct { // DeploymentUpdateFunc is a function that modifies a deployment. // It receives a fresh copy of the deployment and should apply the desired changes. -type DeploymentUpdateFunc func(dep *appsv1.Deployment) +type DeploymentUpdateFunc func(dep *appsv1.Deployment) error // StatefulSetUpdateFunc is a function that modifies a statefulset. // It receives a fresh copy of the statefulset and should apply the desired changes. -type StatefulSetUpdateFunc func(sts *appsv1.StatefulSet) +type StatefulSetUpdateFunc func(sts *appsv1.StatefulSet) error // updateDeploymentWithRetry fetches a fresh copy of the deployment and applies the update function, // retrying on conflict errors (when resource version has changed). 
@@ -191,7 +191,10 @@ func updateDeploymentWithRetry(ctx context.Context, client kubernetes.Interface, return err } // Apply changes - updateFn(dep) + err = updateFn(dep) + if err != nil { + return err + } // Update _, err = client.AppsV1().Deployments(namespace).Update(ctx, dep, metav1.UpdateOptions{}) return err @@ -208,7 +211,10 @@ func updateStatefulSetWithRetry(ctx context.Context, client kubernetes.Interface return err } // Apply changes - updateFn(sts) + err = updateFn(sts) + if err != nil { + return err + } // Update _, err = client.AppsV1().StatefulSets(namespace).Update(ctx, sts, metav1.UpdateOptions{}) return err @@ -222,28 +228,16 @@ func updateStatefulSetWithRetry(ctx context.Context, client kubernetes.Interface func scaleDownDeployment(ctx context.Context, client kubernetes.Interface, namespace, name string) (int32, error) { var originalReplicas int32 - err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - dep, err := client.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) - if err != nil { - return err - } - + err := updateDeploymentWithRetry(ctx, client, namespace, name, func(dep *appsv1.Deployment) error { if dep.Spec.Replicas != nil { originalReplicas = *dep.Spec.Replicas } - - // Only update if not already at 0 - if originalReplicas > 0 { - if dep.Annotations == nil { - dep.Annotations = make(map[string]string) - } - dep.Annotations[PreRestoreReplicasAnnotation] = fmt.Sprintf("%d", originalReplicas) - zero := int32(0) - dep.Spec.Replicas = &zero - - _, err = client.AppsV1().Deployments(namespace).Update(ctx, dep, metav1.UpdateOptions{}) - return err + if dep.Annotations == nil { + dep.Annotations = make(map[string]string) } + dep.Annotations[PreRestoreReplicasAnnotation] = fmt.Sprintf("%d", originalReplicas) + zero := int32(0) + dep.Spec.Replicas = &zero return nil }) @@ -257,28 +251,16 @@ func scaleDownDeployment(ctx context.Context, client kubernetes.Interface, names func scaleDownStatefulSet(ctx context.Context, client kubernetes.Interface, namespace, name string) (int32, error) { var originalReplicas int32 - err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - sts, err := client.AppsV1().StatefulSets(namespace).Get(ctx, name, metav1.GetOptions{}) - if err != nil { - return err - } - + err := updateStatefulSetWithRetry(ctx, client, namespace, name, func(sts *appsv1.StatefulSet) error { if sts.Spec.Replicas != nil { originalReplicas = *sts.Spec.Replicas } - - // Only update if not already at 0 - if originalReplicas > 0 { - if sts.Annotations == nil { - sts.Annotations = make(map[string]string) - } - sts.Annotations[PreRestoreReplicasAnnotation] = fmt.Sprintf("%d", originalReplicas) - zero := int32(0) - sts.Spec.Replicas = &zero - - _, err = client.AppsV1().StatefulSets(namespace).Update(ctx, sts, metav1.UpdateOptions{}) - return err + if sts.Annotations == nil { + sts.Annotations = make(map[string]string) } + sts.Annotations[PreRestoreReplicasAnnotation] = fmt.Sprintf("%d", originalReplicas) + zero := int32(0) + sts.Spec.Replicas = &zero return nil }) @@ -291,14 +273,9 @@ func scaleDownStatefulSet(ctx context.Context, client kubernetes.Interface, name //nolint:dupl // Deployment and StatefulSet are different K8s types requiring separate implementations func scaleUpDeploymentFromAnnotation(ctx context.Context, client kubernetes.Interface, namespace, name string) (int32, bool, error) { var scaledTo int32 - var found bool - - err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - dep, err := 
client.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) - if err != nil { - return err - } + found := true + err := updateDeploymentWithRetry(ctx, client, namespace, name, func(dep *appsv1.Deployment) error { if dep.Annotations == nil { found = false return nil @@ -315,15 +292,11 @@ func scaleUpDeploymentFromAnnotation(ctx context.Context, client kubernetes.Inte return fmt.Errorf("failed to parse replicas annotation: %w", err) } - dep.Spec.Replicas = &originalReplicas delete(dep.Annotations, PreRestoreReplicasAnnotation) + dep.Spec.Replicas = &originalReplicas + scaledTo = originalReplicas - _, err = client.AppsV1().Deployments(namespace).Update(ctx, dep, metav1.UpdateOptions{}) - if err == nil { - scaledTo = originalReplicas - found = true - } - return err + return nil }) return scaledTo, found, err @@ -335,14 +308,9 @@ func scaleUpDeploymentFromAnnotation(ctx context.Context, client kubernetes.Inte //nolint:dupl // Deployment and StatefulSet are different K8s types requiring separate implementations func scaleUpStatefulSetFromAnnotation(ctx context.Context, client kubernetes.Interface, namespace, name string) (int32, bool, error) { var scaledTo int32 - var found bool - - err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - sts, err := client.AppsV1().StatefulSets(namespace).Get(ctx, name, metav1.GetOptions{}) - if err != nil { - return err - } + found := true + err := updateStatefulSetWithRetry(ctx, client, namespace, name, func(sts *appsv1.StatefulSet) error { if sts.Annotations == nil { found = false return nil @@ -359,15 +327,11 @@ func scaleUpStatefulSetFromAnnotation(ctx context.Context, client kubernetes.Int return fmt.Errorf("failed to parse replicas annotation: %w", err) } - sts.Spec.Replicas = &originalReplicas delete(sts.Annotations, PreRestoreReplicasAnnotation) + sts.Spec.Replicas = &originalReplicas + scaledTo = originalReplicas - _, err = client.AppsV1().StatefulSets(namespace).Update(ctx, sts, metav1.UpdateOptions{}) - if err == nil { - scaledTo = originalReplicas - found = true - } - return err + return nil }) return scaledTo, found, err @@ -570,12 +534,13 @@ func (c *Client) SetRestoreLock(namespace, labelSelector, datastore, startedAt s } for _, dep := range deployments.Items { - err := updateDeploymentWithRetry(ctx, c.clientset, namespace, dep.Name, func(d *appsv1.Deployment) { + err := updateDeploymentWithRetry(ctx, c.clientset, namespace, dep.Name, func(d *appsv1.Deployment) error { if d.Annotations == nil { d.Annotations = make(map[string]string) } d.Annotations[RestoreInProgressAnnotation] = datastore d.Annotations[RestoreStartedAtAnnotation] = startedAt + return nil }) if err != nil { return fmt.Errorf("failed to set restore lock on deployment %s: %w", dep.Name, err) @@ -591,12 +556,13 @@ func (c *Client) SetRestoreLock(namespace, labelSelector, datastore, startedAt s } for _, sts := range statefulSets.Items { - err := updateStatefulSetWithRetry(ctx, c.clientset, namespace, sts.Name, func(s *appsv1.StatefulSet) { + err := updateStatefulSetWithRetry(ctx, c.clientset, namespace, sts.Name, func(s *appsv1.StatefulSet) error { if s.Annotations == nil { s.Annotations = make(map[string]string) } s.Annotations[RestoreInProgressAnnotation] = datastore s.Annotations[RestoreStartedAtAnnotation] = startedAt + return nil }) if err != nil { return fmt.Errorf("failed to set restore lock on statefulset %s: %w", sts.Name, err) @@ -640,10 +606,11 @@ func (c *Client) ClearRestoreLock(namespace, labelSelector string) error { continue } - err := 
updateDeploymentWithRetry(ctx, c.clientset, namespace, dep.Name, func(d *appsv1.Deployment) { + err := updateDeploymentWithRetry(ctx, c.clientset, namespace, dep.Name, func(d *appsv1.Deployment) error { if d.Annotations != nil { removeRestoreLockAnnotations(d.Annotations) } + return nil }) if err != nil { return fmt.Errorf("failed to clear restore lock on deployment %s: %w", dep.Name, err) @@ -663,10 +630,11 @@ func (c *Client) ClearRestoreLock(namespace, labelSelector string) error { continue } - err := updateStatefulSetWithRetry(ctx, c.clientset, namespace, sts.Name, func(s *appsv1.StatefulSet) { + err := updateStatefulSetWithRetry(ctx, c.clientset, namespace, sts.Name, func(s *appsv1.StatefulSet) error { if s.Annotations != nil { removeRestoreLockAnnotations(s.Annotations) } + return nil }) if err != nil { return fmt.Errorf("failed to clear restore lock on statefulset %s: %w", sts.Name, err)