From 30a219c91f055f82760d709fc9e91209edbe8b38 Mon Sep 17 00:00:00 2001
From: Tobias Trabelsi
Date: Sun, 31 Dec 2023 23:02:32 +0100
Subject: [PATCH] (feat) Cost optimized mode

---
 CHANGELOG                        |  19 ++++
 README.md                        |   3 +
 cmd/woodpecker-autoscaler.go     | 181 +++++++++++++++++++------------
 internal/config/config.go        |   1 +
 internal/hetzner/hetzneragent.go |  11 ++
 5 files changed, 147 insertions(+), 68 deletions(-)
 create mode 100644 CHANGELOG

diff --git a/CHANGELOG b/CHANGELOG
new file mode 100644
index 0000000..144fd34
--- /dev/null
+++ b/CHANGELOG
@@ -0,0 +1,19 @@
+CHANGELOG
+
+v1.1.0
+
+Updated Dependencies
+Restructured Main event loop
+Cost optimized mode to make use of the full hour that is billed by hetzner
+
+v1.0.1
+
+Fix woodpecker agent decom
+
+v1.0.0
+
+First stable release
+
+v0.0.1
+
+First test release
\ No newline at end of file
diff --git a/README.md b/README.md
index 3f7b5d8..4f6bb11 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,8 @@ env:
     value: "define_it"
   - name: WOODPECKER_AUTOSCALER_HCLOUD_SSH_KEY
     value: "define_it"
+  - name: WOODPECKER_AUTOSCALER_COST_OPTIMIZED
+    value: "true"
 ```
 
 you can also create a secret manually with these information and reference the existing secret like this in the `values.yaml`:
@@ -79,6 +81,7 @@ WOODPECKER_AUTOSCALER_HCLOUD_TOKEN="define_it"
 WOODPECKER_AUTOSCALER_HCLOUD_INSTANCE_TYPE=cpx21
 WOODPECKER_AUTOSCALER_HCLOUD_LOCATION="define_it"
 WOODPECKER_AUTOSCALER_HCLOUD_SSH_KEY="define_it"
+WOODPECKER_AUTOSCALER_COST_OPTIMIZED="true"
 ```
 
 Now reload the systemd daemons and start the service:
diff --git a/cmd/woodpecker-autoscaler.go b/cmd/woodpecker-autoscaler.go
index d745249..22bc38e 100644
--- a/cmd/woodpecker-autoscaler.go
+++ b/cmd/woodpecker-autoscaler.go
@@ -13,6 +13,117 @@ import (
 	log "github.com/sirupsen/logrus"
 )
 
+// SpawnNewAgent creates an agent via woodpecker.CreateWoodpeckerAgent, creates
+// the matching hetzner server and polls every 30 seconds until the server
+// reports the running status. Every error is logged with Fatal.
+func SpawnNewAgent(cfg *config.Config) {
+	agent, err := woodpecker.CreateWoodpeckerAgent(cfg)
+	if err != nil {
+		log.WithFields(log.Fields{
+			"Caller": "SpawnNewAgent",
+		}).Fatal(fmt.Sprintf("Error creating new agent: %s", err.Error()))
+	}
+	server, err := hetzner.CreateNewAgent(cfg, agent)
+	if err != nil {
+		log.WithFields(log.Fields{
+			"Caller": "SpawnNewAgent",
+		}).Fatal(fmt.Sprintf("Error spawning new agent: %s", err.Error()))
+	}
+	for {
+		server, err = hetzner.RefreshNodeInfo(cfg, server.ID)
+		if err != nil {
+			log.WithFields(log.Fields{
+				"Caller": "SpawnNewAgent",
+			}).Fatal(fmt.Sprintf("Failed to start Agent: %s", err.Error()))
+		}
+		if server.Status == hcloud.ServerStatusRunning {
+			log.WithFields(log.Fields{
+				"Caller": "SpawnNewAgent",
+			}).Infof("%s started!", server.Name)
+			break
+		}
+		log.WithFields(log.Fields{
+			"Caller": "SpawnNewAgent",
+		}).Infof("%s is in status %s", server.Name, server.Status)
+		time.Sleep(30 * time.Second)
+	}
+}
+
+// CheckJobs looks at the running tasks reported by woodpecker and hands all
+// owned nodes to Decom when nothing keeps them busy. Nodes are left alone if
+// tasks are pending or if 1 to len(ownedNodes) tasks are currently running.
+func CheckJobs(cfg *config.Config, ownedNodes []hcloud.Server, pendingTasks int) {
+	log.WithFields(log.Fields{
+		"Caller": "CheckJobs",
+	}).Info("Checking if agents can be removed")
+	runningTasks, err := woodpecker.CheckRunning(cfg)
+	if err != nil {
+		log.WithFields(log.Fields{
+			"Caller": "CheckJobs",
+		}).Fatal(fmt.Sprintf("Error checking woodpecker queue: %s", err.Error()))
+	}
+	if (runningTasks <= len(ownedNodes) && runningTasks != 0) || pendingTasks > 0 {
+		log.WithFields(log.Fields{
+			"Caller": "CheckJobs",
+		}).Info("Still found running tasks. No agent to be removed")
+	} else if len(ownedNodes) == 0 {
+		log.WithFields(log.Fields{
+			"Caller": "CheckJobs",
+		}).Info("Nothing running and not owning any nodes")
+	} else {
+		log.WithFields(log.Fields{
+			"Caller": "CheckJobs",
+		}).Info("No tasks running. Will remove agents")
+		Decom(cfg, ownedNodes)
+	}
+}
+
+// Decom removes the given nodes from hetzner and their agents from woodpecker.
+// In cost optimized mode a node is skipped while its runtime plus one check
+// interval is still below the 60 minutes that hetzner bills in any case.
+func Decom(cfg *config.Config, ownedNodes []hcloud.Server) {
+	for _, server := range ownedNodes {
+		if cfg.CostOptimizedMode {
+			runtime, err := hetzner.CheckRuntime(cfg, &server)
+			if err != nil {
+				log.WithFields(log.Fields{
+					"Caller": "Decom",
+				}).Warnf("Error while checking runtime of node %s: %s", server.Name, err.Error())
+				// Runtime unknown: keep the node and retry on the next check
+				continue
+			}
+			log.WithFields(log.Fields{
+				"Caller": "Decom",
+			}).Debugf("Node %s is running for %f", server.Name, runtime.Minutes())
+			// Keep the node if the next check still happens before its 60 minute mark
+			// https://docs.hetzner.com/cloud/billing/faq/#how-do-you-bill-your-servers
+			if runtime+time.Duration(cfg.CheckInterval)*time.Minute < 60*time.Minute {
+				log.WithFields(log.Fields{
+					"Caller": "Decom",
+				}).Infof("Skipping node termination of %s (running for %f Minutes) in Cost Optimized Mode", server.Name, runtime.Minutes())
+				continue
+			}
+		}
+		agentID, err := hetzner.DecomNode(cfg, &server)
+		if err != nil {
+			log.WithFields(log.Fields{
+				"Caller": "Decom",
+			}).Warnf("Error while deleting node %s: %s", server.Name, err.Error())
+			// Do not remove the woodpecker agent of a node that was not deleted
+			continue
+		}
+		err = woodpecker.DecomAgent(cfg, agentID)
+		if err != nil {
+			log.WithFields(log.Fields{
+				"Caller": "Decom",
+			}).Warnf("Could not delete node %s in woodpecker: %s", server.Name, err.Error())
+		}
+		log.WithFields(log.Fields{
+			"Caller": "Decom",
+		}).Infof("Deleted node %s", server.Name)
+	}
+}
+
 func main() {
 	cfg, err := config.GenConfig()
 
@@ -52,75 +163,9 @@ func main() {
 			"Caller": "Main",
 		}).Infof("Currently owning %d Agents", len(ownedNodes))
 		if pendingTasks > len(ownedNodes) {
-			agent, err := woodpecker.CreateWoodpeckerAgent(cfg)
-			if err != nil {
-				log.WithFields(log.Fields{
-					"Caller": "Main",
-				}).Fatal(fmt.Sprintf("Error creating new agent: %s", err.Error()))
-			}
-			server, err := hetzner.CreateNewAgent(cfg, agent)
-			if err != nil {
-				log.WithFields(log.Fields{
-					"Caller": "Main",
-				}).Fatal(fmt.Sprintf("Error spawning new agent: %s", err.Error()))
-			}
-			for {
-				server, err = hetzner.RefreshNodeInfo(cfg, server.ID)
-				if err != nil {
-					log.WithFields(log.Fields{
-						"Caller": "Main",
-					}).Fatal(fmt.Sprintf("Failed to start Agent: %s", err.Error()))
-				}
-				if server.Status == hcloud.ServerStatusRunning {
-					log.WithFields(log.Fields{
-						"Caller": "Main",
-					}).Infof("%s started!", server.Name)
-					break
-				}
-				log.WithFields(log.Fields{
-					"Caller": "Main",
-				}).Infof("%s is in status %s", server.Name, server.Status)
-				time.Sleep(30 * time.Second)
-			}
+			SpawnNewAgent(cfg)
 		} else {
-			log.WithFields(log.Fields{
-				"Caller": "Main",
-			}).Info("Checking if agents can be removed")
-			runningTasks, err := woodpecker.CheckRunning(cfg)
-			if err != nil {
-				log.WithFields(log.Fields{
-					"Caller": "Main",
-				}).Fatal(fmt.Sprintf("Error checking woodpecker queue: %s", err.Error()))
-			}
-			if (runningTasks <= len(ownedNodes) && runningTasks != 0) || pendingTasks > 0 {
-				log.WithFields(log.Fields{
-					"Caller": "Main",
-				}).Info("Still found running tasks. No agent to be removed")
-			} else {
-				if len(ownedNodes) == 0 {
-					log.WithFields(log.Fields{
-						"Caller": "Main",
-					}).Info("Nothing running and not owning any nodes")
-				} else {
-					log.WithFields(log.Fields{
-						"Caller": "Main",
-					}).Info("No tasks running. Will remove agents")
-					for _, server := range ownedNodes {
-						agentId, err := hetzner.DecomNode(cfg, &server)
-						if err != nil {
-							log.WithFields(log.Fields{
-								"Caller": "Main",
-							}).Warnf("Error while deleting node %s: %s", server.Name, err.Error())
-						}
-						err = woodpecker.DecomAgent(cfg, agentId)
-						if err != nil {
-							log.WithFields(log.Fields{
-								"Caller": "Main",
-							}).Warnf("Could not delete node %s in woodpecker: %s", server.Name, err.Error())
-						}
-					}
-				}
-			}
+			CheckJobs(cfg, ownedNodes, pendingTasks)
 		}
 		log.WithFields(log.Fields{
 			"Caller": "Main",
diff --git a/internal/config/config.go b/internal/config/config.go
index ec93307..58a6405 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -12,6 +12,7 @@ type Config = struct {
 	LogLevel                string `default:"Info" env:"WOODPECKER_AUTOSCALER_LOGLEVEL"`
 	CheckInterval           int    `default:"15" env:"WOODPECKER_AUTOSCALER_CHECK_INTERVAL"`
 	DryRun                  bool   `default:"false" env:"WOODPECKER_AUTOSCALER_DRY_RUN"`
+	CostOptimizedMode       bool   `default:"false" env:"WOODPECKER_AUTOSCALER_COST_OPTIMIZED"`
 	WoodpeckerLabelSelector string `default:"uploadfilter24.eu/instance-role=Woodpecker" env:"WOODPECKER_AUTOSCALER_WOODPECKER_LABEL_SELECTOR"`
 	WoodpeckerInstance      string `default:"" env:"WOODPECKER_AUTOSCALER_WOODPECKER_INSTANCE"`
 	WoodpeckerGrpc          string `default:"" env:"WOODPECKER_AUTOSCALER_WOODPECKER_GRPC"`
diff --git a/internal/hetzner/hetzneragent.go b/internal/hetzner/hetzneragent.go
index f02f45b..31365c6 100644
--- a/internal/hetzner/hetzneragent.go
+++ b/internal/hetzner/hetzneragent.go
@@ -8,6 +8,7 @@ import (
 	"strconv"
 	"strings"
 	"text/template"
+	"time"
 
 	"git.uploadfilter24.eu/covidnetes/woodpecker-autoscaler/internal/config"
 	"git.uploadfilter24.eu/covidnetes/woodpecker-autoscaler/internal/models"
@@ -173,3 +174,13 @@ func RefreshNodeInfo(cfg *config.Config, serverID int) (*hcloud.Server, error) {
 	}
 	return server, nil
 }
+
+// CheckRuntime refreshes the node via RefreshNodeInfo and returns the time
+// elapsed since the server's Created timestamp as a positive duration.
+func CheckRuntime(cfg *config.Config, server *hcloud.Server) (time.Duration, error) {
+	server, err := RefreshNodeInfo(cfg, server.ID)
+	if err != nil {
+		return time.Duration(0), fmt.Errorf("Could not check Runtime: %w", err)
+	}
+	return time.Since(server.Created), nil
+}