(feat) Cost optimized mode #8

Merged
lerentis merged 1 commits from feature/tt/implement-cost-optimization into main 2024-01-03 15:23:57 +00:00
5 changed files with 135 additions and 68 deletions

19
CHANGELOG Normal file
View File

@ -0,0 +1,19 @@
CHANGELOG
v1.1.0
Updated Dependencies
Restructured Main event loop
Cost optimized mode to make use of the fully hour that is billed by hetzner
v1.0.1
Fix woodpecker agent decom
v1.0.0
First stable release
v0.0.1
First test release

View File

@ -32,6 +32,8 @@ env:
value: "define_it"
- name: WOODPECKER_AUTOSCALER_HCLOUD_SSH_KEY
value: "define_it"
- name: WOODPECKER_AUTOSCALER_COST_OPTIMIZED
value: "true"
```
you can also create a secret manually with these information and reference the existing secret like this in the `values.yaml`:
@ -79,6 +81,7 @@ WOODPECKER_AUTOSCALER_HCLOUD_TOKEN="define_it"
WOODPECKER_AUTOSCALER_HCLOUD_INSTANCE_TYPE=cpx21
WOODPECKER_AUTOSCALER_HCLOUD_LOCATION="define_it"
WOODPECKER_AUTOSCALER_HCLOUD_SSH_KEY="define_it"
WOODPECKER_AUTOSCALER_COST_OPTIMIZED="true"
```
Now reload the systemd daemons and start the service:

View File

@ -13,6 +13,106 @@ import (
log "github.com/sirupsen/logrus"
)
func SpawnNewAgent(cfg *config.Config) {
agent, err := woodpecker.CreateWoodpeckerAgent(cfg)
if err != nil {
log.WithFields(log.Fields{
"Caller": "SpawnNewAgent",
}).Fatal(fmt.Sprintf("Error creating new agent: %s", err.Error()))
}
server, err := hetzner.CreateNewAgent(cfg, agent)
if err != nil {
log.WithFields(log.Fields{
"Caller": "SpawnNewAgent",
}).Fatal(fmt.Sprintf("Error spawning new agent: %s", err.Error()))
}
for {
server, err = hetzner.RefreshNodeInfo(cfg, server.ID)
if err != nil {
log.WithFields(log.Fields{
"Caller": "SpawnNewAgent",
}).Fatal(fmt.Sprintf("Failed to start Agent: %s", err.Error()))
}
if server.Status == hcloud.ServerStatusRunning {
log.WithFields(log.Fields{
"Caller": "SpawnNewAgent",
}).Infof("%s started!", server.Name)
break
}
log.WithFields(log.Fields{
"Caller": "SpawnNewAgent",
}).Infof("%s is in status %s", server.Name, server.Status)
time.Sleep(30 * time.Second)
}
}
func CheckJobs(cfg *config.Config, ownedNodes []hcloud.Server, pendingTasks int) {
log.WithFields(log.Fields{
"Caller": "CheckJobs",
}).Info("Checking if agents can be removed")
runningTasks, err := woodpecker.CheckRunning(cfg)
if err != nil {
log.WithFields(log.Fields{
"Caller": "CheckJobs",
}).Fatal(fmt.Sprintf("Error checking woodpecker queue: %s", err.Error()))
}
if (runningTasks <= len(ownedNodes) && runningTasks != 0) || pendingTasks > 0 {
log.WithFields(log.Fields{
"Caller": "CheckJobs",
}).Info("Still found running tasks. No agent to be removed")
} else {
if len(ownedNodes) == 0 {
log.WithFields(log.Fields{
"Caller": "CheckJobs",
}).Info("Nothing running and not owning any nodes")
} else {
log.WithFields(log.Fields{
"Caller": "CheckJobs",
}).Info("No tasks running. Will remove agents")
Decom(cfg, ownedNodes)
}
}
}
func Decom(cfg *config.Config, ownedNodes []hcloud.Server) {
for _, server := range ownedNodes {
if cfg.CostOptimizedMode {
runtime, err := hetzner.CheckRuntime(cfg, &server)
if err != nil {
log.WithFields(log.Fields{
"Caller": "Decom",
}).Warnf("Error while checking runtime of node %s: %s", server.Name, err.Error())
}
log.WithFields(log.Fields{
"Caller": "Decom",
}).Debugf("Node %s is running for %f", server.Name, runtime.Minutes())
// Check if next check if sooner than the 60 Minute mark of the next hetzner check
// https://docs.hetzner.com/cloud/billing/faq/#how-do-you-bill-your-servers
if (runtime + time.Duration(cfg.CheckInterval)*time.Minute) < 60 {
log.WithFields(log.Fields{
"Caller": "Decom",
}).Infof("Skipping node termination of %s (running for %f Minutes) in Cost Optimized Mode", server.Name, runtime.Minutes())
continue
}
}
agentId, err := hetzner.DecomNode(cfg, &server)
if err != nil {
log.WithFields(log.Fields{
"Caller": "Decom",
}).Warnf("Error while deleting node %s: %s", server.Name, err.Error())
}
err = woodpecker.DecomAgent(cfg, agentId)
if err != nil {
log.WithFields(log.Fields{
"Caller": "Decom",
}).Warnf("Could not delete node %s in woodpecker: %s", server.Name, err.Error())
}
log.WithFields(log.Fields{
"Caller": "Decom",
}).Infof("Deleted node %s", server.Name)
}
}
func main() {
cfg, err := config.GenConfig()
@ -52,75 +152,9 @@ func main() {
"Caller": "Main",
}).Infof("Currently owning %d Agents", len(ownedNodes))
if pendingTasks > len(ownedNodes) {
agent, err := woodpecker.CreateWoodpeckerAgent(cfg)
if err != nil {
log.WithFields(log.Fields{
"Caller": "Main",
}).Fatal(fmt.Sprintf("Error creating new agent: %s", err.Error()))
}
server, err := hetzner.CreateNewAgent(cfg, agent)
if err != nil {
log.WithFields(log.Fields{
"Caller": "Main",
}).Fatal(fmt.Sprintf("Error spawning new agent: %s", err.Error()))
}
for {
server, err = hetzner.RefreshNodeInfo(cfg, server.ID)
if err != nil {
log.WithFields(log.Fields{
"Caller": "Main",
}).Fatal(fmt.Sprintf("Failed to start Agent: %s", err.Error()))
}
if server.Status == hcloud.ServerStatusRunning {
log.WithFields(log.Fields{
"Caller": "Main",
}).Infof("%s started!", server.Name)
break
}
log.WithFields(log.Fields{
"Caller": "Main",
}).Infof("%s is in status %s", server.Name, server.Status)
time.Sleep(30 * time.Second)
}
SpawnNewAgent(cfg)
} else {
log.WithFields(log.Fields{
"Caller": "Main",
}).Info("Checking if agents can be removed")
runningTasks, err := woodpecker.CheckRunning(cfg)
if err != nil {
log.WithFields(log.Fields{
"Caller": "Main",
}).Fatal(fmt.Sprintf("Error checking woodpecker queue: %s", err.Error()))
}
if (runningTasks <= len(ownedNodes) && runningTasks != 0) || pendingTasks > 0 {
log.WithFields(log.Fields{
"Caller": "Main",
}).Info("Still found running tasks. No agent to be removed")
} else {
if len(ownedNodes) == 0 {
log.WithFields(log.Fields{
"Caller": "Main",
}).Info("Nothing running and not owning any nodes")
} else {
log.WithFields(log.Fields{
"Caller": "Main",
}).Info("No tasks running. Will remove agents")
for _, server := range ownedNodes {
agentId, err := hetzner.DecomNode(cfg, &server)
if err != nil {
log.WithFields(log.Fields{
"Caller": "Main",
}).Warnf("Error while deleting node %s: %s", server.Name, err.Error())
}
err = woodpecker.DecomAgent(cfg, agentId)
if err != nil {
log.WithFields(log.Fields{
"Caller": "Main",
}).Warnf("Could not delete node %s in woodpecker: %s", server.Name, err.Error())
}
}
}
}
CheckJobs(cfg, ownedNodes, pendingTasks)
}
log.WithFields(log.Fields{
"Caller": "Main",

View File

@ -12,6 +12,7 @@ type Config = struct {
LogLevel string `default:"Info" env:"WOODPECKER_AUTOSCALER_LOGLEVEL"`
CheckInterval int `default:"15" env:"WOODPECKER_AUTOSCALER_CHECK_INTERVAL"`
DryRun bool `default:"false" env:"WOODPECKER_AUTOSCALER_DRY_RUN"`
CostOptimizedMode bool `default:"false" env:"WOODPECKER_AUTOSCALER_COST_OPTIMIZED"`
WoodpeckerLabelSelector string `default:"uploadfilter24.eu/instance-role=Woodpecker" env:"WOODPECKER_AUTOSCALER_WOODPECKER_LABEL_SELECTOR"`
WoodpeckerInstance string `default:"" env:"WOODPECKER_AUTOSCALER_WOODPECKER_INSTANCE"`
WoodpeckerGrpc string `default:"" env:"WOODPECKER_AUTOSCALER_WOODPECKER_GRPC"`

View File

@ -8,6 +8,7 @@ import (
"strconv"
"strings"
"text/template"
"time"
"git.uploadfilter24.eu/covidnetes/woodpecker-autoscaler/internal/config"
"git.uploadfilter24.eu/covidnetes/woodpecker-autoscaler/internal/models"
@ -173,3 +174,12 @@ func RefreshNodeInfo(cfg *config.Config, serverID int) (*hcloud.Server, error) {
}
return server, nil
}
func CheckRuntime(cfg *config.Config, server *hcloud.Server) (time.Duration, error) {
server, err := RefreshNodeInfo(cfg, server.ID)
now := time.Now()
if err != nil {
return time.Duration(0), errors.New(fmt.Sprintf("Could not check Runtime: %s", err.Error()))
}
return server.Created.Sub(now), nil
}