Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
2072462
Update Certs
Honigeintopf Oct 25, 2024
65cc193
Update Readme to include "-n firewall"
Honigeintopf Oct 25, 2024
68d79ea
Created test to check if unhealthy firewall is replaced when unhealthy
Honigeintopf Oct 28, 2024
fd71798
Added delete after healthtimeout is exceeded, still need to adjust int…
Honigeintopf Oct 31, 2024
0de0032
Added integration tests and deletion of fw after unhealthytimeout
Honigeintopf Nov 4, 2024
9605a18
refactor
Honigeintopf Nov 4, 2024
c6b5758
Fix Refactoring
Honigeintopf Nov 4, 2024
2fa826d
Finish refactor
Honigeintopf Nov 7, 2024
47f4029
Updated allocation timeout to longer than created timeout
Honigeintopf Nov 7, 2024
21d648c
Check if firewall is creating before setting allocation timeout
Honigeintopf Nov 7, 2024
4d9affd
Updated with seed
Honigeintopf Nov 7, 2024
0262546
update integration test
Honigeintopf Nov 8, 2024
fe0994c
Adjust test to not use retry on conflict
Honigeintopf Nov 8, 2024
3c98792
Merge branch 'main' into firewall-health-check
Honigeintopf May 19, 2025
41371c9
Merge branch 'main' into firewall-health-check
majst01 Oct 28, 2025
aec1033
Update integration/integration_test.go
Honigeintopf Jan 22, 2026
15bdf7b
check for allocation timeout set
Honigeintopf Jan 22, 2026
0510288
Merge branch 'main' into firewall-health-check
Gerrit91 Jan 22, 2026
8a4f4dc
Update controllers/set/status.go
Honigeintopf Jan 23, 2026
31b364e
Update controllers/set/status.go
Honigeintopf Jan 23, 2026
6e4d69c
Update controllers/set/status.go
Honigeintopf Jan 23, 2026
3d92644
Apply suggestions from code review
Honigeintopf Jan 27, 2026
1323e3f
set seed reconcile time
Honigeintopf Feb 4, 2026
8472f4e
remove annotation of fw to set reconcile connected but never reconciled.
Honigeintopf Feb 4, 2026
8cf61d4
only apply health timeout if we actually have a seed connected once
Honigeintopf Feb 4, 2026
851fb18
allow 0s timeout to disable health timeout
Honigeintopf Feb 4, 2026
f4574a6
set health timeout if cond not met and fw phase running
Honigeintopf Feb 9, 2026
f6afb92
new condition for fw
Honigeintopf Feb 10, 2026
d6f38a2
use monitor specific conditions
Honigeintopf Feb 10, 2026
1e2328f
Update api/v2/types_firewall.go
Honigeintopf Feb 10, 2026
7aa9704
Merge remote-tracking branch 'origin/main' into firewall-health-check
Gerrit91 Feb 11, 2026
f69eef7
Re-iterate status evaluation. (#86)
Gerrit91 Feb 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,4 @@ To play with the FCM, you can also run this controller inside the [mini-lab](htt
1. Deploy the FCM into the mini-lab with `make deploy`
1. Adapt the example [firewalldeployment.yaml](config/examples/firewalldeployment.yaml) and apply with `kubectl apply -f config/examples/firewalldeployment.yaml`
1. Note that the firewall-controller will not be able to connect to the mini-lab due to network restrictions, so the firewall will not get ready.
- You can make the firewall become ready anyway by setting the annotation `kubectl annotate fw <fw-nsme> firewall.metal-stack.io/no-controller-connection=true`
- You can make the firewall become ready anyway by setting the annotation `kubectl annotate fw <fw-name> -n firewall firewall.metal-stack.io/no-controller-connection=true`
4 changes: 2 additions & 2 deletions api/v2/config/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,10 @@ func (c *NewControllerConfig) validate() error {
if c.ProgressDeadline <= 0 {
return fmt.Errorf("progress deadline must be specified")
}
if c.FirewallHealthTimeout <= 0 {
if c.FirewallHealthTimeout < 0 {
return fmt.Errorf("firewall health timeout must be specified")
}
if c.CreateTimeout <= 0 {
if c.CreateTimeout < 0 {
return fmt.Errorf("create timeout must be specified")
}

Expand Down
171 changes: 171 additions & 0 deletions api/v2/types_firewall.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package v2

import (
"fmt"
"sort"
"strconv"
"time"

"github.com/metal-stack/metal-lib/pkg/pointer"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -183,6 +185,9 @@ const (
FirewallMonitorDeployed ConditionType = "MonitorDeployed"
// FirewallDistanceConfigured indicates that the firewall-controller has configured the given firewall distance.
FirewallDistanceConfigured ConditionType = "Distance"
// FirewallProvisioned indicates that all health conditions have been met at least once.
// Once set to true, it stays true and is used to detect condition degradation.
FirewallProvisioned ConditionType = "Provisioned"
)

// ShootAccess contains secret references to construct a shoot client in the firewall-controller to update its firewall monitor.
Expand Down Expand Up @@ -351,3 +356,169 @@ func SortFirewallsByImportance(fws []*Firewall) {
return !a.CreationTimestamp.Before(&b.CreationTimestamp)
})
}

// FirewallStatusResult names the outcome of a firewall status evaluation.
type FirewallStatusResult string

// FirewallStatusEvalResult is the value produced by EvaluateFirewallStatus.
type FirewallStatusEvalResult struct {
	// Result is the evaluated status of the firewall.
	Result FirewallStatusResult
	// Reason explains the result; it is left empty when the firewall is ready.
	Reason string
	// TimeoutIn is the time left until the applicable timeout is exceeded.
	// It is nil when no timeout is currently pending.
	TimeoutIn *time.Duration
}

// Possible results of EvaluateFirewallStatus.
const (
// FirewallStatusReady is returned when all conditions checked for the current phase are true.
FirewallStatusReady FirewallStatusResult = "ready"
// FirewallStatusProgressing is returned in the creating and crashing phases while
// conditions are still unmet and the create timeout has not been exceeded.
FirewallStatusProgressing FirewallStatusResult = "progressing"
// FirewallStatusUnhealthy is returned in all other phases while conditions are unmet
// and no health timeout has been exceeded.
FirewallStatusUnhealthy FirewallStatusResult = "unhealthy"
// FirewallStatusHealthTimeout is returned when a provisioned firewall has had an unmet
// seed-connected, connected or ready condition for longer than the health timeout.
FirewallStatusHealthTimeout FirewallStatusResult = "health-timeout"
// FirewallStatusCreateTimeout is returned in the creating and crashing phases when the
// create timeout, measured from the last transition of the ready condition, is exceeded.
FirewallStatusCreateTimeout FirewallStatusResult = "create-timeout"
)

// EvaluateFirewallStatus derives an overall status for fw from its phase and
// its status conditions.
//
// In the creating and crashing phases the created, ready and provisioned
// conditions are checked. A positive createTimeout is measured from the last
// transition of the ready condition; once it has elapsed the result is
// FirewallStatusCreateTimeout, otherwise FirewallStatusProgressing.
//
// In every other phase the controller-connected, seed-connected and distance
// conditions are checked as well. healthTimeout is only applied once the
// provisioned condition is true and is measured from the last transition of
// the first unmet condition among seed-connected, connected and ready (in that
// order). Once it has elapsed the result is FirewallStatusHealthTimeout,
// otherwise FirewallStatusUnhealthy.
//
// A timeout of zero disables the respective check. When a timeout is pending,
// TimeoutIn of the result holds the remaining time.
//
// NOTE(review): a condition that is missing entirely appears to be treated as
// having a zero transition time, which would make any enabled timeout count as
// exceeded right away — confirm this is intended.
func EvaluateFirewallStatus(fw *Firewall, createTimeout, healthTimeout time.Duration) *FirewallStatusEvalResult {
	// isTrue reports whether the condition is present with status true.
	isTrue := func(ct ConditionType) bool {
		return pointer.SafeDeref(fw.Status.Conditions.Get(ct)).Status == ConditionTrue
	}

	// unmetNames returns the type names of all given conditions that are
	// either missing or not true, in the order they were passed.
	unmetNames := func(types ...ConditionType) []string {
		var names []string

		for _, ct := range types {
			switch c := fw.Status.Conditions.Get(ct); {
			case c == nil:
				names = append(names, string(ct))
			case c.Status != ConditionTrue:
				names = append(names, string(c.Type))
			}
		}

		return names
	}

	// remaining returns the time left until timeout has elapsed since the
	// last transition of ct, and whether it has already elapsed. A timeout
	// of zero never elapses.
	remaining := func(ct ConditionType, timeout time.Duration) (time.Duration, bool) {
		if timeout == 0 {
			return 0, false
		}

		last := pointer.SafeDeref(fw.Status.Conditions.Get(ct)).LastTransitionTime.Time

		left := time.Until(last.Add(timeout))
		if left < 0 {
			return 0, true
		}

		return left, false
	}

	phase := fw.Status.Phase

	if phase == FirewallPhaseCreating || phase == FirewallPhaseCrashing {
		names := unmetNames(
			FirewallCreated,
			FirewallReady,
			FirewallProvisioned,
		)
		if len(names) == 0 {
			return &FirewallStatusEvalResult{Result: FirewallStatusReady}
		}

		res := &FirewallStatusEvalResult{
			Result: FirewallStatusProgressing,
			Reason: fmt.Sprintf("not all health conditions are true: %v", names),
		}

		// negative create timeouts are ignored just like a zero one
		if createTimeout > 0 {
			left, expired := remaining(FirewallReady, createTimeout)
			if expired {
				return &FirewallStatusEvalResult{
					Result: FirewallStatusCreateTimeout,
					Reason: fmt.Sprintf("%s create timeout exceeded, firewall not provisioned in time", createTimeout.String()),
				}
			}

			res.TimeoutIn = &left
		}

		return res
	}

	// running phase and anything else
	names := unmetNames(
		FirewallCreated,
		FirewallReady,
		FirewallProvisioned,
		FirewallControllerConnected,
		FirewallControllerSeedConnected,
		FirewallDistanceConfigured,
	)
	if len(names) == 0 {
		return &FirewallStatusEvalResult{Result: FirewallStatusReady}
	}

	res := &FirewallStatusEvalResult{
		Result: FirewallStatusUnhealthy,
		Reason: fmt.Sprintf("not all health conditions are true: %v", names),
	}

	// the health timeout only applies to firewalls that were provisioned before
	if !isTrue(FirewallProvisioned) {
		return res
	}

	healthChecks := []struct {
		condition ConditionType
		format    string
	}{
		{FirewallControllerSeedConnected, "%s health timeout exceeded, seed connection lost"},
		{FirewallControllerConnected, "%s health timeout exceeded, firewall monitor not reconciled anymore"},
		{FirewallReady, "%s health timeout exceeded, firewall is not ready from perspective of the metal-api"},
	}

	for _, check := range healthChecks {
		if isTrue(check.condition) {
			continue
		}

		left, expired := remaining(check.condition, healthTimeout)
		if expired {
			return &FirewallStatusEvalResult{
				Result: FirewallStatusHealthTimeout,
				Reason: fmt.Sprintf(check.format, healthTimeout.String()),
			}
		}

		if healthTimeout != 0 {
			res.TimeoutIn = &left
		}

		// only the first unmet condition decides about the timeout
		break
	}

	return res
}
Loading
Loading