From 2072462b4a7755c84aecc2a48744f70474197352 Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Fri, 25 Oct 2024 14:49:54 +0200 Subject: [PATCH 01/28] Update Certs --- config/examples/certs/ca-key.pem | 6 +- config/examples/certs/ca.pem | 16 ++--- config/examples/certs/tls.crt | 22 +++--- config/examples/certs/tls.key | 6 +- config/examples/kustomize/patch-webhooks.yaml | 72 +++++++++---------- 5 files changed, 61 insertions(+), 61 deletions(-) diff --git a/config/examples/certs/ca-key.pem b/config/examples/certs/ca-key.pem index 8f5fb66..542f656 100644 --- a/config/examples/certs/ca-key.pem +++ b/config/examples/certs/ca-key.pem @@ -1,5 +1,5 @@ -----BEGIN EC PRIVATE KEY----- -MHcCAQEEIBRabFggNFg6LUPxY5AeplDzeqZQmnsnFY9OmWQW2eGBoAoGCCqGSM49 -AwEHoUQDQgAEkP91tJGv5pIytEgKOlwTeksfWC1MczdEmj8ouOiaQfFvCkLl5NB/ -uRLrjoR8vDamER2UM+BumDy1XfM849aIww== +MHcCAQEEIMdzRnQT5XJYI5YdllH2IC4TDpkkoswIUSPxVggCmz8uoAoGCCqGSM49 +AwEHoUQDQgAEzPBxsUSwbxKnyOHzLBxJtne4EKF2dktJ7cgiq88H4i2QWvH8Eu5f +WlSuos1/tjF7NdnZwdR3F09M3FWN2z32vw== -----END EC PRIVATE KEY----- diff --git a/config/examples/certs/ca.pem b/config/examples/certs/ca.pem index dbb0bc8..01cbd20 100644 --- a/config/examples/certs/ca.pem +++ b/config/examples/certs/ca.pem @@ -1,12 +1,12 @@ -----BEGIN CERTIFICATE----- -MIIBvTCCAWSgAwIBAgIUY2eiJLpYQK4h35iDJbGsUPZlsAcwCgYIKoZIzj0EAwIw +MIIBvTCCAWSgAwIBAgIUK74MlGBl5v/PxcvYR1gX/4ZahecwCgYIKoZIzj0EAwIw PTELMAkGA1UEBhMCREUxDzANBgNVBAgTBk11bmljaDEQMA4GA1UEBxMHQmF2YXJp -YTELMAkGA1UEAxMCY2EwHhcNMjMwNDE4MDc1NDAwWhcNMjgwNDE2MDc1NDAwWjA9 +YTELMAkGA1UEAxMCY2EwHhcNMjQxMDI1MTI0MDAwWhcNMjkxMDI0MTI0MDAwWjA9 MQswCQYDVQQGEwJERTEPMA0GA1UECBMGTXVuaWNoMRAwDgYDVQQHEwdCYXZhcmlh -MQswCQYDVQQDEwJjYTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABJD/dbSRr+aS -MrRICjpcE3pLH1gtTHM3RJo/KLjomkHxbwpC5eTQf7kS646EfLw2phEdlDPgbpg8 -tV3zPOPWiMOjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0G -A1UdDgQWBBRL7+6t0aYt/vvqePoDdyJsQ6DQ5jAKBggqhkjOPQQDAgNHADBEAiB5 -4nITXzq23b7HZWf/TN22DQX+9Ajc2xOws2lwlx8TpQIgSP0zTa3yGeabqBgjmANZ 
-GTYZaSABLBAoQ1Lt5E6sCVs= +MQswCQYDVQQDEwJjYTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABMzwcbFEsG8S +p8jh8ywcSbZ3uBChdnZLSe3IIqvPB+ItkFrx/BLuX1pUrqLNf7YxezXZ2cHUdxdP +TNxVjds99r+jQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0G +A1UdDgQWBBRmKUtHhVtOaft2ka15nfnH6agg8zAKBggqhkjOPQQDAgNHADBEAiAz +dCfM0jLlTDzaEXz5z1XEg8LhJWQV5YYoF+DUlJiU/gIgfSvcno9zARAKNNH06qF0 +XCzKTrC60QhD+N1wFN7X2og= -----END CERTIFICATE----- diff --git a/config/examples/certs/tls.crt b/config/examples/certs/tls.crt index 58f1a0f..8df5e63 100644 --- a/config/examples/certs/tls.crt +++ b/config/examples/certs/tls.crt @@ -1,15 +1,15 @@ -----BEGIN CERTIFICATE----- -MIICRDCCAeqgAwIBAgIUHwoSR0+noLCqqJ10vEJkTAng4GowCgYIKoZIzj0EAwIw +MIICQzCCAeqgAwIBAgIUZtyTg/sZOeE2HL7hDL6lVCo+QBcwCgYIKoZIzj0EAwIw PTELMAkGA1UEBhMCREUxDzANBgNVBAgTBk11bmljaDEQMA4GA1UEBxMHQmF2YXJp -YTELMAkGA1UEAxMCY2EwHhcNMjMwNDE4MDc1NDAwWhcNMjQwNDE3MDc1NDAwWjBE +YTELMAkGA1UEAxMCY2EwHhcNMjQxMDI1MTI0MDAwWhcNMjUxMDI1MTI0MDAwWjBE MQswCQYDVQQGEwJERTEPMA0GA1UECBMGTXVuaWNoMRAwDgYDVQQHEwdCYXZhcmlh -MRIwEAYDVQQDEwlsb2NhbGhvc3QwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAARf -FNJn/7dufCbR0AC+BnTyvhn98yvOiD+ASWXaVYeBgsuB9GfUWlVyp+fjdAkgWNZd -4S4uNz6aD1G/KlE6GBFQo4HAMIG9MA4GA1UdDwEB/wQEAwIFoDAdBgNVHSUEFjAU -BggrBgEFBQcDAQYIKwYBBQUHAwIwDAYDVR0TAQH/BAIwADAdBgNVHQ4EFgQUj7eN -RLAK/BYVXeJfk6iS1xyJZRowHwYDVR0jBBgwFoAUS+/urdGmLf776nj6A3cibEOg -0OYwPgYDVR0RBDcwNYIJbG9jYWxob3N0gihmaXJld2FsbC1jb250cm9sbGVyLW1h -bmFnZXIuZmlyZXdhbGwuc3ZjMAoGCCqGSM49BAMCA0gAMEUCIQDsfaRwE5W901yK -JAQfSYlT+txLN8cdseHeDLXTwBo2IAIgV0g9f6F8KbyY6dvPHkoArRbZMIa3PFyL -/rflwrZzrPY= +MRIwEAYDVQQDEwlsb2NhbGhvc3QwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAARN +eruOjegpfrIkOew6QNy5HsOXzL+Oie/ubpUxphleQhX7/pLjGNvo8ueWDyN0ZZ0G +vxexgYUDZkXh19dg9RzQo4HAMIG9MA4GA1UdDwEB/wQEAwIFoDAdBgNVHSUEFjAU +BggrBgEFBQcDAQYIKwYBBQUHAwIwDAYDVR0TAQH/BAIwADAdBgNVHQ4EFgQUyxBq +6HMZNcJlyn+b0GRQqPwvepgwHwYDVR0jBBgwFoAUZilLR4VbTmn7dpGteZ35x+mo +IPMwPgYDVR0RBDcwNYIJbG9jYWxob3N0gihmaXJld2FsbC1jb250cm9sbGVyLW1h 
+bmFnZXIuZmlyZXdhbGwuc3ZjMAoGCCqGSM49BAMCA0cAMEQCIEIHZ3Uj6fNvYgKv +JbI28i8nsdF3PbCGhLW6XnFABwqBAiAP9KPZf9zAAN8DHum2s1sOYTVOHGm4drkq +NLAFeNNXbg== -----END CERTIFICATE----- diff --git a/config/examples/certs/tls.key b/config/examples/certs/tls.key index 6af5725..7e37484 100644 --- a/config/examples/certs/tls.key +++ b/config/examples/certs/tls.key @@ -1,5 +1,5 @@ -----BEGIN EC PRIVATE KEY----- -MHcCAQEEIGkp4UEW0A/611PSa/ryMg+7c2yB11ZqtA/GR1yMaeq+oAoGCCqGSM49 -AwEHoUQDQgAEXxTSZ/+3bnwm0dAAvgZ08r4Z/fMrzog/gEll2lWHgYLLgfRn1FpV -cqfn43QJIFjWXeEuLjc+mg9RvypROhgRUA== +MHcCAQEEIJZT9vmyYJDxyP3gyJpkeS02M0hgXlrrrjTCmlmUOcQ0oAoGCCqGSM49 +AwEHoUQDQgAETXq7jo3oKX6yJDnsOkDcuR7Dl8y/jonv7m6VMaYZXkIV+/6S4xjb +6PLnlg8jdGWdBr8XsYGFA2ZF4dfXYPUc0A== -----END EC PRIVATE KEY----- diff --git a/config/examples/kustomize/patch-webhooks.yaml b/config/examples/kustomize/patch-webhooks.yaml index 5a5d127..9ab2d62 100644 --- a/config/examples/kustomize/patch-webhooks.yaml +++ b/config/examples/kustomize/patch-webhooks.yaml @@ -4,45 +4,45 @@ kind: MutatingWebhookConfiguration metadata: name: mutating-webhook-configuration webhooks: -- name: firewall.metal-stack.io - clientConfig: - caBundle: 
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ2VENDQVdTZ0F3SUJBZ0lVWTJlaUpMcFlRSzRoMzVpREpiR3NVUFpsc0Fjd0NnWUlLb1pJemowRUF3SXcKUFRFTE1Ba0dBMVVFQmhNQ1JFVXhEekFOQmdOVkJBZ1RCazExYm1samFERVFNQTRHQTFVRUJ4TUhRbUYyWVhKcApZVEVMTUFrR0ExVUVBeE1DWTJFd0hoY05Nak13TkRFNE1EYzFOREF3V2hjTk1qZ3dOREUyTURjMU5EQXdXakE5Ck1Rc3dDUVlEVlFRR0V3SkVSVEVQTUEwR0ExVUVDQk1HVFhWdWFXTm9NUkF3RGdZRFZRUUhFd2RDWVhaaGNtbGgKTVFzd0NRWURWUVFERXdKallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJKRC9kYlNScithUwpNclJJQ2pwY0UzcExIMWd0VEhNM1JKby9LTGpvbWtIeGJ3cEM1ZVRRZjdrUzY0NkVmTHcycGhFZGxEUGdicGc4CnRWM3pQT1BXaU1PalFqQkFNQTRHQTFVZER3RUIvd1FFQXdJQkJqQVBCZ05WSFJNQkFmOEVCVEFEQVFIL01CMEcKQTFVZERnUVdCQlJMNys2dDBhWXQvdnZxZVBvRGR5SnNRNkRRNWpBS0JnZ3Foa2pPUFFRREFnTkhBREJFQWlCNQo0bklUWHpxMjNiN0haV2YvVE4yMkRRWCs5QWpjMnhPd3MybHdseDhUcFFJZ1NQMHpUYTN5R2VhYnFCZ2ptQU5aCkdUWVphU0FCTEJBb1ExTHQ1RTZzQ1ZzPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== - service: - name: firewall-controller-manager - namespace: firewall -- name: firewallset.metal-stack.io - clientConfig: - caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ2VENDQVdTZ0F3SUJBZ0lVWTJlaUpMcFlRSzRoMzVpREpiR3NVUFpsc0Fjd0NnWUlLb1pJemowRUF3SXcKUFRFTE1Ba0dBMVVFQmhNQ1JFVXhEekFOQmdOVkJBZ1RCazExYm1samFERVFNQTRHQTFVRUJ4TUhRbUYyWVhKcApZVEVMTUFrR0ExVUVBeE1DWTJFd0hoY05Nak13TkRFNE1EYzFOREF3V2hjTk1qZ3dOREUyTURjMU5EQXdXakE5Ck1Rc3dDUVlEVlFRR0V3SkVSVEVQTUEwR0ExVUVDQk1HVFhWdWFXTm9NUkF3RGdZRFZRUUhFd2RDWVhaaGNtbGgKTVFzd0NRWURWUVFERXdKallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJKRC9kYlNScithUwpNclJJQ2pwY0UzcExIMWd0VEhNM1JKby9LTGpvbWtIeGJ3cEM1ZVRRZjdrUzY0NkVmTHcycGhFZGxEUGdicGc4CnRWM3pQT1BXaU1PalFqQkFNQTRHQTFVZER3RUIvd1FFQXdJQkJqQVBCZ05WSFJNQkFmOEVCVEFEQVFIL01CMEcKQTFVZERnUVdCQlJMNys2dDBhWXQvdnZxZVBvRGR5SnNRNkRRNWpBS0JnZ3Foa2pPUFFRREFnTkhBREJFQWlCNQo0bklUWHpxMjNiN0haV2YvVE4yMkRRWCs5QWpjMnhPd3MybHdseDhUcFFJZ1NQMHpUYTN5R2VhYnFCZ2ptQU5aCkdUWVphU0FCTEJBb1ExTHQ1RTZzQ1ZzPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== - service: - name: firewall-controller-manager - namespace: firewall -- name: 
firewalldeployment.metal-stack.io - clientConfig: - caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ2VENDQVdTZ0F3SUJBZ0lVWTJlaUpMcFlRSzRoMzVpREpiR3NVUFpsc0Fjd0NnWUlLb1pJemowRUF3SXcKUFRFTE1Ba0dBMVVFQmhNQ1JFVXhEekFOQmdOVkJBZ1RCazExYm1samFERVFNQTRHQTFVRUJ4TUhRbUYyWVhKcApZVEVMTUFrR0ExVUVBeE1DWTJFd0hoY05Nak13TkRFNE1EYzFOREF3V2hjTk1qZ3dOREUyTURjMU5EQXdXakE5Ck1Rc3dDUVlEVlFRR0V3SkVSVEVQTUEwR0ExVUVDQk1HVFhWdWFXTm9NUkF3RGdZRFZRUUhFd2RDWVhaaGNtbGgKTVFzd0NRWURWUVFERXdKallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJKRC9kYlNScithUwpNclJJQ2pwY0UzcExIMWd0VEhNM1JKby9LTGpvbWtIeGJ3cEM1ZVRRZjdrUzY0NkVmTHcycGhFZGxEUGdicGc4CnRWM3pQT1BXaU1PalFqQkFNQTRHQTFVZER3RUIvd1FFQXdJQkJqQVBCZ05WSFJNQkFmOEVCVEFEQVFIL01CMEcKQTFVZERnUVdCQlJMNys2dDBhWXQvdnZxZVBvRGR5SnNRNkRRNWpBS0JnZ3Foa2pPUFFRREFnTkhBREJFQWlCNQo0bklUWHpxMjNiN0haV2YvVE4yMkRRWCs5QWpjMnhPd3MybHdseDhUcFFJZ1NQMHpUYTN5R2VhYnFCZ2ptQU5aCkdUWVphU0FCTEJBb1ExTHQ1RTZzQ1ZzPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== - service: - name: firewall-controller-manager - namespace: firewall + - name: firewall.metal-stack.io + clientConfig: + caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ2VENDQVdTZ0F3SUJBZ0lVSzc0TWxHQmw1di9QeGN2WVIxZ1gvNFphaGVjd0NnWUlLb1pJemowRUF3SXcKUFRFTE1Ba0dBMVVFQmhNQ1JFVXhEekFOQmdOVkJBZ1RCazExYm1samFERVFNQTRHQTFVRUJ4TUhRbUYyWVhKcApZVEVMTUFrR0ExVUVBeE1DWTJFd0hoY05NalF4TURJMU1USTBNREF3V2hjTk1qa3hNREkwTVRJME1EQXdXakE5Ck1Rc3dDUVlEVlFRR0V3SkVSVEVQTUEwR0ExVUVDQk1HVFhWdWFXTm9NUkF3RGdZRFZRUUhFd2RDWVhaaGNtbGgKTVFzd0NRWURWUVFERXdKallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJNendjYkZFc0c4UwpwOGpoOHl3Y1NiWjN1QkNoZG5aTFNlM0lJcXZQQitJdGtGcngvQkx1WDFwVXJxTE5mN1l4ZXpYWjJjSFVkeGRQClROeFZqZHM5OXIralFqQkFNQTRHQTFVZER3RUIvd1FFQXdJQkJqQVBCZ05WSFJNQkFmOEVCVEFEQVFIL01CMEcKQTFVZERnUVdCQlJtS1V0SGhWdE9hZnQya2ExNW5mbkg2YWdnOHpBS0JnZ3Foa2pPUFFRREFnTkhBREJFQWlBegpkQ2ZNMGpMbFREemFFWHo1ejFYRWc4TGhKV1FWNVlZb0YrRFVsSmlVL2dJZ2ZTdmNubzl6QVJBS05OSDA2cUYwClhDektUckM2MFFoRCtOMXdGTjdYMm9nPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== + service: + name: 
firewall-controller-manager + namespace: firewall + - name: firewallset.metal-stack.io + clientConfig: + caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ2VENDQVdTZ0F3SUJBZ0lVSzc0TWxHQmw1di9QeGN2WVIxZ1gvNFphaGVjd0NnWUlLb1pJemowRUF3SXcKUFRFTE1Ba0dBMVVFQmhNQ1JFVXhEekFOQmdOVkJBZ1RCazExYm1samFERVFNQTRHQTFVRUJ4TUhRbUYyWVhKcApZVEVMTUFrR0ExVUVBeE1DWTJFd0hoY05NalF4TURJMU1USTBNREF3V2hjTk1qa3hNREkwTVRJME1EQXdXakE5Ck1Rc3dDUVlEVlFRR0V3SkVSVEVQTUEwR0ExVUVDQk1HVFhWdWFXTm9NUkF3RGdZRFZRUUhFd2RDWVhaaGNtbGgKTVFzd0NRWURWUVFERXdKallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJNendjYkZFc0c4UwpwOGpoOHl3Y1NiWjN1QkNoZG5aTFNlM0lJcXZQQitJdGtGcngvQkx1WDFwVXJxTE5mN1l4ZXpYWjJjSFVkeGRQClROeFZqZHM5OXIralFqQkFNQTRHQTFVZER3RUIvd1FFQXdJQkJqQVBCZ05WSFJNQkFmOEVCVEFEQVFIL01CMEcKQTFVZERnUVdCQlJtS1V0SGhWdE9hZnQya2ExNW5mbkg2YWdnOHpBS0JnZ3Foa2pPUFFRREFnTkhBREJFQWlBegpkQ2ZNMGpMbFREemFFWHo1ejFYRWc4TGhKV1FWNVlZb0YrRFVsSmlVL2dJZ2ZTdmNubzl6QVJBS05OSDA2cUYwClhDektUckM2MFFoRCtOMXdGTjdYMm9nPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== + service: + name: firewall-controller-manager + namespace: firewall + - name: firewalldeployment.metal-stack.io + clientConfig: + caBundle: 
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ2VENDQVdTZ0F3SUJBZ0lVSzc0TWxHQmw1di9QeGN2WVIxZ1gvNFphaGVjd0NnWUlLb1pJemowRUF3SXcKUFRFTE1Ba0dBMVVFQmhNQ1JFVXhEekFOQmdOVkJBZ1RCazExYm1samFERVFNQTRHQTFVRUJ4TUhRbUYyWVhKcApZVEVMTUFrR0ExVUVBeE1DWTJFd0hoY05NalF4TURJMU1USTBNREF3V2hjTk1qa3hNREkwTVRJME1EQXdXakE5Ck1Rc3dDUVlEVlFRR0V3SkVSVEVQTUEwR0ExVUVDQk1HVFhWdWFXTm9NUkF3RGdZRFZRUUhFd2RDWVhaaGNtbGgKTVFzd0NRWURWUVFERXdKallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJNendjYkZFc0c4UwpwOGpoOHl3Y1NiWjN1QkNoZG5aTFNlM0lJcXZQQitJdGtGcngvQkx1WDFwVXJxTE5mN1l4ZXpYWjJjSFVkeGRQClROeFZqZHM5OXIralFqQkFNQTRHQTFVZER3RUIvd1FFQXdJQkJqQVBCZ05WSFJNQkFmOEVCVEFEQVFIL01CMEcKQTFVZERnUVdCQlJtS1V0SGhWdE9hZnQya2ExNW5mbkg2YWdnOHpBS0JnZ3Foa2pPUFFRREFnTkhBREJFQWlBegpkQ2ZNMGpMbFREemFFWHo1ejFYRWc4TGhKV1FWNVlZb0YrRFVsSmlVL2dJZ2ZTdmNubzl6QVJBS05OSDA2cUYwClhDektUckM2MFFoRCtOMXdGTjdYMm9nPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== + service: + name: firewall-controller-manager + namespace: firewall --- apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: name: validating-webhook-configuration webhooks: -- name: firewall.metal-stack.io - clientConfig: - caBundle: 
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ2VENDQVdTZ0F3SUJBZ0lVWTJlaUpMcFlRSzRoMzVpREpiR3NVUFpsc0Fjd0NnWUlLb1pJemowRUF3SXcKUFRFTE1Ba0dBMVVFQmhNQ1JFVXhEekFOQmdOVkJBZ1RCazExYm1samFERVFNQTRHQTFVRUJ4TUhRbUYyWVhKcApZVEVMTUFrR0ExVUVBeE1DWTJFd0hoY05Nak13TkRFNE1EYzFOREF3V2hjTk1qZ3dOREUyTURjMU5EQXdXakE5Ck1Rc3dDUVlEVlFRR0V3SkVSVEVQTUEwR0ExVUVDQk1HVFhWdWFXTm9NUkF3RGdZRFZRUUhFd2RDWVhaaGNtbGgKTVFzd0NRWURWUVFERXdKallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJKRC9kYlNScithUwpNclJJQ2pwY0UzcExIMWd0VEhNM1JKby9LTGpvbWtIeGJ3cEM1ZVRRZjdrUzY0NkVmTHcycGhFZGxEUGdicGc4CnRWM3pQT1BXaU1PalFqQkFNQTRHQTFVZER3RUIvd1FFQXdJQkJqQVBCZ05WSFJNQkFmOEVCVEFEQVFIL01CMEcKQTFVZERnUVdCQlJMNys2dDBhWXQvdnZxZVBvRGR5SnNRNkRRNWpBS0JnZ3Foa2pPUFFRREFnTkhBREJFQWlCNQo0bklUWHpxMjNiN0haV2YvVE4yMkRRWCs5QWpjMnhPd3MybHdseDhUcFFJZ1NQMHpUYTN5R2VhYnFCZ2ptQU5aCkdUWVphU0FCTEJBb1ExTHQ1RTZzQ1ZzPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== - service: - name: firewall-controller-manager - namespace: firewall -- name: firewallset.metal-stack.io - clientConfig: - caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ2VENDQVdTZ0F3SUJBZ0lVWTJlaUpMcFlRSzRoMzVpREpiR3NVUFpsc0Fjd0NnWUlLb1pJemowRUF3SXcKUFRFTE1Ba0dBMVVFQmhNQ1JFVXhEekFOQmdOVkJBZ1RCazExYm1samFERVFNQTRHQTFVRUJ4TUhRbUYyWVhKcApZVEVMTUFrR0ExVUVBeE1DWTJFd0hoY05Nak13TkRFNE1EYzFOREF3V2hjTk1qZ3dOREUyTURjMU5EQXdXakE5Ck1Rc3dDUVlEVlFRR0V3SkVSVEVQTUEwR0ExVUVDQk1HVFhWdWFXTm9NUkF3RGdZRFZRUUhFd2RDWVhaaGNtbGgKTVFzd0NRWURWUVFERXdKallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJKRC9kYlNScithUwpNclJJQ2pwY0UzcExIMWd0VEhNM1JKby9LTGpvbWtIeGJ3cEM1ZVRRZjdrUzY0NkVmTHcycGhFZGxEUGdicGc4CnRWM3pQT1BXaU1PalFqQkFNQTRHQTFVZER3RUIvd1FFQXdJQkJqQVBCZ05WSFJNQkFmOEVCVEFEQVFIL01CMEcKQTFVZERnUVdCQlJMNys2dDBhWXQvdnZxZVBvRGR5SnNRNkRRNWpBS0JnZ3Foa2pPUFFRREFnTkhBREJFQWlCNQo0bklUWHpxMjNiN0haV2YvVE4yMkRRWCs5QWpjMnhPd3MybHdseDhUcFFJZ1NQMHpUYTN5R2VhYnFCZ2ptQU5aCkdUWVphU0FCTEJBb1ExTHQ1RTZzQ1ZzPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== - service: - name: firewall-controller-manager - namespace: firewall -- name: 
firewalldeployment.metal-stack.io - clientConfig: - caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ2VENDQVdTZ0F3SUJBZ0lVWTJlaUpMcFlRSzRoMzVpREpiR3NVUFpsc0Fjd0NnWUlLb1pJemowRUF3SXcKUFRFTE1Ba0dBMVVFQmhNQ1JFVXhEekFOQmdOVkJBZ1RCazExYm1samFERVFNQTRHQTFVRUJ4TUhRbUYyWVhKcApZVEVMTUFrR0ExVUVBeE1DWTJFd0hoY05Nak13TkRFNE1EYzFOREF3V2hjTk1qZ3dOREUyTURjMU5EQXdXakE5Ck1Rc3dDUVlEVlFRR0V3SkVSVEVQTUEwR0ExVUVDQk1HVFhWdWFXTm9NUkF3RGdZRFZRUUhFd2RDWVhaaGNtbGgKTVFzd0NRWURWUVFERXdKallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJKRC9kYlNScithUwpNclJJQ2pwY0UzcExIMWd0VEhNM1JKby9LTGpvbWtIeGJ3cEM1ZVRRZjdrUzY0NkVmTHcycGhFZGxEUGdicGc4CnRWM3pQT1BXaU1PalFqQkFNQTRHQTFVZER3RUIvd1FFQXdJQkJqQVBCZ05WSFJNQkFmOEVCVEFEQVFIL01CMEcKQTFVZERnUVdCQlJMNys2dDBhWXQvdnZxZVBvRGR5SnNRNkRRNWpBS0JnZ3Foa2pPUFFRREFnTkhBREJFQWlCNQo0bklUWHpxMjNiN0haV2YvVE4yMkRRWCs5QWpjMnhPd3MybHdseDhUcFFJZ1NQMHpUYTN5R2VhYnFCZ2ptQU5aCkdUWVphU0FCTEJBb1ExTHQ1RTZzQ1ZzPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== - service: - name: firewall-controller-manager - namespace: firewall + - name: firewall.metal-stack.io + clientConfig: + caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ2VENDQVdTZ0F3SUJBZ0lVSzc0TWxHQmw1di9QeGN2WVIxZ1gvNFphaGVjd0NnWUlLb1pJemowRUF3SXcKUFRFTE1Ba0dBMVVFQmhNQ1JFVXhEekFOQmdOVkJBZ1RCazExYm1samFERVFNQTRHQTFVRUJ4TUhRbUYyWVhKcApZVEVMTUFrR0ExVUVBeE1DWTJFd0hoY05NalF4TURJMU1USTBNREF3V2hjTk1qa3hNREkwTVRJME1EQXdXakE5Ck1Rc3dDUVlEVlFRR0V3SkVSVEVQTUEwR0ExVUVDQk1HVFhWdWFXTm9NUkF3RGdZRFZRUUhFd2RDWVhaaGNtbGgKTVFzd0NRWURWUVFERXdKallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJNendjYkZFc0c4UwpwOGpoOHl3Y1NiWjN1QkNoZG5aTFNlM0lJcXZQQitJdGtGcngvQkx1WDFwVXJxTE5mN1l4ZXpYWjJjSFVkeGRQClROeFZqZHM5OXIralFqQkFNQTRHQTFVZER3RUIvd1FFQXdJQkJqQVBCZ05WSFJNQkFmOEVCVEFEQVFIL01CMEcKQTFVZERnUVdCQlJtS1V0SGhWdE9hZnQya2ExNW5mbkg2YWdnOHpBS0JnZ3Foa2pPUFFRREFnTkhBREJFQWlBegpkQ2ZNMGpMbFREemFFWHo1ejFYRWc4TGhKV1FWNVlZb0YrRFVsSmlVL2dJZ2ZTdmNubzl6QVJBS05OSDA2cUYwClhDektUckM2MFFoRCtOMXdGTjdYMm9nPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== + service: + name: 
firewall-controller-manager + namespace: firewall + - name: firewallset.metal-stack.io + clientConfig: + caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ2VENDQVdTZ0F3SUJBZ0lVSzc0TWxHQmw1di9QeGN2WVIxZ1gvNFphaGVjd0NnWUlLb1pJemowRUF3SXcKUFRFTE1Ba0dBMVVFQmhNQ1JFVXhEekFOQmdOVkJBZ1RCazExYm1samFERVFNQTRHQTFVRUJ4TUhRbUYyWVhKcApZVEVMTUFrR0ExVUVBeE1DWTJFd0hoY05NalF4TURJMU1USTBNREF3V2hjTk1qa3hNREkwTVRJME1EQXdXakE5Ck1Rc3dDUVlEVlFRR0V3SkVSVEVQTUEwR0ExVUVDQk1HVFhWdWFXTm9NUkF3RGdZRFZRUUhFd2RDWVhaaGNtbGgKTVFzd0NRWURWUVFERXdKallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJNendjYkZFc0c4UwpwOGpoOHl3Y1NiWjN1QkNoZG5aTFNlM0lJcXZQQitJdGtGcngvQkx1WDFwVXJxTE5mN1l4ZXpYWjJjSFVkeGRQClROeFZqZHM5OXIralFqQkFNQTRHQTFVZER3RUIvd1FFQXdJQkJqQVBCZ05WSFJNQkFmOEVCVEFEQVFIL01CMEcKQTFVZERnUVdCQlJtS1V0SGhWdE9hZnQya2ExNW5mbkg2YWdnOHpBS0JnZ3Foa2pPUFFRREFnTkhBREJFQWlBegpkQ2ZNMGpMbFREemFFWHo1ejFYRWc4TGhKV1FWNVlZb0YrRFVsSmlVL2dJZ2ZTdmNubzl6QVJBS05OSDA2cUYwClhDektUckM2MFFoRCtOMXdGTjdYMm9nPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== + service: + name: firewall-controller-manager + namespace: firewall + - name: firewalldeployment.metal-stack.io + clientConfig: + caBundle: 
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ2VENDQVdTZ0F3SUJBZ0lVSzc0TWxHQmw1di9QeGN2WVIxZ1gvNFphaGVjd0NnWUlLb1pJemowRUF3SXcKUFRFTE1Ba0dBMVVFQmhNQ1JFVXhEekFOQmdOVkJBZ1RCazExYm1samFERVFNQTRHQTFVRUJ4TUhRbUYyWVhKcApZVEVMTUFrR0ExVUVBeE1DWTJFd0hoY05NalF4TURJMU1USTBNREF3V2hjTk1qa3hNREkwTVRJME1EQXdXakE5Ck1Rc3dDUVlEVlFRR0V3SkVSVEVQTUEwR0ExVUVDQk1HVFhWdWFXTm9NUkF3RGdZRFZRUUhFd2RDWVhaaGNtbGgKTVFzd0NRWURWUVFERXdKallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJNendjYkZFc0c4UwpwOGpoOHl3Y1NiWjN1QkNoZG5aTFNlM0lJcXZQQitJdGtGcngvQkx1WDFwVXJxTE5mN1l4ZXpYWjJjSFVkeGRQClROeFZqZHM5OXIralFqQkFNQTRHQTFVZER3RUIvd1FFQXdJQkJqQVBCZ05WSFJNQkFmOEVCVEFEQVFIL01CMEcKQTFVZERnUVdCQlJtS1V0SGhWdE9hZnQya2ExNW5mbkg2YWdnOHpBS0JnZ3Foa2pPUFFRREFnTkhBREJFQWlBegpkQ2ZNMGpMbFREemFFWHo1ejFYRWc4TGhKV1FWNVlZb0YrRFVsSmlVL2dJZ2ZTdmNubzl6QVJBS05OSDA2cUYwClhDektUckM2MFFoRCtOMXdGTjdYMm9nPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== + service: + name: firewall-controller-manager + namespace: firewall From 65cc19329eb80a9af1e5dff5096968a6ed06a6fa Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Fri, 25 Oct 2024 14:52:13 +0200 Subject: [PATCH 02/28] Update Readme to include "-n firewall" --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 27c45bc..190459d 100644 --- a/README.md +++ b/README.md @@ -51,4 +51,4 @@ To play with the FCM, you can also run this controller inside the [mini-lab](htt 1. Deploy the FCM into the mini-lab with `make deploy` 1. Adapt the example [firewalldeployment.yaml](config/examples/firewalldeployment.yaml) and apply with `kubectl apply -f config/examples/firewalldeployment.yaml` 1. Note that the firewall-controller will not be able to connect to the mini-lab due to network restrictions, so the firewall will not get ready. 
- - You can make the firewall become ready anyway by setting the annotation `kubectl annotate fw firewall.metal-stack.io/no-controller-connection=true` + - You can make the firewall become ready anyway by setting the annotation `kubectl annotate fw -n firewall firewall.metal-stack.io/no-controller-connection=true` From 68d79ea7cb1377ccdf0006f7121cc389ed1636f6 Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Mon, 28 Oct 2024 16:55:42 +0100 Subject: [PATCH 03/28] Created test to check if unhealthy firewall is replaced when unhealthy --- integration/integration_test.go | 49 +++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/integration/integration_test.go b/integration/integration_test.go index daf0296..b4f6421 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -162,6 +162,53 @@ var _ = Context("integration test", Ordered, func() { Expect(client.IgnoreAlreadyExists(k8sClient.Create(ctx, shootTokenSecret.DeepCopy()))).To(Succeed()) }) + When("creating a firewall deployment that simulates unhealthiness", Ordered, func() { + var fwSet *v2.FirewallSet + + BeforeAll(func() { + // Create the Firewall Deployment + fwDeployment := deployment() + Expect(k8sClient.Create(ctx, fwDeployment)).To(Succeed()) + + // Wait for the FirewallSet to be created + Eventually(func() error { + fwSetList := &v2.FirewallSetList{} + err := k8sClient.List(ctx, fwSetList, client.InNamespace(namespaceName)) + if err != nil { + return err + } + if len(fwSetList.Items) == 0 { + return fmt.Errorf("no firewall sets found") + } + fwSet = &fwSetList.Items[0] + return nil + }, 15*time.Second, interval).Should(Succeed(), "FirewallSet should be created") + }) + + It("should update the deployment status to reflect the unhealthy replica", func() { + // Simulate unhealthiness by updating the FirewallSet status + fwSet.Status.UnhealthyReplicas = 1 + Expect(k8sClient.Status().Update(ctx, fwSet)).To(Succeed()) + + // Wait for the deployment status to 
reflect the unhealthy replica + Eventually(func() int { + fetchedDeployment := &v2.FirewallDeployment{} + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(deployment()), fetchedDeployment)).To(Succeed()) + return fetchedDeployment.Status.UnhealthyReplicas + }, 15*time.Second, interval).Should(Equal(1), "unhealthy replicas should be reported") + }) + + It("should eventually replace the unhealthy firewall", func() { + // Wait for the controller to replace the unhealthy firewall + Eventually(func() bool { + fwSetList := &v2.FirewallSetList{} + Expect(k8sClient.List(ctx, fwSetList, client.InNamespace(namespaceName))).To(Succeed()) + // Check if a new FirewallSet has been created + return len(fwSetList.Items) > 1 + }, 60*time.Second, interval).Should(BeTrue(), "A new FirewallSet should be created to replace the unhealthy one") + }) + }) + Describe("the rolling update", Ordered, func() { When("creating a firewall deployment", Ordered, func() { It("the creation works", func() { @@ -1910,5 +1957,7 @@ var _ = Context("integration test", Ordered, func() { }) }) }) + }) + }) From fd71798c937689ae9c2695a47fa106c23b443ab5 Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Thu, 31 Oct 2024 16:30:09 +0100 Subject: [PATCH 04/28] Added delete after health timeout is exceeded, still need to adjust integration tests --- controllers/set/delete.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/controllers/set/delete.go b/controllers/set/delete.go index 8843ff4..fa8f985 100644 --- a/controllers/set/delete.go +++ b/controllers/set/delete.go @@ -58,6 +58,20 @@ func (c *controller) deleteAfterTimeout(r *controllers.Ctx[*v2.FirewallSet], fws connected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue + if c.isFirewallUnhealthy(fw) { + allocationTimestamp := pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp + if time.Since(allocationTimestamp.Time) > c.c.GetFirewallHealthTimeout() { + 
r.Log.Info("unhealthy firewall not recovering, deleting from set", "firewall-name", fw.Name) + + err := c.deleteFirewalls(r, fw) + if err != nil { + return nil, err + } + + result = append(result, fw) + continue + } + } if !connected && time.Since(fw.CreationTimestamp.Time) > c.c.GetCreateTimeout() { r.Log.Info("firewall not getting ready, deleting from set", "firewall-name", fw.Name) @@ -67,8 +81,19 @@ func (c *controller) deleteAfterTimeout(r *controllers.Ctx[*v2.FirewallSet], fws } result = append(result, fw) + } } return result, nil } + +func (c *controller) isFirewallUnhealthy(fw *v2.Firewall) bool { + created := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue + ready := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue + connected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue + seedConnected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue + distance := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue + + return !(created && ready && connected && seedConnected && distance) +} From 0de0032727a286b53691594928f2a1e3864b64a1 Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Mon, 4 Nov 2024 14:40:51 +0100 Subject: [PATCH 05/28] Added integration tests and deletion of fw after unhealthytimeout --- controllers/set/delete.go | 51 ++++++------ controllers/set/reconcile.go | 2 +- integration/integration_test.go | 116 +++++++++++++++++----------- integration/metal_resources_test.go | 86 +++++++++++++++++++++ integration/suite_test.go | 2 +- 5 files changed, 186 insertions(+), 71 deletions(-) diff --git a/controllers/set/delete.go b/controllers/set/delete.go index fa8f985..4988449 100644 --- a/controllers/set/delete.go +++ b/controllers/set/delete.go @@ -46,32 +46,28 @@ func (c *controller) deleteFirewalls(r 
*controllers.Ctx[*v2.FirewallSet], fws .. return nil } -func (c *controller) deleteAfterTimeout(r *controllers.Ctx[*v2.FirewallSet], fws ...*v2.Firewall) ([]*v2.Firewall, error) { +func (c *controller) deleteIfUnhealthyOrTimeout(r *controllers.Ctx[*v2.FirewallSet], fws ...*v2.Firewall) ([]*v2.Firewall, error) { var result []*v2.Firewall for _, fw := range fws { fw := fw - if fw.Status.Phase != v2.FirewallPhaseCreating { - continue - } - connected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue if c.isFirewallUnhealthy(fw) { - allocationTimestamp := pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp - if time.Since(allocationTimestamp.Time) > c.c.GetFirewallHealthTimeout() { - r.Log.Info("unhealthy firewall not recovering, deleting from set", "firewall-name", fw.Name) - - err := c.deleteFirewalls(r, fw) - if err != nil { - return nil, err - } - - result = append(result, fw) - continue + r.Log.Info("unhealthy firewall not recovering, deleting from set", "firewall-name", fw.Name) + err := c.deleteFirewalls(r, fw) + if err != nil { + return nil, err } + result = append(result, fw) + continue } + + if fw.Status.Phase != v2.FirewallPhaseCreating { + continue + } + if !connected && time.Since(fw.CreationTimestamp.Time) > c.c.GetCreateTimeout() { r.Log.Info("firewall not getting ready, deleting from set", "firewall-name", fw.Name) @@ -89,11 +85,22 @@ func (c *controller) deleteAfterTimeout(r *controllers.Ctx[*v2.FirewallSet], fws } func (c *controller) isFirewallUnhealthy(fw *v2.Firewall) bool { - created := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue - ready := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue - connected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue - seedConnected := 
pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue - distance := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue - return !(created && ready && connected && seedConnected && distance) + var ( + created = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue + ready = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue + connected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue + seedConnected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue + distance = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue + ) + + if created && ready && connected && seedConnected && distance { + return false + } + + if created && time.Since(pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time) > c.c.GetFirewallHealthTimeout() { + return true + } + + return false } diff --git a/controllers/set/reconcile.go b/controllers/set/reconcile.go index 6591ea3..53ca2dd 100644 --- a/controllers/set/reconcile.go +++ b/controllers/set/reconcile.go @@ -109,7 +109,7 @@ func (c *controller) Reconcile(r *controllers.Ctx[*v2.FirewallSet]) error { } } - deletedFws, err := c.deleteAfterTimeout(r, ownedFirewalls...) + deletedFws, err := c.deleteIfUnhealthyOrTimeout(r, ownedFirewalls...) 
if err != nil { return err } diff --git a/integration/integration_test.go b/integration/integration_test.go index b4f6421..7d48b99 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -162,53 +162,6 @@ var _ = Context("integration test", Ordered, func() { Expect(client.IgnoreAlreadyExists(k8sClient.Create(ctx, shootTokenSecret.DeepCopy()))).To(Succeed()) }) - When("creating a firewall deployment that simulates unhealthiness", Ordered, func() { - var fwSet *v2.FirewallSet - - BeforeAll(func() { - // Create the Firewall Deployment - fwDeployment := deployment() - Expect(k8sClient.Create(ctx, fwDeployment)).To(Succeed()) - - // Wait for the FirewallSet to be created - Eventually(func() error { - fwSetList := &v2.FirewallSetList{} - err := k8sClient.List(ctx, fwSetList, client.InNamespace(namespaceName)) - if err != nil { - return err - } - if len(fwSetList.Items) == 0 { - return fmt.Errorf("no firewall sets found") - } - fwSet = &fwSetList.Items[0] - return nil - }, 15*time.Second, interval).Should(Succeed(), "FirewallSet should be created") - }) - - It("should update the deployment status to reflect the unhealthy replica", func() { - // Simulate unhealthiness by updating the FirewallSet status - fwSet.Status.UnhealthyReplicas = 1 - Expect(k8sClient.Status().Update(ctx, fwSet)).To(Succeed()) - - // Wait for the deployment status to reflect the unhealthy replica - Eventually(func() int { - fetchedDeployment := &v2.FirewallDeployment{} - Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(deployment()), fetchedDeployment)).To(Succeed()) - return fetchedDeployment.Status.UnhealthyReplicas - }, 15*time.Second, interval).Should(Equal(1), "unhealthy replicas should be reported") - }) - - It("should eventually replace the unhealthy firewall", func() { - // Wait for the controller to replace the unhealthy firewall - Eventually(func() bool { - fwSetList := &v2.FirewallSetList{} - Expect(k8sClient.List(ctx, fwSetList, 
client.InNamespace(namespaceName))).To(Succeed()) - // Check if a new FirewallSet has been created - return len(fwSetList.Items) > 1 - }, 60*time.Second, interval).Should(BeTrue(), "A new FirewallSet should be created to replace the unhealthy one") - }) - }) - Describe("the rolling update", Ordered, func() { When("creating a firewall deployment", Ordered, func() { It("the creation works", func() { @@ -1960,4 +1913,73 @@ var _ = Context("integration test", Ordered, func() { }) + When("creating a firewall set that simulates unhealthiness", Ordered, func() { + var firewallSet *v2.FirewallSet + + BeforeAll(func() { + swapMetalClient(&metalclient.MetalMockFns{ + Firewall: func(m *mock.Mock) { + m.On("AllocateFirewall", mock.Anything, nil).Return(&metalfirewall.AllocateFirewallOK{Payload: firewall3}, nil).Maybe() + m.On("FindFirewall", mock.Anything, nil).Return(&metalfirewall.FindFirewallOK{Payload: firewall3}, nil).Maybe() + m.On("FindFirewalls", mock.Anything, nil).Return(&metalfirewall.FindFirewallsOK{Payload: []*models.V1FirewallResponse{firewall3}}, nil).Maybe() + }, + Network: func(m *mock.Mock) { + m.On("FindNetwork", mock.Anything, nil).Return(&network.FindNetworkOK{Payload: network1}, nil).Maybe() + }, + Machine: func(m *mock.Mock) { + m.On("UpdateMachine", mock.Anything, nil).Return(&machine.UpdateMachineOK{Payload: &models.V1MachineResponse{}}, nil).Maybe() + m.On("FreeMachine", mock.Anything, nil).Return(&machine.FreeMachineOK{Payload: &models.V1MachineResponse{ID: firewall3.ID}}, nil).Maybe() + }, + Image: func(m *mock.Mock) { + m.On("FindLatestImage", mock.Anything, nil).Return(&image.FindLatestImageOK{Payload: image1}, nil).Maybe() + }, + }) + + Expect(k8sClient.Create(ctx, deployment())).To(Succeed()) + Eventually(func() error { + firewallSetList := &v2.FirewallSetList{} + err := k8sClient.List(ctx, firewallSetList, client.InNamespace(namespaceName)) + if err != nil { + return err + } + if len(firewallSetList.Items) == 0 { + return fmt.Errorf("no 
firewall sets found") + } + firewallSet = &firewallSetList.Items[0] + return nil + }, 15*time.Second, interval).Should(Succeed(), "FirewallSet should be created") + }) + + It("should simulate unhealthiness and trigger deletion", func() { + firewallList := &v2.FirewallList{} + Eventually(func() int { + + err := k8sClient.List(ctx, firewallList, client.InNamespace(firewallSet.Namespace)) + if err != nil { + return 0 + } + return len(firewallList.Items) + }, 15*time.Second, interval).Should(BeNumerically(">", 0), "Should have at least one firewall") + + By("waiting for the firewall to be deleted") + Eventually(func() bool { + for _, fw := range firewallList.Items { + err := k8sClient.Get(ctx, client.ObjectKeyFromObject(&fw), &v2.Firewall{}) + if !apierrors.IsNotFound(err) { + return false + } + } + return true + }, 10*time.Second, interval).Should(BeTrue(), "All Firewalls should be deleted") + + By("verifying that a new firewall has been created") + Eventually(func() int { + newFirewallList := &v2.FirewallList{} + Expect(k8sClient.List(ctx, newFirewallList, client.InNamespace(firewallSet.Namespace))).To(Succeed()) + return len(newFirewallList.Items) + }, 10*time.Second, interval).Should(Equal(1), "A new firewall should be created") + }) + + }) + }) diff --git a/integration/metal_resources_test.go b/integration/metal_resources_test.go index ef87655..00b2c86 100644 --- a/integration/metal_resources_test.go +++ b/integration/metal_resources_test.go @@ -311,6 +311,92 @@ var ( Vrf: 50, Vrfshared: true, } + firewall3 = &models.V1FirewallResponse{ + Allocation: &models.V1MachineAllocation{ + BootInfo: &models.V1BootInfo{ + Bootloaderid: pointer.Pointer("bootloaderid"), + Cmdline: pointer.Pointer("cmdline"), + ImageID: pointer.Pointer("imageid"), + Initrd: pointer.Pointer("initrd"), + Kernel: pointer.Pointer("kernel"), + OsPartition: pointer.Pointer("ospartition"), + PrimaryDisk: pointer.Pointer("primarydisk"), + }, + Created: pointer.Pointer(strfmt.DateTime(testTime.Add(-20 
* 24 * time.Hour))), + Creator: pointer.Pointer("creator"), + Description: "firewall allocation 3", + Filesystemlayout: fsl1, + Hostname: pointer.Pointer("firewall-hostname-3"), + Image: image1, + Name: pointer.Pointer("firewall-3"), + Networks: []*models.V1MachineNetwork{ + { + Asn: pointer.Pointer(int64(200)), + Destinationprefixes: []string{"2.2.2.2"}, + Ips: []string{"1.1.1.1"}, + Nat: pointer.Pointer(false), + Networkid: pointer.Pointer("private"), + Networktype: pointer.Pointer(net.PrivatePrimaryUnshared), + Prefixes: []string{"prefixes"}, + Private: pointer.Pointer(true), + Underlay: pointer.Pointer(false), + Vrf: pointer.Pointer(int64(100)), + }, + }, + Project: pointer.Pointer("project-1"), + Reinstall: pointer.Pointer(false), + Role: pointer.Pointer(models.V1MachineAllocationRoleFirewall), + SSHPubKeys: []string{"sshpubkey"}, + Succeeded: pointer.Pointer(true), + UserData: "---userdata---", + }, + Bios: &models.V1MachineBIOS{ + Date: pointer.Pointer("biosdata"), + Vendor: pointer.Pointer("biosvendor"), + Version: pointer.Pointer("biosversion"), + }, + Description: "firewall 1", + Events: &models.V1MachineRecentProvisioningEvents{ + CrashLoop: pointer.Pointer(true), + FailedMachineReclaim: pointer.Pointer(true), + LastErrorEvent: &models.V1MachineProvisioningEvent{ + Event: pointer.Pointer("Crashed"), + Message: "crash", + Time: strfmt.DateTime(testTime.Add(-10 * 24 * time.Hour)), + }, + LastEventTime: strfmt.DateTime(testTime.Add(-7 * 24 * time.Hour)), + Log: []*models.V1MachineProvisioningEvent{ + { + Event: pointer.Pointer("Phoned Home"), + Message: "phoning home", + Time: strfmt.DateTime(testTime.Add(-7 * 24 * time.Hour)), + }, + }, + }, + Hardware: &models.V1MachineHardware{ + CPUCores: pointer.Pointer(int32(16)), + Disks: []*models.V1MachineBlockDevice{}, + Memory: pointer.Pointer(int64(32)), + Nics: []*models.V1MachineNic{}, + }, + ID: pointer.Pointer("3"), + Ledstate: &models.V1ChassisIdentifyLEDState{ + Description: pointer.Pointer(""), + Value: 
pointer.Pointer(""), + }, + Liveliness: pointer.Pointer("Unhealthy"), + Name: "firewall-3", + Partition: partition1, + Rackid: "rack-1", + Size: size1, + State: &models.V1MachineState{ + Description: pointer.Pointer("state"), + Issuer: "issuer", + MetalHammerVersion: pointer.Pointer("version"), + Value: pointer.Pointer(""), + }, + Tags: []string{"a"}, + } ) // we are sharing a client for the tests, so we need to make sure we do not run contradicting tests in parallel diff --git a/integration/suite_test.go b/integration/suite_test.go index 608e49f..e3834a9 100644 --- a/integration/suite_test.go +++ b/integration/suite_test.go @@ -130,7 +130,7 @@ var _ = BeforeSuite(func() { ClusterTag: fmt.Sprintf("%s=%s", tag.ClusterID, "cluster-a"), SafetyBackoff: 10 * time.Second, ProgressDeadline: 10 * time.Minute, - FirewallHealthTimeout: 20 * time.Minute, + FirewallHealthTimeout: 19 * 24 * time.Hour, CreateTimeout: 10 * time.Minute, }) Expect(err).ToNot(HaveOccurred()) From 9605a18a1b032500201974274e49678158b622d0 Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Mon, 4 Nov 2024 15:31:00 +0100 Subject: [PATCH 06/28] refactor --- controllers/set/delete.go | 31 +++------------------------ controllers/set/status.go | 45 +++++++++++++++++++++++---------------- 2 files changed, 30 insertions(+), 46 deletions(-) diff --git a/controllers/set/delete.go b/controllers/set/delete.go index 4988449..f0129fe 100644 --- a/controllers/set/delete.go +++ b/controllers/set/delete.go @@ -45,15 +45,10 @@ func (c *controller) deleteFirewalls(r *controllers.Ctx[*v2.FirewallSet], fws .. 
return nil } - func (c *controller) deleteIfUnhealthyOrTimeout(r *controllers.Ctx[*v2.FirewallSet], fws ...*v2.Firewall) ([]*v2.Firewall, error) { var result []*v2.Firewall for _, fw := range fws { - fw := fw - - connected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue - if c.isFirewallUnhealthy(fw) { r.Log.Info("unhealthy firewall not recovering, deleting from set", "firewall-name", fw.Name) err := c.deleteFirewalls(r, fw) @@ -67,40 +62,20 @@ func (c *controller) deleteIfUnhealthyOrTimeout(r *controllers.Ctx[*v2.FirewallS if fw.Status.Phase != v2.FirewallPhaseCreating { continue } - + connected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue if !connected && time.Since(fw.CreationTimestamp.Time) > c.c.GetCreateTimeout() { r.Log.Info("firewall not getting ready, deleting from set", "firewall-name", fw.Name) - err := c.deleteFirewalls(r, fw) if err != nil { return nil, err } - result = append(result, fw) - } } - return result, nil } func (c *controller) isFirewallUnhealthy(fw *v2.Firewall) bool { - - var ( - created = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue - ready = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue - connected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue - seedConnected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue - distance = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue - ) - - if created && ready && connected && seedConnected && distance { - return false - } - - if created && time.Since(pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time) > c.c.GetFirewallHealthTimeout() { - return true - } - - return false + statusReport := 
evaluateFirewallConditions(fw, c.c.GetFirewallHealthTimeout()) + return statusReport.IsUnhealthy } diff --git a/controllers/set/status.go b/controllers/set/status.go index a71b8d2..c74aae2 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -8,35 +8,44 @@ import ( "github.com/metal-stack/metal-lib/pkg/pointer" ) +type FirewallConditionStatus struct { + IsReady, IsProgressing, IsUnhealthy bool +} + +func evaluateFirewallConditions(fw *v2.Firewall, healthTimeout time.Duration) FirewallConditionStatus { + created := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue + ready := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue + connected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue + seedConnected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue + distance := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue + + allConditionsMet := created && ready && connected && seedConnected && distance + allocationTimeExceeded := created && time.Since(pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time) < healthTimeout + unhealthyTimeExceeded := created && time.Since(pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time) > healthTimeout + + return FirewallConditionStatus{ + IsReady: allConditionsMet, + IsProgressing: created && allocationTimeExceeded, + IsUnhealthy: !allConditionsMet || unhealthyTimeExceeded, + } +} + func (c *controller) setStatus(r *controllers.Ctx[*v2.FirewallSet], ownedFirewalls []*v2.Firewall) error { r.Target.Status.TargetReplicas = r.Target.Spec.Replicas - r.Target.Status.ReadyReplicas = 0 r.Target.Status.ProgressingReplicas = 0 r.Target.Status.UnhealthyReplicas = 0 for _, fw := range ownedFirewalls { - var ( - fw = fw - - created = 
pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue - ready = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue - connected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue - seedConnected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue - distance = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue - ) + statusReport := evaluateFirewallConditions(fw, c.c.GetFirewallHealthTimeout()) - if created && ready && connected && seedConnected && distance { + if statusReport.IsReady { r.Target.Status.ReadyReplicas++ - continue - } - - if created && time.Since(pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time) < c.c.GetFirewallHealthTimeout() { + } else if statusReport.IsProgressing { r.Target.Status.ProgressingReplicas++ - continue + } else { + r.Target.Status.UnhealthyReplicas++ } - - r.Target.Status.UnhealthyReplicas++ } revision, err := controllers.Revision(r.Target) From c6b57588e619305e31255184e1b29473fb497177 Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Mon, 4 Nov 2024 15:59:58 +0100 Subject: [PATCH 07/28] Fix Refactoring --- controllers/set/delete.go | 12 +++++++++++- controllers/set/status.go | 13 +++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/controllers/set/delete.go b/controllers/set/delete.go index f0129fe..36918e4 100644 --- a/controllers/set/delete.go +++ b/controllers/set/delete.go @@ -76,6 +76,16 @@ func (c *controller) deleteIfUnhealthyOrTimeout(r *controllers.Ctx[*v2.FirewallS } func (c *controller) isFirewallUnhealthy(fw *v2.Firewall) bool { + statusReport := evaluateFirewallConditions(fw, c.c.GetFirewallHealthTimeout()) - return statusReport.IsUnhealthy + + if statusReport.IsReady { + return false + } + + if statusReport.IsUnhealthy { + return true + } + + 
return false } diff --git a/controllers/set/status.go b/controllers/set/status.go index c74aae2..d2076fe 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -25,8 +25,8 @@ func evaluateFirewallConditions(fw *v2.Firewall, healthTimeout time.Duration) Fi return FirewallConditionStatus{ IsReady: allConditionsMet, - IsProgressing: created && allocationTimeExceeded, - IsUnhealthy: !allConditionsMet || unhealthyTimeExceeded, + IsProgressing: allocationTimeExceeded, + IsUnhealthy: unhealthyTimeExceeded, } } @@ -38,14 +38,15 @@ func (c *controller) setStatus(r *controllers.Ctx[*v2.FirewallSet], ownedFirewal for _, fw := range ownedFirewalls { statusReport := evaluateFirewallConditions(fw, c.c.GetFirewallHealthTimeout()) - if statusReport.IsReady { r.Target.Status.ReadyReplicas++ - } else if statusReport.IsProgressing { + continue + } + if statusReport.IsProgressing { r.Target.Status.ProgressingReplicas++ - } else { - r.Target.Status.UnhealthyReplicas++ + continue } + r.Target.Status.UnhealthyReplicas++ } revision, err := controllers.Revision(r.Target) From 2fa826d6bd4b918ac82fd056ee1bdabeef0c76c0 Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Thu, 7 Nov 2024 12:18:52 +0100 Subject: [PATCH 08/28] Finish refactor --- controllers/set/delete.go | 38 +++++------------------- controllers/set/status.go | 61 ++++++++++++++++++++++++++------------- 2 files changed, 48 insertions(+), 51 deletions(-) diff --git a/controllers/set/delete.go b/controllers/set/delete.go index 36918e4..31dc198 100644 --- a/controllers/set/delete.go +++ b/controllers/set/delete.go @@ -6,7 +6,6 @@ import ( v2 "github.com/metal-stack/firewall-controller-manager/api/v2" "github.com/metal-stack/firewall-controller-manager/controllers" - "github.com/metal-stack/metal-lib/pkg/pointer" ) func (c *controller) Delete(r *controllers.Ctx[*v2.FirewallSet]) error { @@ -49,43 +48,20 @@ func (c *controller) deleteIfUnhealthyOrTimeout(r *controllers.Ctx[*v2.FirewallS var result []*v2.Firewall for 
_, fw := range fws { - if c.isFirewallUnhealthy(fw) { - r.Log.Info("unhealthy firewall not recovering, deleting from set", "firewall-name", fw.Name) - err := c.deleteFirewalls(r, fw) - if err != nil { - return nil, err - } - result = append(result, fw) - continue - } + status := c.evaluateFirewallConditions(fw) + + switch { + case status.CreateTimeout || status.HealthTimeout: + r.Log.Info("firewall health or creation timeout exceeded, deleting from set", "firewall-name", fw.Name) - if fw.Status.Phase != v2.FirewallPhaseCreating { - continue - } - connected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue - if !connected && time.Since(fw.CreationTimestamp.Time) > c.c.GetCreateTimeout() { - r.Log.Info("firewall not getting ready, deleting from set", "firewall-name", fw.Name) err := c.deleteFirewalls(r, fw) if err != nil { return nil, err } + result = append(result, fw) } - } - return result, nil -} - -func (c *controller) isFirewallUnhealthy(fw *v2.Firewall) bool { - statusReport := evaluateFirewallConditions(fw, c.c.GetFirewallHealthTimeout()) - - if statusReport.IsReady { - return false - } - - if statusReport.IsUnhealthy { - return true } - - return false + return result, nil } diff --git a/controllers/set/status.go b/controllers/set/status.go index d2076fe..e46a810 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -8,25 +8,44 @@ import ( "github.com/metal-stack/metal-lib/pkg/pointer" ) -type FirewallConditionStatus struct { - IsReady, IsProgressing, IsUnhealthy bool +type firewallConditionStatus struct { + IsReady bool + CreateTimeout bool + HealthTimeout bool } -func evaluateFirewallConditions(fw *v2.Firewall, healthTimeout time.Duration) FirewallConditionStatus { - created := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue - ready := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue - connected := 
pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue - seedConnected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue - distance := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue +func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditionStatus { + unhealthyTimeout := c.c.GetFirewallHealthTimeout() + allocationTimeout := c.c.GetCreateTimeout() - allConditionsMet := created && ready && connected && seedConnected && distance - allocationTimeExceeded := created && time.Since(pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time) < healthTimeout - unhealthyTimeExceeded := created && time.Since(pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time) > healthTimeout + var ( + created = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue + ready = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue + connected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue + seedConnected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue + distanceConfigured = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue + allConditionsMet = created && ready && connected && seedConnected && distanceConfigured + createTimeoutExceeded bool + healthTimeoutExceeded bool + ) - return FirewallConditionStatus{ + allocationTimestamp := pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time + timeSinceAllocation := time.Since(allocationTimestamp) + + if created && timeSinceAllocation > allocationTimeout { + createTimeoutExceeded = true + return firewallConditionStatus{CreateTimeout: true} + } + + if created && timeSinceAllocation > 
unhealthyTimeout { + healthTimeoutExceeded = true + return firewallConditionStatus{HealthTimeout: true} + } + + return firewallConditionStatus{ IsReady: allConditionsMet, - IsProgressing: allocationTimeExceeded, - IsUnhealthy: unhealthyTimeExceeded, + CreateTimeout: createTimeoutExceeded, + HealthTimeout: healthTimeoutExceeded, } } @@ -37,16 +56,18 @@ func (c *controller) setStatus(r *controllers.Ctx[*v2.FirewallSet], ownedFirewal r.Target.Status.UnhealthyReplicas = 0 for _, fw := range ownedFirewalls { - statusReport := evaluateFirewallConditions(fw, c.c.GetFirewallHealthTimeout()) - if statusReport.IsReady { + statusReport := c.evaluateFirewallConditions(fw) + + switch { + case statusReport.IsReady: r.Target.Status.ReadyReplicas++ continue - } - if statusReport.IsProgressing { - r.Target.Status.ProgressingReplicas++ + case statusReport.CreateTimeout || statusReport.HealthTimeout: + r.Target.Status.UnhealthyReplicas++ continue } - r.Target.Status.UnhealthyReplicas++ + + r.Target.Status.ProgressingReplicas++ } revision, err := controllers.Revision(r.Target) From 47f40299d677d815b9d068305bedc16ef1481f26 Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Thu, 7 Nov 2024 12:50:16 +0100 Subject: [PATCH 09/28] Updated allocation timeout to longer than created timeout --- controllers/set/status.go | 24 ++++++++++++------------ integration/suite_test.go | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/controllers/set/status.go b/controllers/set/status.go index e46a810..61b3686 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -19,33 +19,33 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi allocationTimeout := c.c.GetCreateTimeout() var ( - created = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue - ready = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue - connected = 
pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue - seedConnected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue - distanceConfigured = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue - allConditionsMet = created && ready && connected && seedConnected && distanceConfigured - createTimeoutExceeded bool - healthTimeoutExceeded bool + created = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue + ready = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue + connected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue + seedConnected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue + distanceConfigured = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue + allConditionsMet = created && ready && connected && seedConnected && distanceConfigured ) allocationTimestamp := pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time timeSinceAllocation := time.Since(allocationTimestamp) + if allConditionsMet { + return firewallConditionStatus{IsReady: true} + } + if created && timeSinceAllocation > allocationTimeout { - createTimeoutExceeded = true return firewallConditionStatus{CreateTimeout: true} } if created && timeSinceAllocation > unhealthyTimeout { - healthTimeoutExceeded = true return firewallConditionStatus{HealthTimeout: true} } return firewallConditionStatus{ IsReady: allConditionsMet, - CreateTimeout: createTimeoutExceeded, - HealthTimeout: healthTimeoutExceeded, + CreateTimeout: false, + HealthTimeout: false, } } diff --git a/integration/suite_test.go b/integration/suite_test.go index e3834a9..8683098 100644 --- a/integration/suite_test.go +++ 
b/integration/suite_test.go @@ -131,7 +131,7 @@ var _ = BeforeSuite(func() { SafetyBackoff: 10 * time.Second, ProgressDeadline: 10 * time.Minute, FirewallHealthTimeout: 19 * 24 * time.Hour, - CreateTimeout: 10 * time.Minute, + CreateTimeout: 19 * 24 * time.Hour, }) Expect(err).ToNot(HaveOccurred()) From 21d648cb66bbef50cee5ab0d68371b3bb9e469fc Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Thu, 7 Nov 2024 13:18:08 +0100 Subject: [PATCH 10/28] Check if firewall is creating before setting allocation timeout --- controllers/set/status.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/controllers/set/status.go b/controllers/set/status.go index 61b3686..b02e11c 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -35,7 +35,12 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi } if created && timeSinceAllocation > allocationTimeout { - return firewallConditionStatus{CreateTimeout: true} + + // If the firewall is still creating, don't set a timeout + if fw.Status.Phase != v2.FirewallPhaseCreating { + return firewallConditionStatus{CreateTimeout: true} + } + } if created && timeSinceAllocation > unhealthyTimeout { From 4d9affd6403debf7b999a5f0b8e7ed3aea03c38e Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Thu, 7 Nov 2024 14:20:01 +0100 Subject: [PATCH 11/28] Updated with seed --- controllers/set/status.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/controllers/set/status.go b/controllers/set/status.go index b02e11c..ad3aa83 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -27,7 +27,7 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi allConditionsMet = created && ready && connected && seedConnected && distanceConfigured ) - allocationTimestamp := pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time + allocationTimestamp := pointer.SafeDeref(fw.Status.ControllerStatus).SeedUpdated.Time 
timeSinceAllocation := time.Since(allocationTimestamp) if allConditionsMet { @@ -43,7 +43,7 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi } - if created && timeSinceAllocation > unhealthyTimeout { + if unhealthyTimeout != 0 && created && timeSinceAllocation > unhealthyTimeout { return firewallConditionStatus{HealthTimeout: true} } From 026254622c37f4984475f9b46f95ada5cea95666 Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Fri, 8 Nov 2024 10:32:23 +0100 Subject: [PATCH 12/28] update integration test --- controllers/set/status.go | 19 +++++++++--------- integration/integration_test.go | 34 ++++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/controllers/set/status.go b/controllers/set/status.go index ad3aa83..3ed9291 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -27,26 +27,25 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi allConditionsMet = created && ready && connected && seedConnected && distanceConfigured ) - allocationTimestamp := pointer.SafeDeref(fw.Status.ControllerStatus).SeedUpdated.Time - timeSinceAllocation := time.Since(allocationTimestamp) + seedUpdatedTime := pointer.SafeDeref(fw.Status.ControllerStatus).SeedUpdated.Time + timeSinceReconcile := time.Since(seedUpdatedTime) if allConditionsMet { return firewallConditionStatus{IsReady: true} } - if created && timeSinceAllocation > allocationTimeout { - - // If the firewall is still creating, don't set a timeout - if fw.Status.Phase != v2.FirewallPhaseCreating { - return firewallConditionStatus{CreateTimeout: true} - } - + // duration after which a firewall in the creation phase will be recreated, exceeded + if fw.Status.Phase == v2.FirewallPhaseCreating && timeSinceReconcile > allocationTimeout { + c.log.Info("create timeout reached") + return firewallConditionStatus{CreateTimeout: true} } - if unhealthyTimeout != 0 && created && timeSinceAllocation > 
unhealthyTimeout { + if seedConnected && unhealthyTimeout != 0 && created && timeSinceReconcile > unhealthyTimeout { + c.log.Info("unhealthy timeout reached") return firewallConditionStatus{HealthTimeout: true} } + //if everything returns false, it is progressing return firewallConditionStatus{ IsReady: allConditionsMet, CreateTimeout: false, diff --git a/integration/integration_test.go b/integration/integration_test.go index 7d48b99..28928a5 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -15,6 +15,7 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/retry" testcommon "github.com/metal-stack/firewall-controller-manager/integration/common" @@ -1936,6 +1937,7 @@ var _ = Context("integration test", Ordered, func() { }) Expect(k8sClient.Create(ctx, deployment())).To(Succeed()) + Eventually(func() error { firewallSetList := &v2.FirewallSetList{} err := k8sClient.List(ctx, firewallSetList, client.InNamespace(namespaceName)) @@ -1953,7 +1955,6 @@ var _ = Context("integration test", Ordered, func() { It("should simulate unhealthiness and trigger deletion", func() { firewallList := &v2.FirewallList{} Eventually(func() int { - err := k8sClient.List(ctx, firewallList, client.InNamespace(firewallSet.Namespace)) if err != nil { return 0 @@ -1961,13 +1962,36 @@ var _ = Context("integration test", Ordered, func() { return len(firewallList.Items) }, 15*time.Second, interval).Should(BeNumerically(">", 0), "Should have at least one firewall") - By("waiting for the firewall to be deleted") Eventually(func() bool { - for _, fw := range firewallList.Items { - err := k8sClient.Get(ctx, client.ObjectKeyFromObject(&fw), &v2.Firewall{}) - if !apierrors.IsNotFound(err) { + for _, item := range firewallList.Items { + var fw v2.Firewall + err := k8sClient.Get(ctx, client.ObjectKeyFromObject(&item), &fw) + if err != nil { + fmt.Printf("Failed to get 
firewall: %v\n", err) return false } + + if fw.Status.ControllerStatus == nil { + fw.Status.ControllerStatus = &v2.ControllerConnection{} + } + //add a fake concile so the unhealty firewall gets deleted + fw.Status.ControllerStatus.SeedUpdated.Time = time.Now().Add(-20 * 24 * time.Hour) + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(&fw), &fw); err != nil { + return err + } + if fw.Status.ControllerStatus == nil { + fw.Status.ControllerStatus = &v2.ControllerConnection{} + } + fw.Status.ControllerStatus.SeedUpdated.Time = time.Now().Add(-20 * 24 * time.Hour) + return k8sClient.Status().Update(ctx, &fw) + }) + + if err != nil { + fmt.Printf("Failed to update firewall status: %v\n", err) + return false + } + } return true }, 10*time.Second, interval).Should(BeTrue(), "All Firewalls should be deleted") From fe0994c2b2f25d01dce81b7c4f277d6d8be5c6b9 Mon Sep 17 00:00:00 2001 From: Honigeintopf Date: Fri, 8 Nov 2024 10:41:36 +0100 Subject: [PATCH 13/28] Adjust test to not use retry on conflict --- integration/integration_test.go | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/integration/integration_test.go b/integration/integration_test.go index 28928a5..cc46940 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -15,7 +15,6 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/util/retry" testcommon "github.com/metal-stack/firewall-controller-manager/integration/common" @@ -1962,13 +1961,13 @@ var _ = Context("integration test", Ordered, func() { return len(firewallList.Items) }, 15*time.Second, interval).Should(BeNumerically(">", 0), "Should have at least one firewall") - Eventually(func() bool { + Eventually(func() error { for _, item := range firewallList.Items { var fw v2.Firewall err := k8sClient.Get(ctx, 
client.ObjectKeyFromObject(&item), &fw) if err != nil { fmt.Printf("Failed to get firewall: %v\n", err) - return false + return err } if fw.Status.ControllerStatus == nil { @@ -1976,25 +1975,22 @@ var _ = Context("integration test", Ordered, func() { } //add a fake concile so the unhealty firewall gets deleted fw.Status.ControllerStatus.SeedUpdated.Time = time.Now().Add(-20 * 24 * time.Hour) - err = retry.RetryOnConflict(retry.DefaultRetry, func() error { - if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(&fw), &fw); err != nil { - return err - } - if fw.Status.ControllerStatus == nil { - fw.Status.ControllerStatus = &v2.ControllerConnection{} - } - fw.Status.ControllerStatus.SeedUpdated.Time = time.Now().Add(-20 * 24 * time.Hour) - return k8sClient.Status().Update(ctx, &fw) - }) - + if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(&fw), &fw); err != nil { + return err + } + if fw.Status.ControllerStatus == nil { + fw.Status.ControllerStatus = &v2.ControllerConnection{} + } + fw.Status.ControllerStatus.SeedUpdated.Time = time.Now().Add(-20 * 24 * time.Hour) + err = k8sClient.Status().Update(ctx, &fw) if err != nil { fmt.Printf("Failed to update firewall status: %v\n", err) - return false + return err } - } - return true - }, 10*time.Second, interval).Should(BeTrue(), "All Firewalls should be deleted") + + return nil + }, 10*time.Second, interval).Should(Succeed(), "All Firewalls should be deleted") By("verifying that a new firewall has been created") Eventually(func() int { From aec103335334abcca34c4b2b02d4e478e895a1e2 Mon Sep 17 00:00:00 2001 From: Ebubekir Ates <109050136+Honigeintopf@users.noreply.github.com> Date: Thu, 22 Jan 2026 09:32:40 +0100 Subject: [PATCH 14/28] Update integration/integration_test.go Co-authored-by: Stefan Majer --- integration/integration_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/integration_test.go b/integration/integration_test.go index 3f5be90..e2eab81 100644 --- 
a/integration/integration_test.go +++ b/integration/integration_test.go @@ -1984,7 +1984,7 @@ var _ = Context("integration test", Ordered, func() { fw.Status.ControllerStatus.SeedUpdated.Time = time.Now().Add(-20 * 24 * time.Hour) err = k8sClient.Status().Update(ctx, &fw) if err != nil { - fmt.Printf("Failed to update firewall status: %v\n", err) + return fmt.Errorf("Failed to update firewall status: %v\n", err) return err } } From 15bdf7b417da7fc250027dfe279fd8b27253c32e Mon Sep 17 00:00:00 2001 From: honigeintopf Date: Thu, 22 Jan 2026 12:16:46 +0100 Subject: [PATCH 15/28] check for allocation timeout set --- controllers/set/status.go | 10 ++++++---- integration/integration_test.go | 12 ++---------- integration/suite_test.go | 8 +++++--- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/controllers/set/status.go b/controllers/set/status.go index 3ed9291..632519f 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -29,17 +29,19 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi seedUpdatedTime := pointer.SafeDeref(fw.Status.ControllerStatus).SeedUpdated.Time timeSinceReconcile := time.Since(seedUpdatedTime) + allocationTime := pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time if allConditionsMet { return firewallConditionStatus{IsReady: true} } // duration after which a firewall in the creation phase will be recreated, exceeded - if fw.Status.Phase == v2.FirewallPhaseCreating && timeSinceReconcile > allocationTimeout { - c.log.Info("create timeout reached") - return firewallConditionStatus{CreateTimeout: true} + if allocationTimeout != 0 && fw.Status.Phase == v2.FirewallPhaseCreating && !allocationTime.IsZero() { + if time.Since(allocationTime) > allocationTimeout { + c.log.Info("create timeout reached") + return firewallConditionStatus{CreateTimeout: true} + } } - if seedConnected && unhealthyTimeout != 0 && created && timeSinceReconcile > unhealthyTimeout { 
c.log.Info("unhealthy timeout reached") return firewallConditionStatus{HealthTimeout: true} diff --git a/integration/integration_test.go b/integration/integration_test.go index e2eab81..6bd568b 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -1974,18 +1974,10 @@ var _ = Context("integration test", Ordered, func() { fw.Status.ControllerStatus = &v2.ControllerConnection{} } //add a fake concile so the unhealty firewall gets deleted - fw.Status.ControllerStatus.SeedUpdated.Time = time.Now().Add(-20 * 24 * time.Hour) - if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(&fw), &fw); err != nil { - return err - } - if fw.Status.ControllerStatus == nil { - fw.Status.ControllerStatus = &v2.ControllerConnection{} - } - fw.Status.ControllerStatus.SeedUpdated.Time = time.Now().Add(-20 * 24 * time.Hour) + fw.Status.ControllerStatus.SeedUpdated.Time = time.Now().Add(-(firewallHealthTimeout + time.Minute)) err = k8sClient.Status().Update(ctx, &fw) if err != nil { - return fmt.Errorf("Failed to update firewall status: %v\n", err) - return err + return fmt.Errorf("failed to update firewall status: %w", err) } } diff --git a/integration/suite_test.go b/integration/suite_test.go index 8683098..1eaea8b 100644 --- a/integration/suite_test.go +++ b/integration/suite_test.go @@ -32,7 +32,9 @@ import ( ) const ( - namespaceName = "test" + namespaceName = "test" + firewallHealthTimeout = 19 * 24 * time.Hour + firewallCreateTimeout = 19 * 24 * time.Hour ) var ( @@ -130,8 +132,8 @@ var _ = BeforeSuite(func() { ClusterTag: fmt.Sprintf("%s=%s", tag.ClusterID, "cluster-a"), SafetyBackoff: 10 * time.Second, ProgressDeadline: 10 * time.Minute, - FirewallHealthTimeout: 19 * 24 * time.Hour, - CreateTimeout: 19 * 24 * time.Hour, + FirewallHealthTimeout: firewallHealthTimeout, + CreateTimeout: firewallCreateTimeout, }) Expect(err).ToNot(HaveOccurred()) From 8a4f4dc66ce10ef32709e824b9c5722bb8a7969f Mon Sep 17 00:00:00 2001 From: Ebubekir Ates 
<109050136+Honigeintopf@users.noreply.github.com> Date: Fri, 23 Jan 2026 12:37:12 +0100 Subject: [PATCH 16/28] Update controllers/set/status.go Co-authored-by: Gerrit --- controllers/set/status.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/set/status.go b/controllers/set/status.go index 632519f..40fdf1e 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -43,7 +43,7 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi } } if seedConnected && unhealthyTimeout != 0 && created && timeSinceReconcile > unhealthyTimeout { - c.log.Info("unhealthy timeout reached") + c.log.Info("health timeout exceeded", "firewall-name", fw.Name, "last-reconciled-at", seedUpdatedTime.String(), "timeout-after", unhealthyTimeout.String()) return firewallConditionStatus{HealthTimeout: true} } From 31b364e20a9b0fe23dfe8d76b47267baaa335a4b Mon Sep 17 00:00:00 2001 From: Ebubekir Ates <109050136+Honigeintopf@users.noreply.github.com> Date: Fri, 23 Jan 2026 12:37:18 +0100 Subject: [PATCH 17/28] Update controllers/set/status.go Co-authored-by: Gerrit --- controllers/set/status.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/controllers/set/status.go b/controllers/set/status.go index 40fdf1e..945a9cd 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -15,21 +15,21 @@ type firewallConditionStatus struct { } func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditionStatus { - unhealthyTimeout := c.c.GetFirewallHealthTimeout() - allocationTimeout := c.c.GetCreateTimeout() - var ( + unhealthyTimeout = c.c.GetFirewallHealthTimeout() + allocationTimeout = c.c.GetCreateTimeout() + created = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue ready = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue connected = 
pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue seedConnected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue distanceConfigured = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue allConditionsMet = created && ready && connected && seedConnected && distanceConfigured - ) - seedUpdatedTime := pointer.SafeDeref(fw.Status.ControllerStatus).SeedUpdated.Time - timeSinceReconcile := time.Since(seedUpdatedTime) - allocationTime := pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time + seedUpdatedTime = pointer.SafeDeref(fw.Status.ControllerStatus).SeedUpdated.Time + timeSinceReconcile = time.Since(seedUpdatedTime) + allocationTime = pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time + ) if allConditionsMet { return firewallConditionStatus{IsReady: true} From 6e4d69c1763f4361b2066556be6f13c2f4a18d29 Mon Sep 17 00:00:00 2001 From: Ebubekir Ates <109050136+Honigeintopf@users.noreply.github.com> Date: Fri, 23 Jan 2026 15:09:32 +0100 Subject: [PATCH 18/28] Update controllers/set/status.go Co-authored-by: Gerrit --- controllers/set/status.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/set/status.go b/controllers/set/status.go index 945a9cd..ab3d5df 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -38,7 +38,7 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi // duration after which a firewall in the creation phase will be recreated, exceeded if allocationTimeout != 0 && fw.Status.Phase == v2.FirewallPhaseCreating && !allocationTime.IsZero() { if time.Since(allocationTime) > allocationTimeout { - c.log.Info("create timeout reached") + c.log.Info("create timeout exceeded", "firewall-name", fw.Name, "allocated-at", allocationTime.String(), "timeout-after", allocationTimeout.String()) return 
firewallConditionStatus{CreateTimeout: true} } } From 3d9264492f0dd6586bd7fce6dffa129f5cb644fd Mon Sep 17 00:00:00 2001 From: Ebubekir Ates <109050136+Honigeintopf@users.noreply.github.com> Date: Tue, 27 Jan 2026 11:23:29 +0100 Subject: [PATCH 19/28] Apply suggestions from code review Co-authored-by: Gerrit --- controllers/set/status.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/set/status.go b/controllers/set/status.go index ab3d5df..f0c8a2d 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -42,7 +42,7 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi return firewallConditionStatus{CreateTimeout: true} } } - if seedConnected && unhealthyTimeout != 0 && created && timeSinceReconcile > unhealthyTimeout { + if (!ready || !seedConnected || !connected) && unhealthyTimeout != 0 && created && timeSinceReconcile > unhealthyTimeout { c.log.Info("health timeout exceeded", "firewall-name", fw.Name, "last-reconciled-at", seedUpdatedTime.String(), "timeout-after", unhealthyTimeout.String()) return firewallConditionStatus{HealthTimeout: true} } From 1323e3fccef83f75ef28dde07c510a50985f956f Mon Sep 17 00:00:00 2001 From: honigeintopf Date: Wed, 4 Feb 2026 10:18:51 +0100 Subject: [PATCH 20/28] set seed reconcile time --- integration/integration_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/integration/integration_test.go b/integration/integration_test.go index 6bd568b..3a7f9b3 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -176,6 +176,7 @@ var _ = Context("integration test", Ordered, func() { }, Machine: func(m *mock.Mock) { m.On("UpdateMachine", mock.Anything, nil).Return(&machine.UpdateMachineOK{Payload: &models.V1MachineResponse{}}, nil).Maybe() + m.On("FreeMachine", mock.Anything, nil).Return(&machine.FreeMachineOK{Payload: &models.V1MachineResponse{ID: firewall1.ID}}, nil).Maybe() }, Image: func(m *mock.Mock) { 
m.On("FindLatestImage", mock.Anything, nil).Return(&image.FindLatestImageOK{Payload: image1}, nil).Maybe() @@ -215,6 +216,14 @@ var _ = Context("integration test", Ordered, func() { fw = testcommon.WaitForResourceAmount(k8sClient, ctx, namespaceName, 1, &v2.FirewallList{}, func(l *v2.FirewallList) []*v2.Firewall { return l.GetItems() }, 15*time.Second) + + // Prevent immediate health-timeout in tests by setting a recent seed reconciliation time. + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(fw), fw)).To(Succeed()) + if fw.Status.ControllerStatus == nil { + fw.Status.ControllerStatus = &v2.ControllerConnection{} + } + fw.Status.ControllerStatus.SeedUpdated = metav1.Now() + Expect(k8sClient.Status().Update(ctx, fw)).To(Succeed()) }) It("should create a firewall monitor", func() { From 8472f4e6775c2e1363a7d32b43b4e2687a2a921f Mon Sep 17 00:00:00 2001 From: honigeintopf Date: Wed, 4 Feb 2026 10:45:07 +0100 Subject: [PATCH 21/28] remove annotation of fw to set reconcile connected but never reconciled. --- integration/integration_test.go | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/integration/integration_test.go b/integration/integration_test.go index 3a7f9b3..8709463 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -217,19 +217,33 @@ var _ = Context("integration test", Ordered, func() { return l.GetItems() }, 15*time.Second) - // Prevent immediate health-timeout in tests by setting a recent seed reconciliation time. + // Ensure no-controller-connection is not set so health-timeout checks don't fire immediately. 
Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(fw), fw)).To(Succeed()) - if fw.Status.ControllerStatus == nil { - fw.Status.ControllerStatus = &v2.ControllerConnection{} - } - fw.Status.ControllerStatus.SeedUpdated = metav1.Now() - Expect(k8sClient.Status().Update(ctx, fw)).To(Succeed()) + Expect(v2.RemoveAnnotation(ctx, k8sClient, fw, v2.FirewallNoControllerConnectionAnnotation)).To(Succeed()) }) It("should create a firewall monitor", func() { - mon = testcommon.WaitForResourceAmount(k8sClient, ctx, namespaceName, 1, &v2.FirewallMonitorList{}, func(l *v2.FirewallMonitorList) []*v2.FirewallMonitor { - return l.GetItems() - }, 15*time.Second) + // Wait for a monitor and immediately fake a controller status update to avoid health-timeout deletion. + Eventually(func() error { + list := &v2.FirewallMonitorList{} + if err := k8sClient.List(ctx, list, client.InNamespace(namespaceName)); err != nil { + return err + } + if len(list.Items) != 1 { + return fmt.Errorf("expected 1 firewall monitor, got %d", len(list.Items)) + } + + mon = list.Items[0].DeepCopy() + now := metav1.Now() + mon.ControllerStatus = &v2.ControllerStatus{ + Updated: now, + SeedUpdated: now, + Distance: v2.FirewallShortestDistance, + DistanceSupported: true, + } + + return k8sClient.Update(ctx, mon) + }, 15*time.Second, interval).Should(Succeed()) }) It("should allow an update of the firewall monitor", func() { From 8cf61d4506de523688d7fb0a0a02eb2357202421 Mon Sep 17 00:00:00 2001 From: honigeintopf Date: Wed, 4 Feb 2026 10:56:28 +0100 Subject: [PATCH 22/28] only apply health timeoput if we actually have a seed connected once --- controllers/set/status.go | 3 ++- integration/integration_test.go | 28 +++------------------------- 2 files changed, 5 insertions(+), 26 deletions(-) diff --git a/controllers/set/status.go b/controllers/set/status.go index f0c8a2d..cc992fc 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -42,7 +42,8 @@ func (c *controller) 
evaluateFirewallConditions(fw *v2.Firewall) firewallConditi return firewallConditionStatus{CreateTimeout: true} } } - if (!ready || !seedConnected || !connected) && unhealthyTimeout != 0 && created && timeSinceReconcile > unhealthyTimeout { + // Only apply health timeout once we have a non-zero seed reconcile timestamp. + if (!ready || !seedConnected || !connected) && unhealthyTimeout != 0 && created && !seedUpdatedTime.IsZero() && timeSinceReconcile > unhealthyTimeout { c.log.Info("health timeout exceeded", "firewall-name", fw.Name, "last-reconciled-at", seedUpdatedTime.String(), "timeout-after", unhealthyTimeout.String()) return firewallConditionStatus{HealthTimeout: true} } diff --git a/integration/integration_test.go b/integration/integration_test.go index 8709463..98fae60 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -216,34 +216,12 @@ var _ = Context("integration test", Ordered, func() { fw = testcommon.WaitForResourceAmount(k8sClient, ctx, namespaceName, 1, &v2.FirewallList{}, func(l *v2.FirewallList) []*v2.Firewall { return l.GetItems() }, 15*time.Second) - - // Ensure no-controller-connection is not set so health-timeout checks don't fire immediately. - Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(fw), fw)).To(Succeed()) - Expect(v2.RemoveAnnotation(ctx, k8sClient, fw, v2.FirewallNoControllerConnectionAnnotation)).To(Succeed()) }) It("should create a firewall monitor", func() { - // Wait for a monitor and immediately fake a controller status update to avoid health-timeout deletion. 
- Eventually(func() error { - list := &v2.FirewallMonitorList{} - if err := k8sClient.List(ctx, list, client.InNamespace(namespaceName)); err != nil { - return err - } - if len(list.Items) != 1 { - return fmt.Errorf("expected 1 firewall monitor, got %d", len(list.Items)) - } - - mon = list.Items[0].DeepCopy() - now := metav1.Now() - mon.ControllerStatus = &v2.ControllerStatus{ - Updated: now, - SeedUpdated: now, - Distance: v2.FirewallShortestDistance, - DistanceSupported: true, - } - - return k8sClient.Update(ctx, mon) - }, 15*time.Second, interval).Should(Succeed()) + mon = testcommon.WaitForResourceAmount(k8sClient, ctx, namespaceName, 1, &v2.FirewallMonitorList{}, func(l *v2.FirewallMonitorList) []*v2.FirewallMonitor { + return l.GetItems() + }, 15*time.Second) }) It("should allow an update of the firewall monitor", func() { From 851fb1895857568ad3b996835efe744f005a0c09 Mon Sep 17 00:00:00 2001 From: honigeintopf Date: Wed, 4 Feb 2026 11:14:48 +0100 Subject: [PATCH 23/28] allow 0s timeout to disable health timeout --- api/v2/config/controller.go | 4 ++-- controllers/set/delete.go | 4 +++- controllers/set/status.go | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/api/v2/config/controller.go b/api/v2/config/controller.go index f12a185..8a607e3 100644 --- a/api/v2/config/controller.go +++ b/api/v2/config/controller.go @@ -182,10 +182,10 @@ func (c *NewControllerConfig) validate() error { if c.ProgressDeadline <= 0 { return fmt.Errorf("progress deadline must be specified") } - if c.FirewallHealthTimeout <= 0 { + if c.FirewallHealthTimeout < 0 { return fmt.Errorf("firewall health timeout must be specified") } - if c.CreateTimeout <= 0 { + if c.CreateTimeout < 0 { return fmt.Errorf("create timeout must be specified") } diff --git a/controllers/set/delete.go b/controllers/set/delete.go index 31dc198..ffbe2ee 100644 --- a/controllers/set/delete.go +++ b/controllers/set/delete.go @@ -46,12 +46,14 @@ func (c *controller) deleteFirewalls(r 
*controllers.Ctx[*v2.FirewallSet], fws .. } func (c *controller) deleteIfUnhealthyOrTimeout(r *controllers.Ctx[*v2.FirewallSet], fws ...*v2.Firewall) ([]*v2.Firewall, error) { var result []*v2.Firewall + createTimeout := c.c.GetCreateTimeout() + healthTimeout := c.c.GetFirewallHealthTimeout() for _, fw := range fws { status := c.evaluateFirewallConditions(fw) switch { - case status.CreateTimeout || status.HealthTimeout: + case (createTimeout > 0 && status.CreateTimeout) || (healthTimeout > 0 && status.HealthTimeout): r.Log.Info("firewall health or creation timeout exceeded, deleting from set", "firewall-name", fw.Name) err := c.deleteFirewalls(r, fw) diff --git a/controllers/set/status.go b/controllers/set/status.go index cc992fc..97f5cbd 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -36,14 +36,14 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi } // duration after which a firewall in the creation phase will be recreated, exceeded - if allocationTimeout != 0 && fw.Status.Phase == v2.FirewallPhaseCreating && !allocationTime.IsZero() { + if allocationTimeout > 0 && fw.Status.Phase == v2.FirewallPhaseCreating && !allocationTime.IsZero() { if time.Since(allocationTime) > allocationTimeout { c.log.Info("create timeout exceeded", "firewall-name", fw.Name, "allocated-at", allocationTime.String(), "timeout-after", allocationTimeout.String()) return firewallConditionStatus{CreateTimeout: true} } } // Only apply health timeout once we have a non-zero seed reconcile timestamp. 
- if (!ready || !seedConnected || !connected) && unhealthyTimeout != 0 && created && !seedUpdatedTime.IsZero() && timeSinceReconcile > unhealthyTimeout { + if (!ready || !seedConnected || !connected) && unhealthyTimeout > 0 && created && !seedUpdatedTime.IsZero() && timeSinceReconcile > unhealthyTimeout { c.log.Info("health timeout exceeded", "firewall-name", fw.Name, "last-reconciled-at", seedUpdatedTime.String(), "timeout-after", unhealthyTimeout.String()) return firewallConditionStatus{HealthTimeout: true} } From f4574a6986e18826e5e40cf34afdad97f94e7f31 Mon Sep 17 00:00:00 2001 From: honigeintopf Date: Mon, 9 Feb 2026 13:48:59 +0100 Subject: [PATCH 24/28] set health timeout if cond not met and fw phase running --- controllers/set/status.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/controllers/set/status.go b/controllers/set/status.go index 97f5cbd..11e4321 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -47,7 +47,10 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi c.log.Info("health timeout exceeded", "firewall-name", fw.Name, "last-reconciled-at", seedUpdatedTime.String(), "timeout-after", unhealthyTimeout.String()) return firewallConditionStatus{HealthTimeout: true} } - + // Firewall was set to ready at one point, but then one of the conditions in the meantime were set to false so the firewall is unhealthy + if !allConditionsMet && fw.Status.Phase == v2.FirewallPhaseRunning { + return firewallConditionStatus{HealthTimeout: true} + } //if everything returns false, it is progressing return firewallConditionStatus{ IsReady: allConditionsMet, @@ -69,6 +72,7 @@ func (c *controller) setStatus(r *controllers.Ctx[*v2.FirewallSet], ownedFirewal case statusReport.IsReady: r.Target.Status.ReadyReplicas++ continue + case statusReport.CreateTimeout || statusReport.HealthTimeout: r.Target.Status.UnhealthyReplicas++ continue From f6afb92ff49a086d607eacc40ea01c843fdc52ee Mon Sep 17 
00:00:00 2001 From: honigeintopf Date: Tue, 10 Feb 2026 11:56:04 +0100 Subject: [PATCH 25/28] new condition foir fw --- api/v2/types_firewall.go | 3 +++ controllers/firewall/reconcile.go | 5 +++++ controllers/firewall/status.go | 16 ++++++++++++++++ controllers/set/status.go | 6 ++++-- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/api/v2/types_firewall.go b/api/v2/types_firewall.go index 34b18e8..1df7499 100644 --- a/api/v2/types_firewall.go +++ b/api/v2/types_firewall.go @@ -183,6 +183,9 @@ const ( FirewallMonitorDeployed ConditionType = "MonitorDeployed" // FirewallDistanceConfigured indicates that the firewall-controller has configured the given firewall distance. FirewallDistanceConfigured ConditionType = "Distance" + // FirewallHealthy indicates that all health conditions have been met at least once. + // Once set to true, it stays true and is used to detect condition degradation. + FirewallHealthy ConditionType = "Healthy" ) // ShootAccess contains secret references to construct a shoot client in the firewall-controller to update its firewall monitor. 
diff --git a/controllers/firewall/reconcile.go b/controllers/firewall/reconcile.go index 89c07bc..0335630 100644 --- a/controllers/firewall/reconcile.go +++ b/controllers/firewall/reconcile.go @@ -30,6 +30,11 @@ func (c *controller) Reconcile(r *controllers.Ctx[*v2.Firewall]) error { } SetFirewallStatusFromMonitor(r.Target, mon) + + if isAllConditionsMet(r.Target) { + cond := v2.NewCondition(v2.FirewallHealthy, v2.ConditionTrue, "Healthy", "All firewall conditions have been met.") + r.Target.Status.Conditions.Set(cond) + } }() fws, err := c.firewallCache.Get(r.Ctx, r.Target) diff --git a/controllers/firewall/status.go b/controllers/firewall/status.go index 07e8698..594b7da 100644 --- a/controllers/firewall/status.go +++ b/controllers/firewall/status.go @@ -192,3 +192,19 @@ func SetFirewallStatusFromMonitor(fw *v2.Firewall, mon *v2.FirewallMonitor) { fw.Status.Conditions.Set(cond) } } + +func isAllConditionsMet(fw *v2.Firewall) bool { + for _, ct := range []v2.ConditionType{ + v2.FirewallCreated, + v2.FirewallReady, + v2.FirewallControllerConnected, + v2.FirewallControllerSeedConnected, + v2.FirewallDistanceConfigured, + } { + cond := fw.Status.Conditions.Get(ct) + if cond == nil || cond.Status != v2.ConditionTrue { + return false + } + } + return true +} diff --git a/controllers/set/status.go b/controllers/set/status.go index 11e4321..e0f0aa2 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -47,8 +47,10 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi c.log.Info("health timeout exceeded", "firewall-name", fw.Name, "last-reconciled-at", seedUpdatedTime.String(), "timeout-after", unhealthyTimeout.String()) return firewallConditionStatus{HealthTimeout: true} } - // Firewall was set to ready at one point, but then one of the conditions in the meantime were set to false so the firewall is unhealthy - if !allConditionsMet && fw.Status.Phase == v2.FirewallPhaseRunning { + // Firewall was healthy at one point (all 
conditions were met), but then one of the conditions degraded so the firewall is unhealthy + wasHealthy := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallHealthy)).Status == v2.ConditionTrue + if !allConditionsMet && wasHealthy && unhealthyTimeout > 0 { + c.log.Info("firewall conditions degraded", "firewall-name", fw.Name) return firewallConditionStatus{HealthTimeout: true} } //if everything returns false, it is progressing From d6f38a2df7333255aa8b94271b6c873cfc2e98ef Mon Sep 17 00:00:00 2001 From: honigeintopf Date: Tue, 10 Feb 2026 12:32:57 +0100 Subject: [PATCH 26/28] use monitor specific conditions --- controllers/set/status.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/controllers/set/status.go b/controllers/set/status.go index e0f0aa2..5098d57 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -47,10 +47,13 @@ func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditi c.log.Info("health timeout exceeded", "firewall-name", fw.Name, "last-reconciled-at", seedUpdatedTime.String(), "timeout-after", unhealthyTimeout.String()) return firewallConditionStatus{HealthTimeout: true} } - // Firewall was healthy at one point (all conditions were met), but then one of the conditions degraded so the firewall is unhealthy + // Firewall was healthy at one point (all conditions were met), but then one of the monitor conditions + // degraded so the firewall is unhealthy. Only check monitor conditions (connected, seedConnected, distanceConfigured) + // because the ready condition degradation is already handled by the time-based health timeout above. 
wasHealthy := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallHealthy)).Status == v2.ConditionTrue - if !allConditionsMet && wasHealthy && unhealthyTimeout > 0 { - c.log.Info("firewall conditions degraded", "firewall-name", fw.Name) + monitorConditionsDegraded := !connected || !seedConnected || !distanceConfigured + if monitorConditionsDegraded && wasHealthy && unhealthyTimeout > 0 { + c.log.Info("firewall monitor conditions degraded", "firewall-name", fw.Name) return firewallConditionStatus{HealthTimeout: true} } //if everything returns false, it is progressing From 1e2328f3a7debc59cbe77f1c848dc221f9e2549a Mon Sep 17 00:00:00 2001 From: Ebubekir Ates <109050136+Honigeintopf@users.noreply.github.com> Date: Tue, 10 Feb 2026 14:27:48 +0100 Subject: [PATCH 27/28] Update api/v2/types_firewall.go Co-authored-by: Gerrit --- api/v2/types_firewall.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/v2/types_firewall.go b/api/v2/types_firewall.go index 1df7499..d6fc16d 100644 --- a/api/v2/types_firewall.go +++ b/api/v2/types_firewall.go @@ -183,7 +183,7 @@ const ( FirewallMonitorDeployed ConditionType = "MonitorDeployed" // FirewallDistanceConfigured indicates that the firewall-controller has configured the given firewall distance. FirewallDistanceConfigured ConditionType = "Distance" - // FirewallHealthy indicates that all health conditions have been met at least once. + // FirewallProvisioned indicates that all health conditions have been met at least once. // Once set to true, it stays true and is used to detect condition degradation. FirewallHealthy ConditionType = "Healthy" ) From f69eef7c6b2c97c3867d0fdedec89c0737697b88 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 17 Feb 2026 14:00:21 +0100 Subject: [PATCH 28/28] Re-iterate status evaluation. 
(#86) --- api/v2/types_firewall.go | 170 +++++++++++++++++++++++++- api/v2/types_firewall_test.go | 183 ++++++++++++++++++++++++++++ api/v2/zz_generated.deepcopy.go | 21 ++++ controllers/deployment/delete.go | 3 +- controllers/deployment/reconcile.go | 3 +- controllers/deployment/recreate.go | 4 +- controllers/deployment/rolling.go | 3 +- controllers/firewall/delete.go | 4 +- controllers/firewall/reconcile.go | 16 ++- controllers/firewall/status.go | 35 +++++- controllers/generic_controller.go | 14 +-- controllers/set/delete.go | 27 +--- controllers/set/reconcile.go | 13 +- controllers/set/status.go | 75 ++---------- controllers/timeout/controller.go | 44 +++++++ controllers/timeout/reconcile.go | 102 ++++++++++++++++ integration/integration_test.go | 65 ++++++++++ integration/metal_resources_test.go | 72 +++++------ integration/suite_test.go | 15 ++- main.go | 8 +- 20 files changed, 713 insertions(+), 164 deletions(-) create mode 100644 controllers/timeout/controller.go create mode 100644 controllers/timeout/reconcile.go diff --git a/api/v2/types_firewall.go b/api/v2/types_firewall.go index d6fc16d..c8cf401 100644 --- a/api/v2/types_firewall.go +++ b/api/v2/types_firewall.go @@ -1,8 +1,10 @@ package v2 import ( + "fmt" "sort" "strconv" + "time" "github.com/metal-stack/metal-lib/pkg/pointer" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -185,7 +187,7 @@ const ( FirewallDistanceConfigured ConditionType = "Distance" // FirewallProvisioned indicates that all health conditions have been met at least once. // Once set to true, it stays true and is used to detect condition degradation. - FirewallHealthy ConditionType = "Healthy" + FirewallProvisioned ConditionType = "Provisioned" ) // ShootAccess contains secret references to construct a shoot client in the firewall-controller to update its firewall monitor. 
@@ -354,3 +356,169 @@ func SortFirewallsByImportance(fws []*Firewall) { return !a.CreationTimestamp.Before(&b.CreationTimestamp) }) } + +type ( + FirewallStatusResult string + + FirewallStatusEvalResult struct { + Result FirewallStatusResult + Reason string + TimeoutIn *time.Duration + } +) + +const ( + FirewallStatusReady FirewallStatusResult = "ready" + FirewallStatusProgressing FirewallStatusResult = "progressing" + FirewallStatusUnhealthy FirewallStatusResult = "unhealthy" + FirewallStatusHealthTimeout FirewallStatusResult = "health-timeout" + FirewallStatusCreateTimeout FirewallStatusResult = "create-timeout" +) + +func EvaluateFirewallStatus(fw *Firewall, createTimeout, healthTimeout time.Duration) *FirewallStatusEvalResult { + var ( + checkForTimeout = func(fw *Firewall, condition ConditionType, timeout time.Duration) (time.Duration, bool) { + if timeout == 0 { + return 0, false + } + + var ( + cond = pointer.SafeDeref(fw.Status.Conditions.Get(condition)) + transitionTime = cond.LastTransitionTime.Time + deadline = time.Until(transitionTime.Add(timeout)) + ) + + if deadline < 0 { + return 0, true + } + + return deadline, false + } + + collectUnhealthyConditions = func(cts ...ConditionType) []*Condition { + var res []*Condition + + for _, ct := range cts { + cond := fw.Status.Conditions.Get(ct) + if cond == nil { + res = append(res, &Condition{Type: ct}) + } else if cond.Status != ConditionTrue { + res = append(res, cond) + } + } + + return res + } + + unhealthyTypes []string + timeoutIn *time.Duration + ) + + switch fw.Status.Phase { + case FirewallPhaseCreating, FirewallPhaseCrashing: + unhealthyConds := collectUnhealthyConditions( + FirewallCreated, + FirewallReady, + FirewallProvisioned, + ) + + if len(unhealthyConds) == 0 { + return &FirewallStatusEvalResult{ + Result: FirewallStatusReady, + Reason: "", + } + } + + if createTimeout > 0 { + if t, ok := checkForTimeout(fw, FirewallReady, createTimeout); ok { + return &FirewallStatusEvalResult{ + Result: 
FirewallStatusCreateTimeout, + Reason: fmt.Sprintf("%s create timeout exceeded, firewall not provisioned in time", createTimeout.String()), + } + } else if createTimeout != 0 { + timeoutIn = &t + } + } + + for _, c := range unhealthyConds { + unhealthyTypes = append(unhealthyTypes, string(c.Type)) + } + + return &FirewallStatusEvalResult{ + Result: FirewallStatusProgressing, + Reason: fmt.Sprintf("not all health conditions are true: %v", unhealthyTypes), + TimeoutIn: timeoutIn, + } + + case FirewallPhaseRunning: + fallthrough + + default: + unhealthyConds := collectUnhealthyConditions( + FirewallCreated, + FirewallReady, + FirewallProvisioned, + FirewallControllerConnected, + FirewallControllerSeedConnected, + FirewallDistanceConfigured, + ) + + if len(unhealthyConds) == 0 { + return &FirewallStatusEvalResult{ + Result: FirewallStatusReady, + Reason: "", + } + } + + var ( + ready = pointer.SafeDeref(fw.Status.Conditions.Get(FirewallReady)).Status == ConditionTrue + provisioned = pointer.SafeDeref(fw.Status.Conditions.Get(FirewallProvisioned)).Status == ConditionTrue + connected = pointer.SafeDeref(fw.Status.Conditions.Get(FirewallControllerConnected)).Status == ConditionTrue + seedConnected = pointer.SafeDeref(fw.Status.Conditions.Get(FirewallControllerSeedConnected)).Status == ConditionTrue + ) + + if provisioned { + switch { + case !seedConnected: + if t, ok := checkForTimeout(fw, FirewallControllerSeedConnected, healthTimeout); ok { + return &FirewallStatusEvalResult{ + Result: FirewallStatusHealthTimeout, + Reason: fmt.Sprintf("%s health timeout exceeded, seed connection lost", healthTimeout.String()), + } + } else if healthTimeout != 0 { + timeoutIn = &t + } + + case !connected: + if t, ok := checkForTimeout(fw, FirewallControllerConnected, healthTimeout); ok { + return &FirewallStatusEvalResult{ + Result: FirewallStatusHealthTimeout, + Reason: fmt.Sprintf("%s health timeout exceeded, firewall monitor not reconciled anymore", healthTimeout.String()), + } + } 
else if healthTimeout != 0 { + timeoutIn = &t + } + + case !ready: + if t, ok := checkForTimeout(fw, FirewallReady, healthTimeout); ok { + return &FirewallStatusEvalResult{ + Result: FirewallStatusHealthTimeout, + Reason: fmt.Sprintf("%s health timeout exceeded, firewall is not ready from perspective of the metal-api", healthTimeout.String()), + } + } else if healthTimeout != 0 { + timeoutIn = &t + } + } + } + + for _, c := range unhealthyConds { + unhealthyTypes = append(unhealthyTypes, string(c.Type)) + } + + return &FirewallStatusEvalResult{ + Result: FirewallStatusUnhealthy, + Reason: fmt.Sprintf("not all health conditions are true: %v", unhealthyTypes), + TimeoutIn: timeoutIn, + } + } +} diff --git a/api/v2/types_firewall_test.go b/api/v2/types_firewall_test.go index e145730..142a912 100644 --- a/api/v2/types_firewall_test.go +++ b/api/v2/types_firewall_test.go @@ -6,7 +6,10 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "github.com/metal-stack/metal-lib/pkg/pointer" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "testing/synctest" ) func Test_SortFirewallsByImportance(t *testing.T) { @@ -107,3 +110,183 @@ func Test_SortFirewallsByImportance(t *testing.T) { }) } } + +func Test_EvaluateFirewallStatus(t *testing.T) { + tests := []struct { + name string + modFn func(fw *Firewall) + healthTimeout time.Duration + createTimeout time.Duration + want *FirewallStatusEvalResult + wantReason string + }{ + { + name: "ready firewall in running phase", + modFn: nil, + want: &FirewallStatusEvalResult{ + Result: FirewallStatusReady, + }, + }, + { + name: "unhealthy firewall in running phase due to firewall monitor not reconciling", + modFn: func(fw *Firewall) { + fw.Status.Conditions.Set(Condition{ + Type: FirewallControllerConnected, + Status: ConditionFalse, + }) + }, + want: &FirewallStatusEvalResult{ + Result: FirewallStatusUnhealthy, + Reason: "not all health conditions are true: [Connected]", + }, + }, + { + name: "unhealthy 
firewall in running phase due to firewall not reconciling", + modFn: func(fw *Firewall) { + fw.Status.Conditions.Set(Condition{ + Type: FirewallControllerSeedConnected, + Status: ConditionFalse, + }) + }, + want: &FirewallStatusEvalResult{ + Result: FirewallStatusUnhealthy, + Reason: "not all health conditions are true: [SeedConnected]", + }, + }, + { + name: "unhealthy firewall in running phase due to readiness condition false", + modFn: func(fw *Firewall) { + fw.Status.Conditions.Set(Condition{ + Type: FirewallReady, + Status: ConditionFalse, + }) + }, + want: &FirewallStatusEvalResult{ + Result: FirewallStatusUnhealthy, + Reason: "not all health conditions are true: [Ready]", + }, + }, + { + name: "health timeout reached because seed not connected", + healthTimeout: 5 * time.Minute, + modFn: func(fw *Firewall) { + cond := fw.Status.Conditions.Get(FirewallControllerSeedConnected) + cond.Status = ConditionFalse + fw.Status.Conditions.Set(*cond) + }, + want: &FirewallStatusEvalResult{ + Result: FirewallStatusHealthTimeout, + Reason: "5m0s health timeout exceeded, seed connection lost", + }, + }, + { + name: "health timeout not yet reached", + healthTimeout: 15 * time.Minute, + modFn: func(fw *Firewall) { + cond := fw.Status.Conditions.Get(FirewallControllerSeedConnected) + cond.Status = ConditionFalse + fw.Status.Conditions.Set(*cond) + }, + want: &FirewallStatusEvalResult{ + Result: FirewallStatusUnhealthy, + Reason: "not all health conditions are true: [SeedConnected]", + TimeoutIn: pointer.Pointer(5 * time.Minute), + }, + }, + { + name: "create timeout reached because not provisioned", + createTimeout: 5 * time.Minute, + modFn: func(fw *Firewall) { + fw.Status.Phase = FirewallPhaseCreating + cond := fw.Status.Conditions.Get(FirewallProvisioned) + cond.Status = ConditionFalse + fw.Status.Conditions.Set(*cond) + }, + want: &FirewallStatusEvalResult{ + Result: FirewallStatusCreateTimeout, + Reason: "5m0s create timeout exceeded, firewall not provisioned in time", + 
}, + }, + { + name: "create timeout not yet reached", + createTimeout: 15 * time.Minute, + modFn: func(fw *Firewall) { + fw.Status.Phase = FirewallPhaseCreating + cond := fw.Status.Conditions.Get(FirewallProvisioned) + cond.Status = ConditionFalse + fw.Status.Conditions.Set(*cond) + }, + want: &FirewallStatusEvalResult{ + Result: FirewallStatusProgressing, + Reason: "not all health conditions are true: [Provisioned]", + TimeoutIn: pointer.Pointer(5 * time.Minute), + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + tenMinutesAgo := time.Now().Add(-10 * time.Minute) + + fw := &Firewall{ + Status: FirewallStatus{ + Phase: FirewallPhaseRunning, + Conditions: Conditions{ + { + Type: FirewallControllerConnected, + Status: ConditionTrue, + LastTransitionTime: metav1.NewTime(tenMinutesAgo), + LastUpdateTime: metav1.NewTime(tenMinutesAgo), + }, + { + Type: FirewallControllerSeedConnected, + Status: ConditionTrue, + LastTransitionTime: metav1.NewTime(tenMinutesAgo), + LastUpdateTime: metav1.NewTime(tenMinutesAgo), + }, + { + Type: FirewallCreated, + Status: ConditionTrue, + LastTransitionTime: metav1.NewTime(tenMinutesAgo), + LastUpdateTime: metav1.NewTime(tenMinutesAgo), + }, + { + Type: FirewallReady, + Status: ConditionTrue, + LastTransitionTime: metav1.NewTime(tenMinutesAgo), + LastUpdateTime: metav1.NewTime(tenMinutesAgo), + }, + { + Type: FirewallProvisioned, + Status: ConditionTrue, + LastTransitionTime: metav1.NewTime(tenMinutesAgo), + LastUpdateTime: metav1.NewTime(tenMinutesAgo), + }, + { + Type: FirewallDistanceConfigured, + Status: ConditionTrue, + LastTransitionTime: metav1.NewTime(tenMinutesAgo), + LastUpdateTime: metav1.NewTime(tenMinutesAgo), + }, + { + Type: FirewallMonitorDeployed, + Status: ConditionTrue, + LastTransitionTime: metav1.NewTime(tenMinutesAgo), + LastUpdateTime: metav1.NewTime(tenMinutesAgo), + }, + }, + }, + } + + if tt.modFn != nil { + tt.modFn(fw) + } + + got := 
EvaluateFirewallStatus(fw, tt.createTimeout, tt.healthTimeout) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("diff = %s", diff) + } + }) + }) + } +} diff --git a/api/v2/zz_generated.deepcopy.go b/api/v2/zz_generated.deepcopy.go index b9d9399..393c86c 100644 --- a/api/v2/zz_generated.deepcopy.go +++ b/api/v2/zz_generated.deepcopy.go @@ -6,6 +6,7 @@ package v2 import ( runtime "k8s.io/apimachinery/pkg/runtime" + timex "time" ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. @@ -742,6 +743,26 @@ func (in *FirewallStatus) DeepCopy() *FirewallStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FirewallStatusEvalResult) DeepCopyInto(out *FirewallStatusEvalResult) { + *out = *in + if in.TimeoutIn != nil { + in, out := &in.TimeoutIn, &out.TimeoutIn + *out = new(timex.Duration) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FirewallStatusEvalResult. +func (in *FirewallStatusEvalResult) DeepCopy() *FirewallStatusEvalResult { + if in == nil { + return nil + } + out := new(FirewallStatusEvalResult) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *FirewallTemplateSpec) DeepCopyInto(out *FirewallTemplateSpec) { *out = *in diff --git a/controllers/deployment/delete.go b/controllers/deployment/delete.go index 7d0298c..5e77bd4 100644 --- a/controllers/deployment/delete.go +++ b/controllers/deployment/delete.go @@ -6,6 +6,7 @@ import ( v2 "github.com/metal-stack/firewall-controller-manager/api/v2" "github.com/metal-stack/firewall-controller-manager/controllers" + corev1 "k8s.io/api/core/v1" ) func (c *controller) Delete(r *controllers.Ctx[*v2.FirewallDeployment]) error { @@ -33,7 +34,7 @@ func (c *controller) deleteFirewallSets(r *controllers.Ctx[*v2.FirewallDeploymen r.Log.Info("set deletion timestamp on firewall set", "set-name", set.Name) - c.recorder.Eventf(set, nil, "Normal", "Delete", "deleted firewallset %s", set.Name) + c.recorder.Eventf(set, nil, corev1.EventTypeNormal, "Delete", "deleting set", "deleted firewall set %s", set.Name) } if len(sets) > 0 { diff --git a/controllers/deployment/reconcile.go b/controllers/deployment/reconcile.go index ef9c981..7ab6c5c 100644 --- a/controllers/deployment/reconcile.go +++ b/controllers/deployment/reconcile.go @@ -8,6 +8,7 @@ import ( "github.com/google/uuid" v2 "github.com/metal-stack/firewall-controller-manager/api/v2" "github.com/metal-stack/firewall-controller-manager/controllers" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/util/retry" @@ -217,7 +218,7 @@ func (c *controller) syncFirewallSet(r *controllers.Ctx[*v2.FirewallDeployment], cond := v2.NewCondition(v2.FirewallDeplomentProgressing, v2.ConditionTrue, "FirewallSetUpdated", fmt.Sprintf("Updated firewall set %q.", set.Name)) r.Target.Status.Conditions.Set(cond) - c.recorder.Eventf(set, nil, "Normal", "Update", "updated firewallset %s", set.Name) + c.recorder.Eventf(set, nil, corev1.EventTypeNormal, "Update", "updating set", "updated firewall set %s", set.Name) return nil } diff --git 
a/controllers/deployment/recreate.go b/controllers/deployment/recreate.go index da483c1..f5de6ba 100644 --- a/controllers/deployment/recreate.go +++ b/controllers/deployment/recreate.go @@ -6,6 +6,8 @@ import ( v2 "github.com/metal-stack/firewall-controller-manager/api/v2" "github.com/metal-stack/firewall-controller-manager/controllers" + + corev1 "k8s.io/api/core/v1" ) // recreateStrategy first deletes the existing firewall sets and then creates a new one @@ -20,7 +22,7 @@ func (c *controller) recreateStrategy(r *controllers.Ctx[*v2.FirewallDeployment] return err } - c.recorder.Eventf(set, nil, "Normal", "Recreate", "recreated firewallset old: %s new: %s", latestSet.Name, set.Name) + c.recorder.Eventf(set, nil, corev1.EventTypeNormal, "Recreate", "recreating set", "recreated firewall set, old: %s new: %s", latestSet.Name, set.Name) latestSet = set } diff --git a/controllers/deployment/rolling.go b/controllers/deployment/rolling.go index 07de9f6..61c48a5 100644 --- a/controllers/deployment/rolling.go +++ b/controllers/deployment/rolling.go @@ -6,6 +6,7 @@ import ( v2 "github.com/metal-stack/firewall-controller-manager/api/v2" "github.com/metal-stack/firewall-controller-manager/controllers" + corev1 "k8s.io/api/core/v1" ) // rollingUpdateStrategy first creates a new set and deletes the old one's when the new one becomes ready @@ -20,7 +21,7 @@ func (c *controller) rollingUpdateStrategy(r *controllers.Ctx[*v2.FirewallDeploy return err } - c.recorder.Eventf(newSet, nil, "Normal", "Create", "created firewallset %s", newSet.Name) + c.recorder.Eventf(newSet, nil, corev1.EventTypeNormal, "Create", "creating set", "created firewall set %s", newSet.Name) ownedSets = append(ownedSets, newSet) diff --git a/controllers/firewall/delete.go b/controllers/firewall/delete.go index 191584c..20f1de1 100644 --- a/controllers/firewall/delete.go +++ b/controllers/firewall/delete.go @@ -9,6 +9,8 @@ import ( "github.com/metal-stack/firewall-controller-manager/controllers" 
"github.com/metal-stack/metal-go/api/client/machine" apierrors "k8s.io/apimachinery/pkg/api/errors" + + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -46,7 +48,7 @@ func (c *controller) Delete(r *controllers.Ctx[*v2.Firewall]) error { r.Log.Info("deleted firewall", "firewall-name", f.Name, "id", *resp.Payload.ID) - c.recorder.Eventf(r.Target, nil, "Normal", "Delete", "deleted firewall %s id %s", r.Target.Name, *resp.Payload.ID) + c.recorder.Eventf(r.Target, nil, corev1.EventTypeNormal, "Delete", "deleting firewall", "deleted firewall %s id %s", r.Target.Name, *resp.Payload.ID) } return nil diff --git a/controllers/firewall/reconcile.go b/controllers/firewall/reconcile.go index 42467b0..13f25e6 100644 --- a/controllers/firewall/reconcile.go +++ b/controllers/firewall/reconcile.go @@ -12,6 +12,8 @@ import ( "github.com/metal-stack/metal-go/api/client/machine" "github.com/metal-stack/metal-go/api/models" "github.com/metal-stack/metal-lib/pkg/pointer" + + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" ) @@ -30,11 +32,6 @@ func (c *controller) Reconcile(r *controllers.Ctx[*v2.Firewall]) error { } SetFirewallStatusFromMonitor(r.Target, mon) - - if isAllConditionsMet(r.Target) { - cond := v2.NewCondition(v2.FirewallHealthy, v2.ConditionTrue, "Healthy", "All firewall conditions have been met.") - r.Target.Status.Conditions.Set(cond) - } }() fws, err := c.firewallCache.Get(r.Ctx, r.Target) @@ -59,6 +56,11 @@ func (c *controller) Reconcile(r *controllers.Ctx[*v2.Firewall]) error { cond := v2.NewCondition(v2.FirewallCreated, v2.ConditionTrue, "Created", fmt.Sprintf("Firewall %q created successfully.", pointer.SafeDeref(pointer.SafeDeref(f.Allocation).Name))) r.Target.Status.Conditions.Set(cond) + // this is mainly for tests when the firewall is already present + if r.Target.Status.Phase == v2.FirewallPhase("") { + r.Target.Status.Phase = v2.FirewallPhaseCreating + } + var 
currentStatus *v2.MachineStatus currentStatus, err = getMachineStatus(f) if err != nil { @@ -174,7 +176,7 @@ func (c *controller) createFirewall(r *controllers.Ctx[*v2.Firewall]) (*models.V cond := v2.NewCondition(v2.FirewallCreated, v2.ConditionTrue, "Created", fmt.Sprintf("Firewall %q created successfully.", pointer.SafeDeref(pointer.SafeDeref(resp.Payload.Allocation).Name))) r.Target.Status.Conditions.Set(cond) - c.recorder.Eventf(r.Target, nil, "Normal", "Create", "created firewall %s id %s", r.Target.Name, pointer.SafeDeref(resp.Payload.ID)) + c.recorder.Eventf(r.Target, nil, corev1.EventTypeNormal, "Create", "creating firewall", "created firewall %s id %s", r.Target.Name, pointer.SafeDeref(resp.Payload.ID)) return resp.Payload, nil } @@ -194,6 +196,7 @@ func isFirewallProgressing(status *v2.MachineStatus) bool { if status.LastEvent.Event != "Phoned Home" { return true } + return false } @@ -212,6 +215,7 @@ func isFirewallReady(status *v2.MachineStatus) bool { if status.LastEvent.Event == "Phoned Home" { return true } + return false } diff --git a/controllers/firewall/status.go b/controllers/firewall/status.go index b5b1b0f..38a1e09 100644 --- a/controllers/firewall/status.go +++ b/controllers/firewall/status.go @@ -129,6 +129,11 @@ func SetFirewallStatusFromMonitor(fw *v2.Firewall, mon *v2.FirewallMonitor) { cond = v2.NewCondition(v2.FirewallDistanceConfigured, v2.ConditionTrue, "NotChecking", "Not checking distance due to firewall annotation.") fw.Status.Conditions.Set(cond) + if isProvisioned(fw) { + cond := v2.NewCondition(v2.FirewallProvisioned, v2.ConditionTrue, "Provisioned", "All firewall conditions have been met.") + fw.Status.Conditions.Set(cond) + } + return } @@ -155,11 +160,30 @@ func SetFirewallStatusFromMonitor(fw *v2.Firewall, mon *v2.FirewallMonitor) { fw.Status.ControllerStatus = connection + var ( + // currently, the firewall-controller writes the reconcile time hard-coded every three minutes + // the FCM reconciles the firewall hard-coded 
at least every two minutes + // + // this can be visualized as: + // + // fc (write) w w w w + // | | | | + // t (minutes) 0--1--2--3--4--5--6--7--8--9--10-- + // | | | | | | + // FCM (read) r r r r r r + // + // so, read out data will contain t={0, 0, 3, 6, 6, 9}, which shows that the maximum distance is three minutes + maximumSeedUpdateDrift = 3 * time.Minute + // in this case, the firewall-controller almost permanently updates this value (fw.Spec.Interval, by default 10s) + // so we can assume the read out interval from the fcm firewall reconcile, which is maximum two minutes as described above + maximumShootUpdateDrift = 2 * time.Minute + ) + // Check if the firewall-controller has reconciled the shoot if connection.Updated.Time.IsZero() { cond := v2.NewCondition(v2.FirewallControllerConnected, v2.ConditionFalse, "NotConnected", "Controller has not yet connected to shoot.") fw.Status.Conditions.Set(cond) - } else if time.Since(connection.Updated.Time) > 5*time.Minute { + } else if time.Since(connection.Updated.Time) > maximumShootUpdateDrift { cond := v2.NewCondition(v2.FirewallControllerConnected, v2.ConditionFalse, "StoppedReconciling", fmt.Sprintf("Controller has stopped reconciling since %s to shoot.", connection.Updated.String())) fw.Status.Conditions.Set(cond) } else { @@ -171,7 +195,7 @@ func SetFirewallStatusFromMonitor(fw *v2.Firewall, mon *v2.FirewallMonitor) { if connection.SeedUpdated.Time.IsZero() { cond := v2.NewCondition(v2.FirewallControllerSeedConnected, v2.ConditionFalse, "NotConnected", "Controller has not yet connected to seed.") fw.Status.Conditions.Set(cond) - } else if time.Since(connection.SeedUpdated.Time) > 5*time.Minute { + } else if time.Since(connection.SeedUpdated.Time) > maximumSeedUpdateDrift { cond := v2.NewCondition(v2.FirewallControllerSeedConnected, v2.ConditionFalse, "StoppedReconciling", fmt.Sprintf("Controller has stopped reconciling since %s to seed.", connection.SeedUpdated.String())) fw.Status.Conditions.Set(cond) } 
else { @@ -190,9 +214,14 @@ func SetFirewallStatusFromMonitor(fw *v2.Firewall, mon *v2.FirewallMonitor) { cond := v2.NewCondition(v2.FirewallDistanceConfigured, v2.ConditionFalse, "NotConfigured", fmt.Sprintf("Controller has configured distance %d, but %d is specified.", connection.ActualDistance, fw.Distance)) fw.Status.Conditions.Set(cond) } + + if isProvisioned(fw) { + cond := v2.NewCondition(v2.FirewallProvisioned, v2.ConditionTrue, "Provisioned", "All firewall conditions have been met.") + fw.Status.Conditions.Set(cond) + } } -func isAllConditionsMet(fw *v2.Firewall) bool { +func isProvisioned(fw *v2.Firewall) bool { for _, ct := range []v2.ConditionType{ v2.FirewallCreated, v2.FirewallReady, diff --git a/controllers/generic_controller.go b/controllers/generic_controller.go index 5a585e2..73408e3 100644 --- a/controllers/generic_controller.go +++ b/controllers/generic_controller.go @@ -103,8 +103,7 @@ func (g GenericController[O]) Reconcile(ctx context.Context, req ctrl.Request) ( log.Info("reconciling resource deletion flow") err := g.reconciler.Delete(rctx) if err != nil { - var requeueErr *requeueError - if errors.As(err, &requeueErr) { + if requeueErr, ok := errors.AsType[*requeueError](err); ok { log.Info(requeueErr.Error()) return ctrl.Result{RequeueAfter: requeueErr.after}, nil //nolint:nilerr we need to return nil such that the requeue works } @@ -192,16 +191,13 @@ func (g GenericController[O]) Reconcile(ctx context.Context, req ctrl.Request) ( err := g.reconciler.Reconcile(rctx) if err != nil { - var requeueErr *requeueError - - switch { - case errors.As(err, &requeueErr): + if requeueErr, ok := errors.AsType[*requeueError](err); ok { log.Info(requeueErr.Error()) return ctrl.Result{RequeueAfter: requeueErr.after}, nil //nolint:nilerr we need to return nil such that the requeue works - default: - log.Error(err, "error during reconcile") - return ctrl.Result{}, err } + + log.Error(err, "error during reconcile") + return ctrl.Result{}, err } return 
ctrl.Result{}, statusErr diff --git a/controllers/set/delete.go b/controllers/set/delete.go index 14cb3a4..1ab22a4 100644 --- a/controllers/set/delete.go +++ b/controllers/set/delete.go @@ -6,6 +6,8 @@ import ( v2 "github.com/metal-stack/firewall-controller-manager/api/v2" "github.com/metal-stack/firewall-controller-manager/controllers" + + corev1 "k8s.io/api/core/v1" ) func (c *controller) Delete(r *controllers.Ctx[*v2.FirewallSet]) error { @@ -33,7 +35,7 @@ func (c *controller) deleteFirewalls(r *controllers.Ctx[*v2.FirewallSet], fws .. r.Log.Info("set deletion timestamp on firewall", "firewall-name", fw.Name) - c.recorder.Eventf(fw, nil, "Normal", "Delete", "deleted firewall %s", fw.Name) + c.recorder.Eventf(fw, nil, corev1.EventTypeNormal, "Delete", "deleting firewall", "deleted firewall %s", fw.Name) } if len(fws) > 0 { @@ -42,26 +44,3 @@ func (c *controller) deleteFirewalls(r *controllers.Ctx[*v2.FirewallSet], fws .. return nil } -func (c *controller) deleteIfUnhealthyOrTimeout(r *controllers.Ctx[*v2.FirewallSet], fws ...*v2.Firewall) ([]*v2.Firewall, error) { - var result []*v2.Firewall - createTimeout := c.c.GetCreateTimeout() - healthTimeout := c.c.GetFirewallHealthTimeout() - - for _, fw := range fws { - status := c.evaluateFirewallConditions(fw) - - switch { - case (createTimeout > 0 && status.CreateTimeout) || (healthTimeout > 0 && status.HealthTimeout): - r.Log.Info("firewall health or creation timeout exceeded, deleting from set", "firewall-name", fw.Name) - - err := c.deleteFirewalls(r, fw) - if err != nil { - return nil, err - } - - result = append(result, fw) - } - - } - return result, nil -} diff --git a/controllers/set/reconcile.go b/controllers/set/reconcile.go index 50231af..3f0aa53 100644 --- a/controllers/set/reconcile.go +++ b/controllers/set/reconcile.go @@ -8,8 +8,10 @@ import ( "github.com/google/uuid" v2 "github.com/metal-stack/firewall-controller-manager/api/v2" "github.com/metal-stack/firewall-controller-manager/controllers" - metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func (c *controller) Reconcile(r *controllers.Ctx[*v2.FirewallSet]) error { @@ -90,7 +92,7 @@ func (c *controller) Reconcile(r *controllers.Ctx[*v2.FirewallSet]) error { r.Log.Info("firewall created", "firewall-name", fw.Name) - c.recorder.Eventf(r.Target, nil, "Normal", "Create", "created firewall %s", fw.Name) + c.recorder.Eventf(r.Target, nil, corev1.EventTypeNormal, "Create", "creating firewall", "created firewall %s", fw.Name) ownedFirewalls = append(ownedFirewalls, fw) } @@ -110,13 +112,6 @@ func (c *controller) Reconcile(r *controllers.Ctx[*v2.FirewallSet]) error { } } - deletedFws, err := c.deleteIfUnhealthyOrTimeout(r, ownedFirewalls...) - if err != nil { - return err - } - - ownedFirewalls = controllers.Except(ownedFirewalls, deletedFws...) - err = c.setStatus(r, ownedFirewalls) if err != nil { return err diff --git a/controllers/set/status.go b/controllers/set/status.go index 5098d57..7b05812 100644 --- a/controllers/set/status.go +++ b/controllers/set/status.go @@ -1,69 +1,10 @@ package set import ( - "time" - v2 "github.com/metal-stack/firewall-controller-manager/api/v2" "github.com/metal-stack/firewall-controller-manager/controllers" - "github.com/metal-stack/metal-lib/pkg/pointer" ) -type firewallConditionStatus struct { - IsReady bool - CreateTimeout bool - HealthTimeout bool -} - -func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditionStatus { - var ( - unhealthyTimeout = c.c.GetFirewallHealthTimeout() - allocationTimeout = c.c.GetCreateTimeout() - - created = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue - ready = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue - connected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == 
v2.ConditionTrue - seedConnected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue - distanceConfigured = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue - allConditionsMet = created && ready && connected && seedConnected && distanceConfigured - - seedUpdatedTime = pointer.SafeDeref(fw.Status.ControllerStatus).SeedUpdated.Time - timeSinceReconcile = time.Since(seedUpdatedTime) - allocationTime = pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time - ) - - if allConditionsMet { - return firewallConditionStatus{IsReady: true} - } - - // duration after which a firewall in the creation phase will be recreated, exceeded - if allocationTimeout > 0 && fw.Status.Phase == v2.FirewallPhaseCreating && !allocationTime.IsZero() { - if time.Since(allocationTime) > allocationTimeout { - c.log.Info("create timeout exceeded", "firewall-name", fw.Name, "allocated-at", allocationTime.String(), "timeout-after", allocationTimeout.String()) - return firewallConditionStatus{CreateTimeout: true} - } - } - // Only apply health timeout once we have a non-zero seed reconcile timestamp. - if (!ready || !seedConnected || !connected) && unhealthyTimeout > 0 && created && !seedUpdatedTime.IsZero() && timeSinceReconcile > unhealthyTimeout { - c.log.Info("health timeout exceeded", "firewall-name", fw.Name, "last-reconciled-at", seedUpdatedTime.String(), "timeout-after", unhealthyTimeout.String()) - return firewallConditionStatus{HealthTimeout: true} - } - // Firewall was healthy at one point (all conditions were met), but then one of the monitor conditions - // degraded so the firewall is unhealthy. Only check monitor conditions (connected, seedConnected, distanceConfigured) - // because the ready condition degradation is already handled by the time-based health timeout above. 
- wasHealthy := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallHealthy)).Status == v2.ConditionTrue - monitorConditionsDegraded := !connected || !seedConnected || !distanceConfigured - if monitorConditionsDegraded && wasHealthy && unhealthyTimeout > 0 { - c.log.Info("firewall monitor conditions degraded", "firewall-name", fw.Name) - return firewallConditionStatus{HealthTimeout: true} - } - //if everything returns false, it is progressing - return firewallConditionStatus{ - IsReady: allConditionsMet, - CreateTimeout: false, - HealthTimeout: false, - } -} - func (c *controller) setStatus(r *controllers.Ctx[*v2.FirewallSet], ownedFirewalls []*v2.Firewall) error { r.Target.Status.TargetReplicas = r.Target.Spec.Replicas r.Target.Status.ReadyReplicas = 0 @@ -71,19 +12,21 @@ func (c *controller) setStatus(r *controllers.Ctx[*v2.FirewallSet], ownedFirewal r.Target.Status.UnhealthyReplicas = 0 for _, fw := range ownedFirewalls { - statusReport := c.evaluateFirewallConditions(fw) + status := v2.EvaluateFirewallStatus(fw, c.c.GetCreateTimeout(), c.c.GetFirewallHealthTimeout()) - switch { - case statusReport.IsReady: + switch status.Result { + case v2.FirewallStatusReady: r.Target.Status.ReadyReplicas++ continue - - case statusReport.CreateTimeout || statusReport.HealthTimeout: + case v2.FirewallStatusProgressing: + r.Target.Status.ProgressingReplicas++ + continue + case v2.FirewallStatusUnhealthy, v2.FirewallStatusCreateTimeout, v2.FirewallStatusHealthTimeout: + fallthrough + default: r.Target.Status.UnhealthyReplicas++ continue } - - r.Target.Status.ProgressingReplicas++ } revision, err := controllers.Revision(r.Target) diff --git a/controllers/timeout/controller.go b/controllers/timeout/controller.go new file mode 100644 index 0000000..a7bbc31 --- /dev/null +++ b/controllers/timeout/controller.go @@ -0,0 +1,44 @@ +package timeout + +import ( + "github.com/go-logr/logr" + "k8s.io/client-go/tools/events" + ctrl "sigs.k8s.io/controller-runtime" + 
"sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + v2 "github.com/metal-stack/firewall-controller-manager/api/v2" + "github.com/metal-stack/firewall-controller-manager/api/v2/config" + "github.com/metal-stack/firewall-controller-manager/controllers" +) + +type controller struct { + c *config.ControllerConfig + client client.Client + namespace string + log logr.Logger + recorder events.EventRecorder +} + +func SetupWithManager(log logr.Logger, recorder events.EventRecorder, mgr ctrl.Manager, c *config.ControllerConfig) error { + if c.GetFirewallHealthTimeout() <= 0 && c.GetCreateTimeout() <= 0 { + log.Info("not registering timeout controller because neither create nor health timeout configured") + return nil + } + + g := &controller{ + c: c, + log: log, + client: c.GetSeedClient(), + namespace: c.GetSeedNamespace(), + recorder: recorder, + } + + return ctrl.NewControllerManagedBy(mgr). + For( + &v2.FirewallSet{}, + ). + Named("FirewallHealthTimeout"). + WithEventFilter(predicate.NewPredicateFuncs(controllers.SkipOtherNamespace(c.GetSeedNamespace()))). 
+ Complete(g) +} diff --git a/controllers/timeout/reconcile.go b/controllers/timeout/reconcile.go new file mode 100644 index 0000000..b072efb --- /dev/null +++ b/controllers/timeout/reconcile.go @@ -0,0 +1,102 @@ +package timeout + +import ( + "context" + "fmt" + "sort" + + v2 "github.com/metal-stack/firewall-controller-manager/api/v2" + "github.com/metal-stack/firewall-controller-manager/controllers" + apierrors "k8s.io/apimachinery/pkg/api/errors" + + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + corev1 "k8s.io/api/core/v1" +) + +func (c *controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + if req.Namespace != c.namespace { // should already be filtered out through predicate, but we will check anyway + return ctrl.Result{}, nil + } + + set := &v2.FirewallSet{} + if err := c.client.Get(ctx, req.NamespacedName, set, &client.GetOptions{}); err != nil { + if apierrors.IsNotFound(err) { + c.log.Info("resource no longer exists") + return ctrl.Result{}, nil + } + + return ctrl.Result{}, fmt.Errorf("error retrieving resource: %w", err) + } + + if !set.GetDeletionTimestamp().IsZero() { + return ctrl.Result{}, nil + } + + ownedFirewalls, _, err := controllers.GetOwnedResources(ctx, c.c.GetSeedClient(), set.Spec.Selector, set, &v2.FirewallList{}, func(fl *v2.FirewallList) []*v2.Firewall { + return fl.GetItems() + }) + if err != nil { + return ctrl.Result{}, fmt.Errorf("unable to get owned firewalls: %w", err) + } + + return c.deleteIfUnhealthyOrTimeout(ctx, ownedFirewalls...) 
+} + +func (c *controller) deleteIfUnhealthyOrTimeout(ctx context.Context, fws ...*v2.Firewall) (ctrl.Result, error) { + type fwWithStatus struct { + firewall *v2.Firewall + status *v2.FirewallStatusEvalResult + } + + var nextTimeouts []*fwWithStatus + + for _, fw := range fws { + status := v2.EvaluateFirewallStatus(fw, c.c.GetCreateTimeout(), c.c.GetFirewallHealthTimeout()) + + switch status.Result { + case v2.FirewallStatusCreateTimeout, v2.FirewallStatusHealthTimeout: + c.log.Info("firewall timeout exceeded, deleting from set", "reason", status.Reason, "firewall-name", fw.Name) + + if fw.DeletionTimestamp != nil { + c.log.Info("deletion timestamp on firewall already set", "firewall-name", fw.Name) + continue + } + + err := c.c.GetSeedClient().Delete(ctx, fw) + if err != nil { + return ctrl.Result{}, err + } + + c.recorder.Eventf(fw, nil, corev1.EventTypeNormal, "Delete", "deleting firewall", "deleted firewall %s due to %s", fw.Name, status.Reason) + + case v2.FirewallStatusUnhealthy: + if status.TimeoutIn != nil { + nextTimeouts = append(nextTimeouts, &fwWithStatus{ + firewall: fw, + status: status, + }) + } + } + } + + if len(nextTimeouts) > 0 { + sort.SliceStable(nextTimeouts, func(i, j int) bool { + return *nextTimeouts[i].status.TimeoutIn < *nextTimeouts[j].status.TimeoutIn + }) + + var ( + nextTimeout = nextTimeouts[0] + in = *nextTimeout.status.TimeoutIn + ) + + c.log.Info("scheduled check for next health timeout", "firewall-name", nextTimeout.firewall.Name, "reason", nextTimeout.status.Reason, "in", in.String()) + + return ctrl.Result{ + RequeueAfter: in, + }, nil + } + + return ctrl.Result{}, nil +} diff --git a/integration/integration_test.go b/integration/integration_test.go index df10bc3..1b51a7f 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -315,6 +315,7 @@ var _ = Context("integration test", Ordered, func() { Expect(cond.Reason).To(Equal("Connected")) Expect(cond.Message).To(Equal(fmt.Sprintf("Controller reconciled 
shoot at %s.", mon.ControllerStatus.Updated.String()))) }) + It("should have the firewall-controller connected to seed condition true", func() { cond := testcommon.WaitForCondition(k8sClient, ctx, fw.DeepCopy(), func(fd *v2.Firewall) v2.Conditions { return fd.Status.Conditions @@ -325,6 +326,7 @@ var _ = Context("integration test", Ordered, func() { Expect(cond.Reason).To(Equal("Connected")) Expect(cond.Message).To(Equal(fmt.Sprintf("Controller reconciled firewall at %s.", mon.ControllerStatus.SeedUpdated.String()))) }) + It("should have configured the distance", func() { cond := testcommon.WaitForCondition(k8sClient, ctx, fw.DeepCopy(), func(fd *v2.Firewall) v2.Conditions { return fd.Status.Conditions @@ -335,6 +337,13 @@ var _ = Context("integration test", Ordered, func() { Expect(cond.Reason).To(Equal("Configured")) Expect(cond.Message).To(Equal(fmt.Sprintf("Controller has configured the specified distance %d.", v2.FirewallShortestDistance))) }) + + It("should be in the running phase", func() { + Eventually(func() v2.FirewallPhase { + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(fw), fw)).To(Succeed()) + return fw.Status.Phase + }, 5*time.Second, interval).Should(Equal(v2.FirewallPhaseRunning)) + }) }) Context("the firewall set resource", func() { @@ -580,6 +589,13 @@ var _ = Context("integration test", Ordered, func() { Expect(cond.LastTransitionTime).NotTo(BeZero()) }) + It("should be in the creating phase", func() { + Eventually(func() v2.FirewallPhase { + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(fw), fw)).To(Succeed()) + return fw.Status.Phase + }, 5*time.Second, interval).Should(Equal(v2.FirewallPhaseCreating)) + }) + It("should have firewall networks populated", func() { var nws []v2.FirewallNetwork var fw = fw.DeepCopy() @@ -1006,6 +1022,17 @@ var _ = Context("integration test", Ordered, func() { Expect(cond.Message).To(Equal(fmt.Sprintf("Firewall %q is phoning home and alive.", *firewall1.Allocation.Name))) }) + It("should have the 
provisioned condition true", func() { + cond := testcommon.WaitForCondition(k8sClient, ctx, fw.DeepCopy(), func(fd *v2.Firewall) v2.Conditions { + return fd.Status.Conditions + }, v2.FirewallProvisioned, v2.ConditionTrue, 15*time.Second) + + Expect(cond.LastTransitionTime).NotTo(BeZero()) + Expect(cond.LastUpdateTime).NotTo(BeZero()) + Expect(cond.Reason).To(Equal("Provisioned")) + Expect(cond.Message).To(Equal("All firewall conditions have been met.")) + }) + It("should have the monitor condition true", func() { cond := testcommon.WaitForCondition(k8sClient, ctx, fw.DeepCopy(), func(fd *v2.Firewall) v2.Conditions { return fd.Status.Conditions @@ -1027,6 +1054,7 @@ var _ = Context("integration test", Ordered, func() { Expect(cond.Reason).To(Equal("Connected")) Expect(cond.Message).To(Equal(fmt.Sprintf("Controller reconciled shoot at %s.", mon.ControllerStatus.Updated.String()))) }) + It("should have the firewall-controller connected to seed condition true", func() { cond := testcommon.WaitForCondition(k8sClient, ctx, fw.DeepCopy(), func(fd *v2.Firewall) v2.Conditions { return fd.Status.Conditions @@ -1037,6 +1065,7 @@ var _ = Context("integration test", Ordered, func() { Expect(cond.Reason).To(Equal("Connected")) Expect(cond.Message).To(Equal(fmt.Sprintf("Controller reconciled firewall at %s.", mon.ControllerStatus.SeedUpdated.String()))) }) + It("should have configured the distance", func() { cond := testcommon.WaitForCondition(k8sClient, ctx, fw.DeepCopy(), func(fd *v2.Firewall) v2.Conditions { return fd.Status.Conditions @@ -1047,6 +1076,13 @@ var _ = Context("integration test", Ordered, func() { Expect(cond.Reason).To(Equal("Configured")) Expect(cond.Message).To(Equal(fmt.Sprintf("Controller has configured the specified distance %d.", v2.FirewallShortestDistance))) }) + + It("should be in the running phase", func() { + Eventually(func() v2.FirewallPhase { + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(fw), fw)).To(Succeed()) + return fw.Status.Phase + 
}, 5*time.Second, interval).Should(Equal(v2.FirewallPhaseRunning)) + }) }) Context("the firewall set resource", func() { @@ -1374,6 +1410,17 @@ var _ = Context("integration test", Ordered, func() { Expect(cond.Message).To(Equal(fmt.Sprintf("Firewall %q is phoning home and alive.", *readyFirewall.Allocation.Name))) }) + It("should have the provisioned condition true", func() { + cond := testcommon.WaitForCondition(k8sClient, ctx, newFw.DeepCopy(), func(fd *v2.Firewall) v2.Conditions { + return fd.Status.Conditions + }, v2.FirewallProvisioned, v2.ConditionTrue, 15*time.Second) + + Expect(cond.LastTransitionTime).NotTo(BeZero()) + Expect(cond.LastUpdateTime).NotTo(BeZero()) + Expect(cond.Reason).To(Equal("Provisioned")) + Expect(cond.Message).To(Equal("All firewall conditions have been met.")) + }) + It("should have the monitor condition true", func() { cond := testcommon.WaitForCondition(k8sClient, ctx, newFw.DeepCopy(), func(fd *v2.Firewall) v2.Conditions { return fd.Status.Conditions @@ -1768,6 +1815,17 @@ var _ = Context("integration test", Ordered, func() { Expect(cond.Message).To(Equal(fmt.Sprintf("Firewall %q is phoning home and alive.", *firewall1.Allocation.Name))) }) + It("should have the provisioned condition true", func() { + cond := testcommon.WaitForCondition(k8sClient, ctx, fw.DeepCopy(), func(fd *v2.Firewall) v2.Conditions { + return fd.Status.Conditions + }, v2.FirewallProvisioned, v2.ConditionTrue, 15*time.Second) + + Expect(cond.LastTransitionTime).NotTo(BeZero()) + Expect(cond.LastUpdateTime).NotTo(BeZero()) + Expect(cond.Reason).To(Equal("Provisioned")) + Expect(cond.Message).To(Equal("All firewall conditions have been met.")) + }) + It("should have the monitor condition true", func() { cond := testcommon.WaitForCondition(k8sClient, ctx, fw.DeepCopy(), func(fd *v2.Firewall) v2.Conditions { return fd.Status.Conditions @@ -1789,6 +1847,13 @@ var _ = Context("integration test", Ordered, func() { Expect(cond.Reason).To(Equal("NotChecking")) 
Expect(cond.Message).To(Equal("Not checking controller connection due to firewall annotation.")) }) + + It("should be in the running phase", func() { + Eventually(func() v2.FirewallPhase { + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(fw), fw)).To(Succeed()) + return fw.Status.Phase + }, 5*time.Second, interval).Should(Equal(v2.FirewallPhaseRunning)) + }) }) Context("the firewall set resource", func() { diff --git a/integration/metal_resources_test.go b/integration/metal_resources_test.go index 64972da..24e7925 100644 --- a/integration/metal_resources_test.go +++ b/integration/metal_resources_test.go @@ -313,86 +313,86 @@ var ( firewall3 = &models.V1FirewallResponse{ Allocation: &models.V1MachineAllocation{ BootInfo: &models.V1BootInfo{ - Bootloaderid: pointer.Pointer("bootloaderid"), - Cmdline: pointer.Pointer("cmdline"), - ImageID: pointer.Pointer("imageid"), - Initrd: pointer.Pointer("initrd"), - Kernel: pointer.Pointer("kernel"), - OsPartition: pointer.Pointer("ospartition"), - PrimaryDisk: pointer.Pointer("primarydisk"), + Bootloaderid: new("bootloaderid"), + Cmdline: new("cmdline"), + ImageID: new("imageid"), + Initrd: new("initrd"), + Kernel: new("kernel"), + OsPartition: new("ospartition"), + PrimaryDisk: new("primarydisk"), }, - Created: pointer.Pointer(strfmt.DateTime(testTime.Add(-20 * 24 * time.Hour))), - Creator: pointer.Pointer("creator"), + Created: new(strfmt.DateTime(testTime.Add(-20 * 24 * time.Hour))), + Creator: new("creator"), Description: "firewall allocation 3", Filesystemlayout: fsl1, - Hostname: pointer.Pointer("firewall-hostname-3"), + Hostname: new("firewall-hostname-3"), Image: image1, - Name: pointer.Pointer("firewall-3"), + Name: new("firewall-3"), Networks: []*models.V1MachineNetwork{ { - Asn: pointer.Pointer(int64(200)), + Asn: new(int64(200)), Destinationprefixes: []string{"2.2.2.2"}, Ips: []string{"1.1.1.1"}, - Nat: pointer.Pointer(false), - Networkid: pointer.Pointer("private"), + Nat: new(false), + Networkid: 
new("private"), Networktype: pointer.Pointer(net.PrivatePrimaryUnshared), Prefixes: []string{"prefixes"}, - Private: pointer.Pointer(true), - Underlay: pointer.Pointer(false), - Vrf: pointer.Pointer(int64(100)), + Private: new(true), + Underlay: new(false), + Vrf: new(int64(100)), }, }, - Project: pointer.Pointer("project-1"), - Reinstall: pointer.Pointer(false), + Project: new("project-1"), + Reinstall: new(false), Role: pointer.Pointer(models.V1MachineAllocationRoleFirewall), SSHPubKeys: []string{"sshpubkey"}, - Succeeded: pointer.Pointer(true), + Succeeded: new(true), UserData: "---userdata---", }, Bios: &models.V1MachineBIOS{ - Date: pointer.Pointer("biosdata"), - Vendor: pointer.Pointer("biosvendor"), - Version: pointer.Pointer("biosversion"), + Date: new("biosdata"), + Vendor: new("biosvendor"), + Version: new("biosversion"), }, Description: "firewall 1", Events: &models.V1MachineRecentProvisioningEvents{ - CrashLoop: pointer.Pointer(true), - FailedMachineReclaim: pointer.Pointer(true), + CrashLoop: new(true), + FailedMachineReclaim: new(true), LastErrorEvent: &models.V1MachineProvisioningEvent{ - Event: pointer.Pointer("Crashed"), + Event: new("Crashed"), Message: "crash", Time: strfmt.DateTime(testTime.Add(-10 * 24 * time.Hour)), }, LastEventTime: strfmt.DateTime(testTime.Add(-7 * 24 * time.Hour)), Log: []*models.V1MachineProvisioningEvent{ { - Event: pointer.Pointer("Phoned Home"), + Event: new("Phoned Home"), Message: "phoning home", Time: strfmt.DateTime(testTime.Add(-7 * 24 * time.Hour)), }, }, }, Hardware: &models.V1MachineHardware{ - CPUCores: pointer.Pointer(int32(16)), + CPUCores: new(int32(16)), Disks: []*models.V1MachineBlockDevice{}, - Memory: pointer.Pointer(int64(32)), + Memory: new(int64(32)), Nics: []*models.V1MachineNic{}, }, - ID: pointer.Pointer("3"), + ID: new("3"), Ledstate: &models.V1ChassisIdentifyLEDState{ - Description: pointer.Pointer(""), - Value: pointer.Pointer(""), + Description: new(""), + Value: new(""), }, - Liveliness: 
pointer.Pointer("Unhealthy"), + Liveliness: new("Unhealthy"), Name: "firewall-3", Partition: partition1, Rackid: "rack-1", Size: size1, State: &models.V1MachineState{ - Description: pointer.Pointer("state"), + Description: new("state"), Issuer: "issuer", - MetalHammerVersion: pointer.Pointer("version"), - Value: pointer.Pointer(""), + MetalHammerVersion: new("version"), + Value: new(""), }, Tags: []string{"a"}, } diff --git a/integration/suite_test.go b/integration/suite_test.go index e4c633b..8db2fed 100644 --- a/integration/suite_test.go +++ b/integration/suite_test.go @@ -16,6 +16,7 @@ import ( "github.com/metal-stack/firewall-controller-manager/controllers/firewall" "github.com/metal-stack/firewall-controller-manager/controllers/monitor" "github.com/metal-stack/firewall-controller-manager/controllers/set" + "github.com/metal-stack/firewall-controller-manager/controllers/timeout" "github.com/metal-stack/firewall-controller-manager/controllers/update" metalclient "github.com/metal-stack/metal-go/test/client" "github.com/metal-stack/metal-lib/pkg/tag" @@ -32,9 +33,9 @@ import ( ) const ( - namespaceName = "test" - firewallHealthTimeout = 19 * 24 * time.Hour - firewallCreateTimeout = 19 * 24 * time.Hour + namespaceName = "test" + firewallHealthTimeout = 19 * 24 * time.Hour + firewallCreateTimeout = 19 * 24 * time.Hour ) var ( @@ -169,6 +170,14 @@ var _ = BeforeSuite(func() { ) Expect(err).ToNot(HaveOccurred()) + err = timeout.SetupWithManager( + ctrl.Log.WithName("controllers").WithName("timeout"), + mgr.GetEventRecorder("timeout-controller"), + mgr, + cc, + ) + Expect(err).ToNot(HaveOccurred()) + err = deployment.SetupWebhookWithManager(ctrl.Log.WithName("defaulting-webhook"), mgr, cc) Expect(err).ToNot(HaveOccurred()) err = set.SetupWebhookWithManager(ctrl.Log.WithName("defaulting-webhook"), mgr, cc) diff --git a/main.go b/main.go index e68aad6..96e1893 100644 --- a/main.go +++ b/main.go @@ -32,6 +32,7 @@ import ( 
"github.com/metal-stack/firewall-controller-manager/controllers/firewall" "github.com/metal-stack/firewall-controller-manager/controllers/monitor" "github.com/metal-stack/firewall-controller-manager/controllers/set" + "github.com/metal-stack/firewall-controller-manager/controllers/timeout" "github.com/metal-stack/firewall-controller-manager/controllers/update" ) @@ -75,8 +76,8 @@ func main() { "Enabling this will ensure there is only one active controller manager") flag.StringVar(&namespace, "namespace", "", "the namespace this controller is running") flag.DurationVar(&reconcileInterval, "reconcile-interval", 10*time.Minute, "duration after which a resource is getting reconciled at minimum") - flag.DurationVar(&firewallHealthTimeout, "firewall-health-timeout", 20*time.Minute, "duration after a created firewall not getting ready is considered dead") - flag.DurationVar(&createTimeout, "create-timeout", 10*time.Minute, "duration after which a firewall in the creation phase will be recreated") + flag.DurationVar(&firewallHealthTimeout, "firewall-health-timeout", 0*time.Minute, "duration after a created firewall not getting ready is considered dead") + flag.DurationVar(&createTimeout, "create-timeout", 0*time.Minute, "duration after which a firewall in the creation phase will be recreated") flag.DurationVar(&safetyBackoff, "safety-backoff", 10*time.Second, "duration after which a resource is getting reconciled at minimum") flag.DurationVar(&progressDeadline, "progress-deadline", 15*time.Minute, "time after which a deployment is considered unhealthy instead of progressing (informational)") flag.DurationVar(&gracefulShutdownTimeout, "graceful-shutdown-timeout", -1, "grace period after which the controller shuts down") @@ -282,6 +283,9 @@ func main() { if err := update.SetupWithManager(ctrl.Log.WithName("controllers").WithName("update"), seedMgr.GetEventRecorder("update-controller"), seedMgr, cc); err != nil { log.Fatalf("unable to setup update controller: %v", err) } + if 
err := timeout.SetupWithManager(ctrl.Log.WithName("controllers").WithName("timeout"), seedMgr.GetEventRecorder("timeout-controller"), seedMgr, cc); err != nil { + log.Fatalf("unable to setup timeout controller: %v", err) + } if err := deployment.SetupWebhookWithManager(ctrl.Log.WithName("defaulting-webhook"), seedMgr, cc); err != nil { log.Fatalf("unable to setup webhook, controller deployment %v", err)