From abc4725a7d2903c29cb1fa447e4f290b7b0fefa6 Mon Sep 17 00:00:00 2001
From: Ayush-Patel-56
Date: Thu, 19 Feb 2026 20:56:19 +0530
Subject: [PATCH] fix: support serverless logs and robust timeout in diagnose scripts

Apply the same changes to the alluxio, goosefs, jindo and juicefs diagnose
scripts:

- run(): prefer `timeout`, fall back to `gtimeout`, then to a perl
  alarm/exec wrapper, and finally run the command without a time limit.
  The perl wrapper exits 127 when exec fails so that run() still reports
  "failed to collect info" for a command that cannot be started.
- serverless_pod_logs(): new collector, called from pd_collect, that saves
  the logs of every container whose name contains "fluid-fuse" in pods
  selected by serverless.fluid.io/inject=true and the runtime's
  fluid.io/dataset label.
- goosefs core_component(): compare against ${fluid_namespace} with ==
  instead of "${namespace}"="${fluid_namesapce}", which bash evaluates as a
  single non-empty word and is therefore always true.

Signed-off-by: Ayush-Patel-56
---
Review notes (ignored by git am):
- Line breaks and 2-space indentation were restored from a
  whitespace-collapsed copy of this patch; confirm the context lines
  against the target files before applying.
- The perl fallback line differs from the original submission, so the
  post-image blob ids on the index lines below are stale.

 tools/diagnose-fluid-alluxio.sh | 47 ++++++++++++++++++++++++++++++-
 tools/diagnose-fluid-goosefs.sh | 49 +++++++++++++++++++++++++++++++--
 tools/diagnose-fluid-jindo.sh   | 47 ++++++++++++++++++++++++++++++-
 tools/diagnose-fluid-juicefs.sh | 47 ++++++++++++++++++++++++++++++-
 4 files changed, 185 insertions(+), 5 deletions(-)

diff --git a/tools/diagnose-fluid-alluxio.sh b/tools/diagnose-fluid-alluxio.sh
index 4fe88d013a7..910b7dcc04b 100644
--- a/tools/diagnose-fluid-alluxio.sh
+++ b/tools/diagnose-fluid-alluxio.sh
@@ -21,7 +21,16 @@ print_usage() {
 run() {
   echo
   echo "-----------------run $*------------------"
-  timeout 10s "$@"
+  if command -v timeout >/dev/null 2>&1; then
+    timeout 10s "$@"
+  elif command -v gtimeout >/dev/null 2>&1; then
+    gtimeout 10s "$@"
+  elif command -v perl >/dev/null 2>&1; then
+    # No GNU coreutils (like standard macOS): perl arms a 10s alarm, then execs the command; exit 127 if exec fails
+    perl -e 'alarm shift; exec { $ARGV[0] } @ARGV or exit 127' 10 "$@"
+  else
+    "$@"
+  fi
   if [ $? != 0 ]; then
     echo "failed to collect info: $*"
   fi
@@ -57,6 +66,41 @@ runtime_pod_logs() {
   core_component "${runtime_namespace}" "alluxio-fuse" "role=alluxio-fuse" "release=${runtime_name}"
 }
 
+serverless_pod_logs() {
+  # Check if kubectl is available
+  if ! command -v kubectl >/dev/null 2>&1; then
+    echo "kubectl not found, skipping serverless pod logs collection"
+    return
+  fi
+
+  local namespace="${runtime_namespace}"
+  # More specific selector: injected and matching the specific dataset/runtime
+  local label_selector="serverless.fluid.io/inject=true,fluid.io/dataset=${runtime_namespace}.${runtime_name}"
+
+  # Get all pods with the serverless inject label
+  local pods
+  pods=$(kubectl get po -n "${namespace}" -l "${label_selector}" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null)
+  if [[ $? -ne 0 ]]; then
+    echo "failed to get serverless pods in namespace ${namespace}"
+    return
+  fi
+
+  if [[ -n "$pods" ]]; then
+    mkdir -p "$diagnose_dir/pods-${namespace}-serverless"
+    for po in ${pods}; do
+      # Find all containers containing fluid-fuse (covers init-fluid-fuse and fluid-fuse)
+      local containers
+      containers=$(kubectl get po "${po}" -n "${namespace}" -o jsonpath='{.spec.initContainers[*].name} {.spec.containers[*].name}' 2>/dev/null | tr ' ' '\n' | grep 'fluid-fuse')
+      if [[ -n "$containers" ]]; then
+        for container in ${containers}; do
+          kubectl logs "${po}" -c "${container}" -n "${namespace}" &>"$diagnose_dir/pods-${namespace}-serverless/${po}-${container}.log" 2>&1
+        done
+      fi
+    done
+  fi
+  return
+}
+
 core_component() {
   # namespace container selectors...
   local namespace="$1"
@@ -105,6 +149,7 @@ pd_collect() {
   pod_status "${fluid_namespace}"
   pod_status "${runtime_namespace}"
   runtime_pod_logs
+  serverless_pod_logs
   fluid_pod_logs
   kubectl_resource
   archive
diff --git a/tools/diagnose-fluid-goosefs.sh b/tools/diagnose-fluid-goosefs.sh
index 1c16bdcfbe1..712606a28a2 100755
--- a/tools/diagnose-fluid-goosefs.sh
+++ b/tools/diagnose-fluid-goosefs.sh
@@ -19,7 +19,16 @@ print_usage() {
 run() {
   echo
   echo "-----------------run $*------------------"
-  timeout 10s "$@"
+  if command -v timeout >/dev/null 2>&1; then
+    timeout 10s "$@"
+  elif command -v gtimeout >/dev/null 2>&1; then
+    gtimeout 10s "$@"
+  elif command -v perl >/dev/null 2>&1; then
+    # No GNU coreutils (like standard macOS): perl arms a 10s alarm, then execs the command; exit 127 if exec fails
+    perl -e 'alarm shift; exec { $ARGV[0] } @ARGV or exit 127' 10 "$@"
+  else
+    "$@"
+  fi
   if [ $? != 0 ]; then
     echo "failed to collect info: $*"
   fi
@@ -50,6 +59,41 @@ runtime_pod_logs() {
   core_component "${runtime_namespace}" "goosefs-fuse" "role=goosefs-fuse" "release=${runtime_name}"
 }
 
+serverless_pod_logs() {
+  # Check if kubectl is available
+  if ! command -v kubectl >/dev/null 2>&1; then
+    echo "kubectl not found, skipping serverless pod logs collection"
+    return
+  fi
+
+  local namespace="${runtime_namespace}"
+  # More specific selector: injected and matching the specific dataset/runtime
+  local label_selector="serverless.fluid.io/inject=true,fluid.io/dataset=${runtime_namespace}.${runtime_name}"
+
+  # Get all pods with the serverless inject label
+  local pods
+  pods=$(kubectl get po -n "${namespace}" -l "${label_selector}" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null)
+  if [[ $? -ne 0 ]]; then
+    echo "failed to get serverless pods in namespace ${namespace}"
+    return
+  fi
+
+  if [[ -n "$pods" ]]; then
+    mkdir -p "$diagnose_dir/pods-${namespace}-serverless"
+    for po in ${pods}; do
+      # Find all containers containing fluid-fuse (covers init-fluid-fuse and fluid-fuse)
+      local containers
+      containers=$(kubectl get po "${po}" -n "${namespace}" -o jsonpath='{.spec.initContainers[*].name} {.spec.containers[*].name}' 2>/dev/null | tr ' ' '\n' | grep 'fluid-fuse')
+      if [[ -n "$containers" ]]; then
+        for container in ${containers}; do
+          kubectl logs "${po}" -c "${container}" -n "${namespace}" &>"$diagnose_dir/pods-${namespace}-serverless/${po}-${container}.log" 2>&1
+        done
+      fi
+    done
+  fi
+  return
+}
+
 core_component() {
   # namespace container selectors...
   local namespace="$1"
@@ -65,7 +109,7 @@ core_component() {
   mkdir -p "$diagnose_dir/pods-${namespace}"
   pods=$(kubectl get po -n ${namespace} "${constrains}" | awk '{print $1}' | grep -v NAME)
   for po in ${pods}; do
-    if [[ "${namespace}"="${fluid_namesapce}" ]]; then
+    if [[ "${namespace}" == "${fluid_namespace}" ]]; then
       kubectl logs "${po}" -c "$container" -n ${namespace} &>"$diagnose_dir/pods-${namespace}/${po}-${container}.log" 2>&1
     else
       kubectl cp "${namespace}/${po}":/opt/goosefs/logs -c "${container}" "$diagnose_dir/pods-${namespace}/${po}-${container}" 2>&1
@@ -93,6 +137,7 @@ pd_collect() {
   pod_status "${fluid_namespace}"
   pod_status "${runtime_namespace}"
   runtime_pod_logs
+  serverless_pod_logs
   fluid_pod_logs
   kubectl_resource
   archive
diff --git a/tools/diagnose-fluid-jindo.sh b/tools/diagnose-fluid-jindo.sh
index 30e3096273c..dd259b13d09 100644
--- a/tools/diagnose-fluid-jindo.sh
+++ b/tools/diagnose-fluid-jindo.sh
@@ -21,7 +21,16 @@ print_usage() {
 run() {
   echo
   echo "-----------------run $*------------------"
-  timeout 10s "$@"
+  if command -v timeout >/dev/null 2>&1; then
+    timeout 10s "$@"
+  elif command -v gtimeout >/dev/null 2>&1; then
+    gtimeout 10s "$@"
+  elif command -v perl >/dev/null 2>&1; then
+    # No GNU coreutils (like standard macOS): perl arms a 10s alarm, then execs the command; exit 127 if exec fails
+    perl -e 'alarm shift; exec { $ARGV[0] } @ARGV or exit 127' 10 "$@"
+  else
+    "$@"
+  fi
   if [ $? != 0 ]; then
     echo "failed to collect info: $*"
   fi
@@ -55,6 +64,41 @@ runtime_pod_logs() {
   core_component "${runtime_namespace}" "jindofs-fuse" "role=jindofs-fuse" "release=${runtime_name}"
 }
 
+serverless_pod_logs() {
+  # Check if kubectl is available
+  if ! command -v kubectl >/dev/null 2>&1; then
+    echo "kubectl not found, skipping serverless pod logs collection"
+    return
+  fi
+
+  local namespace="${runtime_namespace}"
+  # More specific selector: injected and matching the specific dataset/runtime
+  local label_selector="serverless.fluid.io/inject=true,fluid.io/dataset=${runtime_namespace}.${runtime_name}"
+
+  # Get all pods with the serverless inject label
+  local pods
+  pods=$(kubectl get po -n "${namespace}" -l "${label_selector}" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null)
+  if [[ $? -ne 0 ]]; then
+    echo "failed to get serverless pods in namespace ${namespace}"
+    return
+  fi
+
+  if [[ -n "$pods" ]]; then
+    mkdir -p "$diagnose_dir/pods-${namespace}-serverless"
+    for po in ${pods}; do
+      # Find all containers containing fluid-fuse (covers init-fluid-fuse and fluid-fuse)
+      local containers
+      containers=$(kubectl get po "${po}" -n "${namespace}" -o jsonpath='{.spec.initContainers[*].name} {.spec.containers[*].name}' 2>/dev/null | tr ' ' '\n' | grep 'fluid-fuse')
+      if [[ -n "$containers" ]]; then
+        for container in ${containers}; do
+          kubectl logs "${po}" -c "${container}" -n "${namespace}" &>"$diagnose_dir/pods-${namespace}-serverless/${po}-${container}.log" 2>&1
+        done
+      fi
+    done
+  fi
+  return
+}
+
 core_component() {
   # namespace container selectors...
   local namespace="$1"
@@ -99,6 +143,7 @@ pd_collect() {
   pod_status "${fluid_namespace}"
   pod_status "${runtime_namespace}"
   runtime_pod_logs
+  serverless_pod_logs
   fluid_pod_logs
   kubectl_resource
   archive
diff --git a/tools/diagnose-fluid-juicefs.sh b/tools/diagnose-fluid-juicefs.sh
index 30d239713ae..e54610c54a6 100644
--- a/tools/diagnose-fluid-juicefs.sh
+++ b/tools/diagnose-fluid-juicefs.sh
@@ -21,7 +21,16 @@ print_usage() {
 run() {
   echo
   echo "-----------------run $*------------------"
-  timeout 10s "$@"
+  if command -v timeout >/dev/null 2>&1; then
+    timeout 10s "$@"
+  elif command -v gtimeout >/dev/null 2>&1; then
+    gtimeout 10s "$@"
+  elif command -v perl >/dev/null 2>&1; then
+    # No GNU coreutils (like standard macOS): perl arms a 10s alarm, then execs the command; exit 127 if exec fails
+    perl -e 'alarm shift; exec { $ARGV[0] } @ARGV or exit 127' 10 "$@"
+  else
+    "$@"
+  fi
   if [ $? != 0 ]; then
     echo "failed to collect info: $*"
   fi
@@ -54,6 +63,41 @@ runtime_pod_logs() {
   core_component "${runtime_namespace}" "juicefs-fuse" "role=juicefs-fuse" "release=${runtime_name}"
 }
 
+serverless_pod_logs() {
+  # Check if kubectl is available
+  if ! command -v kubectl >/dev/null 2>&1; then
+    echo "kubectl not found, skipping serverless pod logs collection"
+    return
+  fi
+
+  local namespace="${runtime_namespace}"
+  # More specific selector: injected and matching the specific dataset/runtime
+  local label_selector="serverless.fluid.io/inject=true,fluid.io/dataset=${runtime_namespace}.${runtime_name}"
+
+  # Get all pods with the serverless inject label
+  local pods
+  pods=$(kubectl get po -n "${namespace}" -l "${label_selector}" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null)
+  if [[ $? -ne 0 ]]; then
+    echo "failed to get serverless pods in namespace ${namespace}"
+    return
+  fi
+
+  if [[ -n "$pods" ]]; then
+    mkdir -p "$diagnose_dir/pods-${namespace}-serverless"
+    for po in ${pods}; do
+      # Find all containers containing fluid-fuse (covers init-fluid-fuse and fluid-fuse)
+      local containers
+      containers=$(kubectl get po "${po}" -n "${namespace}" -o jsonpath='{.spec.initContainers[*].name} {.spec.containers[*].name}' 2>/dev/null | tr ' ' '\n' | grep 'fluid-fuse')
+      if [[ -n "$containers" ]]; then
+        for container in ${containers}; do
+          kubectl logs "${po}" -c "${container}" -n "${namespace}" &>"$diagnose_dir/pods-${namespace}-serverless/${po}-${container}.log" 2>&1
+        done
+      fi
+    done
+  fi
+  return
+}
+
 core_component() {
   # namespace container selectors...
   local namespace="$1"
@@ -98,6 +142,7 @@ pd_collect() {
   pod_status "${fluid_namespace}"
   pod_status "${runtime_namespace}"
   runtime_pod_logs
+  serverless_pod_logs
   fluid_pod_logs
   kubectl_resource
   archive