From d13a0b13e1b8c84dae5b3e22484f564c3a40dcaa Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Wed, 11 Feb 2026 21:04:04 +0800 Subject: [PATCH] DAOS-18587 chk: handle report upcall failure Anytime when DAOS engine logic needs interaction with admin, it will generate new interaction record in chk_instance::ci_pending_hdl tree, and then trigger dRPP upcall to control plane that may fail for some reason. If hit failure, the dRPC sponsor needs to remove such record from chk_instance::ci_pending_hdl tree before destroying it to avoid triggering fake assertion. The patch also fixes a container label check issue: If the label is transferred as d_iov_t instead of string, then the buffer maybe not '\0' terminated, need to check its buffer length. Test-tag: recovery Signed-off-by: Fan Yong --- src/chk/chk_common.c | 53 ++++++++------ src/chk/chk_engine.c | 43 ++++++----- src/chk/chk_internal.h | 24 ++++++- src/chk/chk_leader.c | 27 +++---- src/chk/chk_upcall.c | 4 ++ src/include/daos/common.h | 1 + src/tests/suite/daos_cr.c | 145 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 237 insertions(+), 60 deletions(-) diff --git a/src/chk/chk_common.c b/src/chk/chk_common.c index adf5d068523..21d0a8ceba7 100644 --- a/src/chk/chk_common.c +++ b/src/chk/chk_common.c @@ -291,7 +291,7 @@ chk_pending_free(struct btr_instance *tins, struct btr_record *rec, void *args) ABT_mutex_unlock(cpr->cpr_mutex); } else { ABT_mutex_unlock(cpr->cpr_mutex); - chk_pending_destroy(cpr); + chk_pending_destroy(NULL, cpr); } } @@ -930,6 +930,27 @@ chk_pool_shard_cleanup(struct chk_instance *ins) } } +int +chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr) +{ + d_iov_t kiov; + d_iov_t riov; + int rc; + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, &seq, sizeof(seq)); + + ABT_rwlock_rdlock(ins->ci_abt_lock); + rc = dbtree_lookup(ins->ci_pending_hdl, &kiov, &riov); + ABT_rwlock_unlock(ins->ci_abt_lock); + if (rc == 0) + *cpr = (struct chk_pending_rec *)riov.iov_buf; + else + *cpr = NULL; + + return rc; +} + int chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid, uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options, @@ -985,12 +1006,14 @@ chk_pending_del(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec * d_iov_set(&kiov, &seq, sizeof(seq)); ABT_rwlock_wrlock(ins->ci_abt_lock); - rc = dbtree_delete(ins->ci_pending_hdl, BTR_PROBE_EQ, &kiov, &riov); + rc = dbtree_delete(ins->ci_pending_hdl, BTR_PROBE_EQ, &kiov, cpr == NULL ? NULL : &riov); ABT_rwlock_unlock(ins->ci_abt_lock); - if (rc == 0) - *cpr = (struct chk_pending_rec *)riov.iov_buf; - else - *cpr = NULL; + if (cpr != NULL) { + if (rc == 0) + *cpr = (struct chk_pending_rec *)riov.iov_buf; + else + *cpr = NULL; + } D_CDEBUG(rc != 0, DLOG_ERR, DLOG_DBG, "Del pending record with gen "DF_X64", seq "DF_X64": "DF_RC"\n", @@ -1028,29 +1051,13 @@ chk_pending_wakeup(struct chk_instance *ins, struct chk_pending_rec *cpr) ABT_mutex_unlock(cpr->cpr_mutex); } else { ABT_mutex_unlock(cpr->cpr_mutex); - chk_pending_destroy(cpr); + chk_pending_destroy(ins, cpr); } } return rc; } -void -chk_pending_destroy(struct chk_pending_rec *cpr) -{ - D_ASSERT(d_list_empty(&cpr->cpr_pool_link)); - D_ASSERT(d_list_empty(&cpr->cpr_rank_link)); - D_ASSERT(d_list_empty(&cpr->cpr_ins_link)); - - if (cpr->cpr_cond != ABT_COND_NULL) - ABT_cond_free(&cpr->cpr_cond); - - if (cpr->cpr_mutex != ABT_MUTEX_NULL) - ABT_mutex_free(&cpr->cpr_mutex); - - D_FREE(cpr); -} - int chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_property *prop) { diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c index 3dfb7b9b705..b7374acb806 100644 --- a/src/chk/chk_engine.c +++ b/src/chk/chk_engine.c @@ -246,6 +246,7 @@ chk_engine_post_repair(struct chk_pool_rec *cpr, int *result, bool update) *result = 0; if (*result != 0) { + chk_ins_set_fail(cpr->cpr_ins, cbk->cb_phase); if (cpr->cpr_ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT) { cbk->cb_time.ct_stop_time = time(NULL); cbk->cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_FAILED; @@ -1204,10 +1205,13 @@ chk_engine_cont_target_label_empty(struct chk_cont_rec *ccr) static inline bool chk_engine_cont_cs_label_empty(struct chk_cont_rec *ccr) { - if (daos_iov_empty(&ccr->ccr_label_cs)) + d_iov_t *label = &ccr->ccr_label_cs; + + if (daos_iov_empty(label)) return true; - if (strncmp(DAOS_PROP_NO_CO_LABEL, ccr->ccr_label_cs.iov_buf, DAOS_PROP_LABEL_MAX_LEN) == 0) + if (strlen(DAOS_PROP_NO_CO_LABEL) == label->iov_len && + strncmp(DAOS_PROP_NO_CO_LABEL, label->iov_buf, label->iov_len) == 0) return true; return false; @@ -1579,8 +1583,8 @@ chk_engine_cont_label_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg ccr = riov.iov_buf; if (ccr->ccr_label_prop == NULL || - strncmp(key->iov_buf, ccr->ccr_label_prop->dpp_entries[0].dpe_str, - DAOS_PROP_LABEL_MAX_LEN) != 0) + key->iov_len != strlen(ccr->ccr_label_prop->dpp_entries[0].dpe_str) || + strncmp(key->iov_buf, ccr->ccr_label_prop->dpp_entries[0].dpe_str, key->iov_len) != 0) rc = daos_iov_copy(&ccr->ccr_label_cs, key); else ccr->ccr_label_checked = 1; @@ -3177,13 +3181,12 @@ chk_engine_set_policy(uint64_t gen, uint32_t policy_nr, struct chk_policy *polic static int chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) { - struct chk_instance *ins = chk_engine; - struct chk_pending_rec *cpr = NULL; - struct chk_pending_rec *tmp = NULL; - struct chk_pool_rec *pool = NULL; - d_iov_t kiov; - d_iov_t riov; - int rc; + struct chk_instance *ins = chk_engine; + struct chk_pending_rec *cpr = NULL; + struct chk_pool_rec *pool = NULL; + d_iov_t kiov; + d_iov_t riov; + int rc; D_ASSERT(cru->cru_pool != NULL); @@ -3220,14 +3223,9 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) cru->cru_detail_nr, cru->cru_details, *seq); if (unlikely(rc == -DER_AGAIN)) { D_ASSERT(cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT); + D_ASSERT(cpr != NULL); - rc = chk_pending_del(ins, *seq, &tmp); - if (rc == 0) - D_ASSERT(tmp == NULL); - else if (rc != -DER_NONEXIST) - goto log; - - chk_pending_destroy(cpr); + chk_pending_destroy(ins, cpr); cpr = NULL; goto new_seq; @@ -3273,11 +3271,12 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) goto again; out: - if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING) - pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; - if (cpr != NULL) - chk_pending_destroy(cpr); + chk_pending_destroy(ins, cpr); + + if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && + d_list_empty(&pool->cpr_pending_list)) + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; return rc; } diff --git a/src/chk/chk_internal.h b/src/chk/chk_internal.h index e4d6d52f3fd..276f7121c51 100644 --- a/src/chk/chk_internal.h +++ b/src/chk/chk_internal.h @@ -742,6 +742,8 @@ int chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t void chk_pool_shard_cleanup(struct chk_instance *ins); +int chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr); + int chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid, uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options, struct chk_pending_rec **cpr); @@ -750,8 +752,6 @@ int chk_pending_del(struct chk_instance *ins, uint64_t seq, struct chk_pending_r int chk_pending_wakeup(struct chk_instance *ins, struct chk_pending_rec *cpr); -void chk_pending_destroy(struct chk_pending_rec *cpr); - int chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_property *prop); int chk_prop_prepare(d_rank_t leader, uint32_t flags, uint32_t policy_nr, @@ -986,6 +986,26 @@ chk_destroy_tree(daos_handle_t *toh, struct btr_root *root) } } +static inline void +chk_pending_destroy(struct chk_instance *ins, struct chk_pending_rec *cpr) +{ + if (d_list_empty(&cpr->cpr_pool_link)) { + D_ASSERT(d_list_empty(&cpr->cpr_rank_link)); + D_ASSERT(d_list_empty(&cpr->cpr_ins_link)); + + if (cpr->cpr_cond != ABT_COND_NULL) + ABT_cond_free(&cpr->cpr_cond); + + if (cpr->cpr_mutex != ABT_MUTEX_NULL) + ABT_mutex_free(&cpr->cpr_mutex); + + D_FREE(cpr); + } else { + cpr->cpr_busy = 0; + chk_pending_del(ins, cpr->cpr_seq, NULL); + } +} + static inline void chk_destroy_pending_tree(struct chk_instance *ins) { diff --git a/src/chk/chk_leader.c b/src/chk/chk_leader.c index 31cfab811f6..18be52d0ac0 100644 --- a/src/chk/chk_leader.c +++ b/src/chk/chk_leader.c @@ -3469,12 +3469,10 @@ chk_leader_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act) d_iov_t riov; int rc; - rc = chk_pending_del(ins, seq, &pending); + rc = chk_pending_lookup(ins, seq, &pending); if (rc != 0) goto out; - D_ASSERT(pending->cpr_busy); - if (pending->cpr_on_leader) { ABT_mutex_lock(pending->cpr_mutex); /* @@ -3484,20 +3482,24 @@ chk_leader_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act) pending->cpr_action = act; ABT_cond_broadcast(pending->cpr_cond); ABT_mutex_unlock(pending->cpr_mutex); + chk_pending_del(ins, seq, &pending); } else { d_iov_set(&riov, NULL, 0); d_iov_set(&kiov, pending->cpr_uuid, sizeof(uuid_t)); rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); - if (rc == 0) { + if (rc == 0) pool = (struct chk_pool_rec *)riov.iov_buf; - if (pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING) - pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; - } rc = chk_act_remote(ins->ci_ranks, ins->ci_bk.cb_gen, seq, pending->cpr_class, act, pending->cpr_rank); + if (rc == 0) { + chk_pending_destroy(ins, pending); - chk_pending_destroy(pending); + if (pool != NULL && + pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && + d_list_empty(&pool->cpr_pending_list)) + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; + } } out: @@ -3707,14 +3709,13 @@ chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) goto again; out: + if ((rc != 0 || decision != NULL) && cpr != NULL) + chk_pending_destroy(ins, cpr); + if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && - (rc != 0 || (cpr != NULL && - cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT))) + d_list_empty(&pool->cpr_pending_list)) pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; - if ((rc != 0 || decision != NULL) && cpr != NULL) - chk_pending_destroy(cpr); - return rc; } diff --git a/src/chk/chk_upcall.c b/src/chk/chk_upcall.c index bbc05db5f75..8d699195ac9 100644 --- a/src/chk/chk_upcall.c +++ b/src/chk/chk_upcall.c @@ -1,5 +1,6 @@ /* * (C) Copyright 2022 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -83,6 +84,9 @@ chk_report_upcall(uint64_t gen, uint64_t seq, uint32_t cla, uint32_t act, int re time_t tm = time(NULL); int rc; + if (DAOS_FAIL_CHECK(DAOS_CHK_REPORT_FAILURE)) + return -DER_IO; + report.seq = seq; report.class_ = cla; report.action = act; diff --git a/src/include/daos/common.h b/src/include/daos/common.h index e02c4f28022..a31cf34dfa7 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -928,6 +928,7 @@ enum { #define DAOS_CHK_ENGINE_DEATH (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb9) #define DAOS_CHK_VERIFY_CONT_SHARDS (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xba) #define DAOS_CHK_ORPHAN_POOL_SHARD (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xbb) +#define DAOS_CHK_REPORT_FAILURE (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xbc) #define DAOS_MGMT_FAIL_CREATE_QUERY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xe0) diff --git a/src/tests/suite/daos_cr.c b/src/tests/suite/daos_cr.c index f2be8fbc056..1c31158cb17 100644 --- a/src/tests/suite/daos_cr.c +++ b/src/tests/suite/daos_cr.c @@ -3903,6 +3903,147 @@ cr_lost_rank0(void **state) cr_cleanup(arg, &pool, 1); } +/* + * 1. Create pool. + * 2. Fault injection to generate inconsistent pool label. + * 3. Set fail_loc to fail interaction report. + * 4. Start checker with option "--failout=on" and "POOL_BAD_LABEL:CIA_INTERACT". Should not crash. + * 5. Query checker, instance should failed, pool should be "failed". + * 6. Reset fail_loc. + * 7. Switch to normal mode to verify the pool label. + * 8. Cleanup. + */ +static void +cr_leader_report_fail(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = {0}; + struct daos_check_info dci = {0}; + char *label = NULL; + int rc; + + FAULT_INJECTION_REQUIRED(); + + print_message("CR30: Leader handle report failure\n"); + + rc = cr_pool_create(state, &pool, false, TCC_POOL_BAD_LABEL); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + /* Inject fail_loc to fail interaction report. */ + rc = cr_debug_set_params(arg, DAOS_CHK_REPORT_FAILURE | DAOS_FAIL_ALWAYS); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_FAILOUT | TCSF_RESET, 0, NULL, "POOL_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_FAILED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_FAILED, 0, NULL, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_debug_set_params(arg, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + print_message("CR: getting label for pool " DF_UUID " after check\n", + DP_UUID(pool.pool_uuid)); + rc = dmg_pool_get_prop(dmg_config_file, pool.label, pool.pool_uuid, "label", &label); + assert_rc_equal(rc, 0); + + D_ASSERTF(strcmp(label, pool.label) != 0, + "Pool (" DF_UUID ") label should not be repaired: %s\n", DP_UUID(pool.pool_uuid), + label); + + D_FREE(label); + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool and container. + * 2. Fault injection to make container label inconsistent. + * 3. Set fail_loc to fail interaction report. + * 4. Start checker with option "--failout=on" and "CONT_BAD_LABEL:CIA_INTERACT". Should not crash. + * 5. Query checker, instance should failed, pool should be "failed". + * 6. Reset fail_loc. + * 7. Switch to normal mode to verify the container label. + * 8. Cleanup. + */ +static void +cr_engine_report_fail(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = {0}; + struct test_cont cont = {0}; + struct daos_check_info dci = {0}; + char *label = NULL; + int rc; + + FAULT_INJECTION_REQUIRED(); + + print_message("CR31: Engine handle report failure\n"); + + rc = cr_pool_create(state, &pool, true, TCC_NONE); + assert_rc_equal(rc, 0); + + rc = cr_cont_create(state, &pool, &cont, 1); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + /* Inject fail_loc to fail interaction report. */ + rc = cr_debug_set_params(arg, DAOS_CHK_REPORT_FAILURE | DAOS_FAIL_ALWAYS); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_FAILOUT | TCSF_RESET, 0, NULL, "CONT_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_FAILED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_FAILED, 0, NULL, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_debug_set_params(arg, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + /* Former connection for the pool has been evicted by checker. Let's re-connect the pool. */ + rc = cr_cont_get_label(state, &pool, &cont, true, &label); + assert_rc_equal(rc, 0); + + D_ASSERTF(strcmp(label, cont.label) != 0, + "Cont (" DF_UUID ") label should not be repaired: %s\n", DP_UUID(cont.uuid), + label); + + D_FREE(label); + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + /* clang-format off */ static const struct CMUnitTest cr_tests[] = { { "CR1: start checker for specified pools", @@ -3963,6 +4104,10 @@ static const struct CMUnitTest cr_tests[] = { cr_maintenance_mode, async_disable, test_case_teardown}, { "CR29: CR with rank 0 excluded at the beginning", cr_lost_rank0, async_disable, test_case_teardown}, + { "CR30: Leader handle report failure", + cr_leader_report_fail, async_disable, test_case_teardown}, + { "CR31: Engine handle report failure", + cr_engine_report_fail, async_disable, test_case_teardown}, }; /* clang-format on */