Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 30 additions & 23 deletions src/chk/chk_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ chk_pending_free(struct btr_instance *tins, struct btr_record *rec, void *args)
ABT_mutex_unlock(cpr->cpr_mutex);
} else {
ABT_mutex_unlock(cpr->cpr_mutex);
chk_pending_destroy(cpr);
chk_pending_destroy(NULL, cpr);
}
}

Expand Down Expand Up @@ -930,6 +930,27 @@ chk_pool_shard_cleanup(struct chk_instance *ins)
}
}

int
chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr)
{
d_iov_t kiov;
d_iov_t riov;
int rc;

d_iov_set(&riov, NULL, 0);
d_iov_set(&kiov, &seq, sizeof(seq));

ABT_rwlock_rdlock(ins->ci_abt_lock);
rc = dbtree_lookup(ins->ci_pending_hdl, &kiov, &riov);
ABT_rwlock_unlock(ins->ci_abt_lock);
if (rc == 0)
*cpr = (struct chk_pending_rec *)riov.iov_buf;
else
*cpr = NULL;

return rc;
}

int
chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid,
uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options,
Expand Down Expand Up @@ -985,12 +1006,14 @@ chk_pending_del(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec *
d_iov_set(&kiov, &seq, sizeof(seq));

ABT_rwlock_wrlock(ins->ci_abt_lock);
rc = dbtree_delete(ins->ci_pending_hdl, BTR_PROBE_EQ, &kiov, &riov);
rc = dbtree_delete(ins->ci_pending_hdl, BTR_PROBE_EQ, &kiov, cpr == NULL ? NULL : &riov);
ABT_rwlock_unlock(ins->ci_abt_lock);
if (rc == 0)
*cpr = (struct chk_pending_rec *)riov.iov_buf;
else
*cpr = NULL;
if (cpr != NULL) {
if (rc == 0)
*cpr = (struct chk_pending_rec *)riov.iov_buf;
else
*cpr = NULL;
}

D_CDEBUG(rc != 0, DLOG_ERR, DLOG_DBG,
"Del pending record with gen "DF_X64", seq "DF_X64": "DF_RC"\n",
Expand Down Expand Up @@ -1028,29 +1051,13 @@ chk_pending_wakeup(struct chk_instance *ins, struct chk_pending_rec *cpr)
ABT_mutex_unlock(cpr->cpr_mutex);
} else {
ABT_mutex_unlock(cpr->cpr_mutex);
chk_pending_destroy(cpr);
chk_pending_destroy(ins, cpr);
}
}

return rc;
}

void
chk_pending_destroy(struct chk_pending_rec *cpr)
{
D_ASSERT(d_list_empty(&cpr->cpr_pool_link));
D_ASSERT(d_list_empty(&cpr->cpr_rank_link));
D_ASSERT(d_list_empty(&cpr->cpr_ins_link));

if (cpr->cpr_cond != ABT_COND_NULL)
ABT_cond_free(&cpr->cpr_cond);

if (cpr->cpr_mutex != ABT_MUTEX_NULL)
ABT_mutex_free(&cpr->cpr_mutex);

D_FREE(cpr);
}

int
chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_property *prop)
{
Expand Down
43 changes: 21 additions & 22 deletions src/chk/chk_engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ chk_engine_post_repair(struct chk_pool_rec *cpr, int *result, bool update)
*result = 0;

if (*result != 0) {
chk_ins_set_fail(cpr->cpr_ins, cbk->cb_phase);
if (cpr->cpr_ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT) {
cbk->cb_time.ct_stop_time = time(NULL);
cbk->cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_FAILED;
Expand Down Expand Up @@ -1204,10 +1205,13 @@ chk_engine_cont_target_label_empty(struct chk_cont_rec *ccr)
static inline bool
chk_engine_cont_cs_label_empty(struct chk_cont_rec *ccr)
{
if (daos_iov_empty(&ccr->ccr_label_cs))
d_iov_t *label = &ccr->ccr_label_cs;

if (daos_iov_empty(label))
return true;

if (strncmp(DAOS_PROP_NO_CO_LABEL, ccr->ccr_label_cs.iov_buf, DAOS_PROP_LABEL_MAX_LEN) == 0)
if (strlen(DAOS_PROP_NO_CO_LABEL) == label->iov_len &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this means iov_len isn't null terminated. Maybe could strncmp with label->iov_len as the length, and avoid having to do these extra checks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The label->iov_len maybe smaller than strlen(DAOS_PROP_NO_CO_LABEL). So even if strncmp() returns zero, we still cannot say they are the same.

strncmp(DAOS_PROP_NO_CO_LABEL, label->iov_buf, label->iov_len) == 0)
return true;

return false;
Expand Down Expand Up @@ -1579,8 +1583,8 @@ chk_engine_cont_label_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg

ccr = riov.iov_buf;
if (ccr->ccr_label_prop == NULL ||
strncmp(key->iov_buf, ccr->ccr_label_prop->dpp_entries[0].dpe_str,
DAOS_PROP_LABEL_MAX_LEN) != 0)
key->iov_len != strlen(ccr->ccr_label_prop->dpp_entries[0].dpe_str) ||
strncmp(key->iov_buf, ccr->ccr_label_prop->dpp_entries[0].dpe_str, key->iov_len) != 0)
rc = daos_iov_copy(&ccr->ccr_label_cs, key);
else
ccr->ccr_label_checked = 1;
Expand Down Expand Up @@ -3177,13 +3181,12 @@ chk_engine_set_policy(uint64_t gen, uint32_t policy_nr, struct chk_policy *polic
static int
chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision)
{
struct chk_instance *ins = chk_engine;
struct chk_pending_rec *cpr = NULL;
struct chk_pending_rec *tmp = NULL;
struct chk_pool_rec *pool = NULL;
d_iov_t kiov;
d_iov_t riov;
int rc;
struct chk_instance *ins = chk_engine;
struct chk_pending_rec *cpr = NULL;
struct chk_pool_rec *pool = NULL;
d_iov_t kiov;
d_iov_t riov;
int rc;

D_ASSERT(cru->cru_pool != NULL);

Expand Down Expand Up @@ -3220,14 +3223,9 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision)
cru->cru_detail_nr, cru->cru_details, *seq);
if (unlikely(rc == -DER_AGAIN)) {
D_ASSERT(cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT);
D_ASSERT(cpr != NULL);

rc = chk_pending_del(ins, *seq, &tmp);
if (rc == 0)
D_ASSERT(tmp == NULL);
else if (rc != -DER_NONEXIST)
goto log;

chk_pending_destroy(cpr);
chk_pending_destroy(ins, cpr);
cpr = NULL;

goto new_seq;
Expand Down Expand Up @@ -3273,11 +3271,12 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision)
goto again;

out:
if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING)
pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING;

if (cpr != NULL)
chk_pending_destroy(cpr);
chk_pending_destroy(ins, cpr);

if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING &&
d_list_empty(&pool->cpr_pending_list))
pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING;

return rc;
}
Expand Down
24 changes: 22 additions & 2 deletions src/chk/chk_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,8 @@ int chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t

void chk_pool_shard_cleanup(struct chk_instance *ins);

int chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr);

int chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid,
uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr,
uint32_t *options, struct chk_pending_rec **cpr);
Expand All @@ -750,8 +752,6 @@ int chk_pending_del(struct chk_instance *ins, uint64_t seq, struct chk_pending_r

int chk_pending_wakeup(struct chk_instance *ins, struct chk_pending_rec *cpr);

void chk_pending_destroy(struct chk_pending_rec *cpr);

int chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_property *prop);

int chk_prop_prepare(d_rank_t leader, uint32_t flags, uint32_t policy_nr,
Expand Down Expand Up @@ -986,6 +986,26 @@ chk_destroy_tree(daos_handle_t *toh, struct btr_root *root)
}
}

static inline void
chk_pending_destroy(struct chk_instance *ins, struct chk_pending_rec *cpr)
{
if (d_list_empty(&cpr->cpr_pool_link)) {
D_ASSERT(d_list_empty(&cpr->cpr_rank_link));
D_ASSERT(d_list_empty(&cpr->cpr_ins_link));

if (cpr->cpr_cond != ABT_COND_NULL)
ABT_cond_free(&cpr->cpr_cond);

if (cpr->cpr_mutex != ABT_MUTEX_NULL)
ABT_mutex_free(&cpr->cpr_mutex);

D_FREE(cpr);
} else {
cpr->cpr_busy = 0;
chk_pending_del(ins, cpr->cpr_seq, NULL);
}
}

static inline void
chk_destroy_pending_tree(struct chk_instance *ins)
{
Expand Down
27 changes: 14 additions & 13 deletions src/chk/chk_leader.c
Original file line number Diff line number Diff line change
Expand Up @@ -3469,12 +3469,10 @@ chk_leader_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act)
d_iov_t riov;
int rc;

rc = chk_pending_del(ins, seq, &pending);
rc = chk_pending_lookup(ins, seq, &pending);
if (rc != 0)
goto out;

D_ASSERT(pending->cpr_busy);

if (pending->cpr_on_leader) {
ABT_mutex_lock(pending->cpr_mutex);
/*
Expand All @@ -3484,20 +3482,24 @@ chk_leader_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act)
pending->cpr_action = act;
ABT_cond_broadcast(pending->cpr_cond);
ABT_mutex_unlock(pending->cpr_mutex);
chk_pending_del(ins, seq, &pending);
} else {
d_iov_set(&riov, NULL, 0);
d_iov_set(&kiov, pending->cpr_uuid, sizeof(uuid_t));
rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov);
if (rc == 0) {
if (rc == 0)
pool = (struct chk_pool_rec *)riov.iov_buf;
if (pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING)
pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING;
}

rc = chk_act_remote(ins->ci_ranks, ins->ci_bk.cb_gen, seq, pending->cpr_class, act,
pending->cpr_rank);
if (rc == 0) {
chk_pending_destroy(ins, pending);

chk_pending_destroy(pending);
if (pool != NULL &&
pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING &&
d_list_empty(&pool->cpr_pending_list))
pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING;
}
}

out:
Expand Down Expand Up @@ -3707,14 +3709,13 @@ chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision)
goto again;

out:
if ((rc != 0 || decision != NULL) && cpr != NULL)
chk_pending_destroy(ins, cpr);

if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING &&
(rc != 0 || (cpr != NULL &&
cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)))
d_list_empty(&pool->cpr_pending_list))
pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING;

if ((rc != 0 || decision != NULL) && cpr != NULL)
chk_pending_destroy(cpr);

return rc;
}

Expand Down
4 changes: 4 additions & 0 deletions src/chk/chk_upcall.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2022 Intel Corporation.
* (C) Copyright 2026 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -83,6 +84,9 @@ chk_report_upcall(uint64_t gen, uint64_t seq, uint32_t cla, uint32_t act, int re
time_t tm = time(NULL);
int rc;

if (DAOS_FAIL_CHECK(DAOS_CHK_REPORT_FAILURE))
return -DER_IO;

report.seq = seq;
report.class_ = cla;
report.action = act;
Expand Down
1 change: 1 addition & 0 deletions src/include/daos/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,7 @@ enum {
#define DAOS_CHK_ENGINE_DEATH (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb9)
#define DAOS_CHK_VERIFY_CONT_SHARDS (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xba)
#define DAOS_CHK_ORPHAN_POOL_SHARD (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xbb)
#define DAOS_CHK_REPORT_FAILURE (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xbc)

#define DAOS_MGMT_FAIL_CREATE_QUERY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xe0)

Expand Down
Loading
Loading