diff --git a/simplyblock_cli/cli-reference.yaml b/simplyblock_cli/cli-reference.yaml index fd5a4bda9..db216ae2d 100644 --- a/simplyblock_cli/cli-reference.yaml +++ b/simplyblock_cli/cli-reference.yaml @@ -1085,6 +1085,41 @@ commands: dest: strict_node_anti_affinity type: bool action: store_true + - name: "--support-storage-tiering" + help: "Enable storage tiering for the cluster" + dest: storage_tiering + type: bool + default: false + - name: "--s3-endpoint" + help: "S3 endpoint for storage tiering" + dest: s3_endpoint + type: str + default: "" + - name: "--s3-bucket" + help: "S3 bucket for storage tiering" + dest: s3_bucket + type: str + default: "" + - name: "--s3-access-key" + help: "S3 access key for storage tiering" + dest: s3_access_key + type: str + default: "" + - name: "--s3-secret-key" + help: "S3 secret key for storage tiering" + dest: s3_secret_key + type: str + default: "" + - name: "--s3-workerpool-mask" + help: "S3 workerpool mask for storage tiering" + dest: s3_workerpool_mask + type: + regex: "^(0x|0X)?[a-fA-F0-9]+$" + - name: "--s3-workerpool-size" + help: "S3 workerpool size for storage tiering" + dest: s3_workerpool_size + type: int + default: 32 - name: add help: "Adds a new cluster" arguments: @@ -1185,6 +1220,41 @@ commands: dest: strict_node_anti_affinity type: bool action: store_true + - name: "--support-storage-tiering" + help: "Enable storage tiering for the cluster" + dest: storage_tiering + type: bool + default: false + - name: "--s3-endpoint" + help: "S3 endpoint for storage tiering" + dest: s3_endpoint + type: str + default: "" + - name: "--s3-bucket" + help: "S3 bucket for storage tiering" + dest: s3_bucket + type: str + default: "" + - name: "--s3-access-key" + help: "S3 access key for storage tiering" + dest: s3_access_key + type: str + default: "" + - name: "--s3-secret-key" + help: "S3 secret key for storage tiering" + dest: s3_secret_key + type: str + default: "" + - name: "--s3-workerpool-mask" + help: "S3 workerpool mask for storage tiering" + dest: s3_workerpool_mask + type: + regex: "^(0x|0X)?[a-fA-F0-9]+$" + - name: "--s3-workerpool-size" + help: "S3 workerpool size for storage tiering" + dest: s3_workerpool_size + type: int + default: 32 - name: activate help: > Activates a cluster. @@ -1986,6 +2056,13 @@ commands: dest: resize type: size default: "0" + - name: backup + help: "Backs up a snapshot to a s3 storage using storage tiering" + arguments: + - name: "snapshot_id" + help: "Snapshot id" + dest: snapshot_id + type: str - name: "caching-node" help: "Caching node commands" aliases: diff --git a/simplyblock_cli/cli.py b/simplyblock_cli/cli.py index 918ad8411..761a63d6f 100644 --- a/simplyblock_cli/cli.py +++ b/simplyblock_cli/cli.py @@ -440,6 +440,13 @@ def init_cluster__create(self, subparser): if self.developer_mode: argument = subcommand.add_argument('--enable-qos', help='Enable qos bdev for storage nodes, true by default', type=bool, default=False, dest='enable_qos') argument = subcommand.add_argument('--strict-node-anti-affinity', help='Enable strict node anti affinity for storage nodes. Never more than one chunk is placed on a node. This requires a minimum of _data-chunks-in-stripe + parity-chunks-in-stripe + 1_ nodes in the cluster.', dest='strict_node_anti_affinity', action='store_true') + argument = subcommand.add_argument('--support-storage-tiering', help='Enable storage tiering for the cluster', type=bool, default=False, dest='storage_tiering') + argument = subcommand.add_argument('--s3-endpoint', help='S3 endpoint for storage tiering', type=str, default='', dest='s3_endpoint') + argument = subcommand.add_argument('--s3-bucket', help='S3 bucket for storage tiering', type=str, default='', dest='s3_bucket') + argument = subcommand.add_argument('--s3-access-key', help='S3 access key for storage tiering', type=str, default='', dest='s3_access_key') + argument = subcommand.add_argument('--s3-secret-key', help='S3 secret key for storage tiering', type=str, default='', dest='s3_secret_key') + argument = subcommand.add_argument('--s3-workerpool-mask', help='S3 workerpool mask for storage tiering', type=regex_type(r'^(0x|0X)?[a-fA-F0-9]+$'), dest='s3_workerpool_mask') + argument = subcommand.add_argument('--s3-workerpool-size', help='S3 workerpool size for storage tiering', type=int, default=32, dest='s3_workerpool_size') def init_cluster__add(self, subparser): subcommand = self.add_sub_command(subparser, 'add', 'Adds a new cluster') @@ -465,6 +472,13 @@ def init_cluster__add(self, subparser): if self.developer_mode: argument = subcommand.add_argument('--enable-qos', help='Enable qos bdev for storage nodes, default: true', type=bool, default=False, dest='enable_qos') argument = subcommand.add_argument('--strict-node-anti-affinity', help='Enable strict node anti affinity for storage nodes. Never more than one chunk is placed on a node. This requires a minimum of _data-chunks-in-stripe + parity-chunks-in-stripe + 1_ nodes in the cluster."', dest='strict_node_anti_affinity', action='store_true') + argument = subcommand.add_argument('--support-storage-tiering', help='Enable storage tiering for the cluster', type=bool, default=False, dest='storage_tiering') + argument = subcommand.add_argument('--s3-endpoint', help='S3 endpoint for storage tiering', type=str, default='', dest='s3_endpoint') + argument = subcommand.add_argument('--s3-bucket', help='S3 bucket for storage tiering', type=str, default='', dest='s3_bucket') + argument = subcommand.add_argument('--s3-access-key', help='S3 access key for storage tiering', type=str, default='', dest='s3_access_key') + argument = subcommand.add_argument('--s3-secret-key', help='S3 secret key for storage tiering', type=str, default='', dest='s3_secret_key') + argument = subcommand.add_argument('--s3-workerpool-mask', help='S3 workerpool mask for storage tiering', type=regex_type(r'^(0x|0X)?[a-fA-F0-9]+$'), dest='s3_workerpool_mask') + argument = subcommand.add_argument('--s3-workerpool-size', help='S3 workerpool size for storage tiering', type=int, default=32, dest='s3_workerpool_size') def init_cluster__activate(self, subparser): subcommand = self.add_sub_command(subparser, 'activate', 'Activates a cluster.') @@ -785,6 +799,7 @@ def init_snapshot(self): self.init_snapshot__list(subparser) self.init_snapshot__delete(subparser) self.init_snapshot__clone(subparser) + self.init_snapshot__backup(subparser) def init_snapshot__add(self, subparser): @@ -807,6 +822,10 @@ def init_snapshot__clone(self, subparser): subcommand.add_argument('lvol_name', help='Logical volume name', type=str) argument = subcommand.add_argument('--resize', help='New logical volume size: 10M, 10G, 10(bytes). Can only increase.', type=size_type(), default='0', dest='resize') + def init_snapshot__backup(self, subparser): + subcommand = self.add_sub_command(subparser, 'backup', 'Backs up a snapshot to a s3 storage using storage tiering') + subcommand.add_argument('snapshot_id', help='Snapshot id', type=str) + def init_caching_node(self): subparser = self.add_command('caching-node', 'Caching node commands', aliases=['cn',]) @@ -1214,6 +1233,8 @@ def run(self): ret = self.snapshot__delete(sub_command, args) elif sub_command in ['clone']: ret = self.snapshot__clone(sub_command, args) + elif sub_command in ['backup']: + ret = self.snapshot__backup(sub_command, args) else: self.parser.print_help() diff --git a/simplyblock_cli/clibase.py b/simplyblock_cli/clibase.py index bfbbf2c77..f04aecbf0 100644 --- a/simplyblock_cli/clibase.py +++ b/simplyblock_cli/clibase.py @@ -628,6 +628,10 @@ def snapshot__clone(self, sub_command, args): success, details = snapshot_controller.clone(args.snapshot_id, args.lvol_name, new_size) return details + def snapshot__backup(self, sub_command, args): + snapshot_id = args.snapshot_id + return snapshot_controller.backup(snapshot_id) + def caching_node__deploy(self, sub_command, args): return caching_node_controller.deploy(args.ifname) @@ -688,18 +692,26 @@ def cluster_add(self, args): distr_bs = args.distr_bs distr_chunk_bs = args.distr_chunk_bs ha_type = args.ha_type - enable_node_affinity = args.enable_node_affinity qpair_count = args.qpair_count max_queue_size = args.max_queue_size inflight_io_threshold = args.inflight_io_threshold enable_qos = args.enable_qos strict_node_anti_affinity = args.strict_node_anti_affinity + storage_tiering = args.storage_tiering + s3_endpoint = args.s3_endpoint + s3_bucket = args.s3_bucket + s3_access_key = args.s3_access_key + s3_secret_key = args.s3_secret_key + s3_workerpool_mask = args.s3_workerpool_mask + s3_workerpool_size = args.s3_workerpool_size return cluster_ops.add_cluster( blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity, - qpair_count, max_queue_size, inflight_io_threshold, enable_qos, strict_node_anti_affinity) + qpair_count, max_queue_size, inflight_io_threshold, enable_qos, strict_node_anti_affinity, + storage_tiering, s3_endpoint, s3_bucket, s3_access_key, s3_secret_key, s3_workerpool_mask, + s3_workerpool_size) def cluster_deploy(self, args): grafana_endpoint = "" @@ -819,6 +831,14 @@ def cluster_create(self, args): enable_qos = args.enable_qos strict_node_anti_affinity = args.strict_node_anti_affinity + # storage_tiering = args.storage_tiering + # s3_endpoint = args.s3_endpoint + # s3_bucket = args.s3_bucket + # s3_access_key = args.s3_access_key + # s3_secret_key = args.s3_secret_key + # s3_workerpool_mask = args.s3_workerpool_mask + # s3_workerpool_size = args.s3_workerpool_size + return cluster_ops.create_cluster( blk_size, page_size_in_blocks, CLI_PASS, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, diff --git a/simplyblock_core/cluster_ops.py b/simplyblock_core/cluster_ops.py index bbfe8f58a..9670bb8d4 100644 --- a/simplyblock_core/cluster_ops.py +++ b/simplyblock_core/cluster_ops.py @@ -458,7 +458,8 @@ def deploy_cluster(storage_nodes,test,ha_type,distr_ndcs,distr_npcs,enable_qos,i def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity, qpair_count, - max_queue_size, inflight_io_threshold, enable_qos, strict_node_anti_affinity) -> str: + max_queue_size, inflight_io_threshold, enable_qos, strict_node_anti_affinity, storage_tiering, + s3_endpoint, s3_bucket, s3_access_key, s3_secret_key, s3_workerpool_mask, s3_workerpool_size) -> str: db_controller = DBController() clusters = db_controller.get_clusters() if not clusters: @@ -493,6 +494,14 @@ def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn cluster.max_queue_size = max_queue_size cluster.inflight_io_threshold = inflight_io_threshold cluster.enable_qos = enable_qos + cluster.storage_tiering = storage_tiering + cluster.s3_endpoint = s3_endpoint + cluster.s3_bucket = s3_bucket + cluster.s3_access_key = s3_access_key + cluster.s3_secret_key = s3_secret_key + cluster.s3_workerpool_mask = s3_workerpool_mask + cluster.s3_workerpool_size = s3_workerpool_size + if cap_warn and cap_warn > 0: cluster.cap_warn = cap_warn if cap_crit and cap_crit > 0: diff --git a/simplyblock_core/controllers/snapshot_controller.py b/simplyblock_core/controllers/snapshot_controller.py index 370d94db3..4e521cb8d 100644 --- a/simplyblock_core/controllers/snapshot_controller.py +++ b/simplyblock_core/controllers/snapshot_controller.py @@ -2,6 +2,7 @@ import logging as lg import time import uuid +import datetime from simplyblock_core.controllers import lvol_controller, snapshot_events, pool_controller @@ -227,6 +228,39 @@ def list(all=False): return utils.print_table(data) +def backup(snapshot_uuid): + snap = db_controller.get_snapshot_by_id(snapshot_uuid) + if not snap: + msg = f"Snapshot not found: {snapshot_uuid}" + logger.error(msg) + return False, msg + + snode = db_controller.get_storage_node_by_id(snap.lvol.node_id) + if not snode: + msg = f"Storage node not found: {snap.lvol.node_id}" + logger.error(msg) + return False, msg + + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + ret = rpc_client.bdev_lvol_backup_snapshot(snap.snap_uuid, timeout_us=1000000, dev_page_size=2 * 1024 * 1024, nmax_retries=4, nmax_flush_jobs=4) + if not ret: + msg = f"Failed to backup snapshot: {snap.snap_bdev}" + logger.error(msg) + return False, msg + + times = 5 + while times > 0: + resp = rpc_client.bdev_lvol_get_snapshot_backup_status(lvol_name=snap.snap_uuid) + time.sleep(3) + times = times - 1 + if resp == "SUCCEEDED": + snap.backedup_at = str(datetime.datetime.now(datetime.timezone.utc)) + snap.write_to_db(db_controller.kv_store) + logger.info("Done") + return True, "" + + return False, f"Failed to backup snapshot: {snap.snap_bdev}, status: {resp}" + def delete(snapshot_uuid, force_delete=False): snap = db_controller.get_snapshot_by_id(snapshot_uuid) if not snap: diff --git a/simplyblock_core/env_var b/simplyblock_core/env_var index a3b6fe709..00903c9a9 100644 --- a/simplyblock_core/env_var +++ b/simplyblock_core/env_var @@ -1,6 +1,5 @@ SIMPLY_BLOCK_COMMAND_NAME=sbcli-dev SIMPLY_BLOCK_VERSION=18.0.90 - -SIMPLY_BLOCK_DOCKER_IMAGE=simplyblock/simplyblock:main -SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=simplyblock/spdk:main-latest +SIMPLY_BLOCK_DOCKER_IMAGE=public.ecr.aws/simply-block/simplyblock:manohar-storage-tiering3 +SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=simplyblock/spdk:storage-tiering-ha-tested-sanitizer diff --git a/simplyblock_core/models/cluster.py b/simplyblock_core/models/cluster.py index 77a10fb5c..0efc60045 100644 --- a/simplyblock_core/models/cluster.py +++ b/simplyblock_core/models/cluster.py @@ -61,6 +61,13 @@ class Cluster(BaseModel): strict_node_anti_affinity: bool = False tls: bool = False is_re_balancing: bool = False + storage_tiering: bool = True + s3_access_key: str = "foobar" + s3_secret_key: str = "barfoobarfoo" + s3_endpoint: str = "http://192.168.10.146:9000" + s3_bucket: str = "mybucket" + s3_workerpool_mask: str = "0x00000000" + s3_workerpool_size: int = 32 full_page_unmap: bool = True def get_status_code(self): diff --git a/simplyblock_core/models/snapshot.py b/simplyblock_core/models/snapshot.py index 300b984a2..853613455 100644 --- a/simplyblock_core/models/snapshot.py +++ b/simplyblock_core/models/snapshot.py @@ -22,3 +22,4 @@ class SnapShot(BaseModel): snap_ref_id: str = "" snap_uuid: str = "" vuid: int = 0 + backedup_at: str = "" diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 962b637c4..26b41f618 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -1,8 +1,6 @@ import json import inspect - import requests - from simplyblock_core import constants, utils from requests.adapters import HTTPAdapter from urllib3 import Retry @@ -90,6 +88,10 @@ def _request2(self, method, params=None): def get_version(self): return self._request("spdk_get_version") + def bdev_get_bdevs(self, name): + params = { 'name': name } + return self._request("bdev_get_bdevs", params) + def subsystem_list(self, nqn_name=None): data = self._request("nvmf_get_subsystems") if data and nqn_name: @@ -279,10 +281,23 @@ def create_lvstore(self, name, bdev_name, cluster_sz, clear_method, num_md_pages "lvs_name": name, "cluster_sz": cluster_sz, "clear_method": clear_method, + "not_evict_lvstore_md_pages": True, + # "disaster_recovery": True, # toggle then when the node needs to be created in disaster recovery mode "num_md_pages_per_cluster_ratio": num_md_pages_per_cluster_ratio, } return self._request("bdev_lvol_create_lvstore", params) + def bdev_lvol_create_lvstore_persistent(self, name, bdev_name, cluster_sz, clear_method, num_md_pages_per_cluster_ratio, not_evict_lvstore_md_pages=True): + params = { + "bdev_name": bdev_name, + "lvs_name": name, + "cluster_sz": cluster_sz, + "clear_method": clear_method, + "num_md_pages_per_cluster_ratio": num_md_pages_per_cluster_ratio, + "not_evict_lvstore_md_pages": True + } + return self._request("bdev_lvol_create_lvstore_persistent", params) + def create_lvol(self, name, size_in_mib, lvs_name, lvol_priority_class=0): params = { "lvol_name": name, @@ -290,6 +305,7 @@ def create_lvol(self, name, size_in_mib, lvs_name, lvol_priority_class=0): "lvs_name": lvs_name, "thin_provision": True, "clear_method": "unmap", + "sync_fetch": True, "lvol_priority_class": lvol_priority_class, } return self._request("bdev_lvol_create", params) @@ -325,13 +341,17 @@ def lvol_read_only(self, name): def lvol_create_snapshot(self, lvol_id, snapshot_name): params = { "lvol_name": lvol_id, - "snapshot_name": snapshot_name} + "snapshot_name": snapshot_name, + "sync_fetch": True, + } return self._request("bdev_lvol_snapshot", params) def lvol_clone(self, snapshot_name, clone_name): params = { "snapshot_name": snapshot_name, - "clone_name": clone_name} + "clone_name": clone_name, + "sync_fetch": True, + } return self._request("bdev_lvol_clone", params) def lvol_compress_create(self, base_bdev_name, pm_path): @@ -426,7 +446,9 @@ def bdev_alceml_create(self, alceml_name, nvme_name, uuid, pba_init_mode=3, def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm_names, chunk_size, ha_comm_addrs=None, ha_inode_self=None, pba_page_size=2097152, distrib_cpu_mask="", ha_is_non_leader=True, jm_vuid=0, write_protection=False, - full_page_unmap=True): + full_page_unmap=True, storage_tiering_id=0, secondary_io_timeout_us=1 << 30, ghost_capacity=1, fifo_main_capacity=1, fifo_small_capacity=1, + support_storage_tiering=False, secondary_stg_name="", disaster_recovery=False, + ): """" // Optional (not specified = no HA) // Comma-separated communication addresses, for each node, e.g. "192.168.10.1:45001,192.168.10.1:32768". @@ -468,8 +490,35 @@ def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm params["write_protection"] = True if full_page_unmap: params["use_map_whole_page_on_1st_write"] = True + + if support_storage_tiering: + # generate a random int + # storage_tiering_id = random.randint(0, 2**16 - 2) + params['support_storage_tiering'] = support_storage_tiering + params['secondary_stg_name'] = secondary_stg_name + params['secondary_io_timeout_us'] = secondary_io_timeout_us + params['disaster_recovery'] = disaster_recovery + params['storage_tiering_id'] = params['vuid'] + params['ghost_capacity'] = ghost_capacity + params['fifo_main_capacity'] = fifo_main_capacity + params['fifo_small_capacity'] = fifo_small_capacity return self._request("bdev_distrib_create", params) + def bdev_s3_create(self, name, local_testing, local_endpoint): + params = { + "name": name, + "local_testing": local_testing, + "local_endpoint": local_endpoint, + } + return self._request("bdev_s3_create", params) + + def bdev_s3_delete(self, **params): + return self._request("bdev_s3_delete", params) + + def bdev_s3_add_bucket(self, name, bucket_name): + params = { 'name': name, 'bucket_name': bucket_name } + return self._request("bdev_s3_add_bucket_name", params) + def bdev_lvol_delete_lvstore(self, name): params = {"lvs_name": name} return self._request2("bdev_lvol_delete_lvstore", params) @@ -931,7 +980,6 @@ def listeners_del(self, nqn, trtype, traddr, trsvcid): } return self._request("nvmf_subsystem_remove_listener", params) - def bdev_distrib_force_to_non_leader(self, jm_vuid=0): params = None if jm_vuid: @@ -988,6 +1036,20 @@ def bdev_lvol_clone_register(self, clone_name, snapshot_name, registered_uuid, b } return self._request("bdev_lvol_clone_register", params) + def bdev_lvol_backup_snapshot(self, lvol_name, timeout_us, dev_page_size, nmax_retries=4, nmax_flush_jobs=4): + params = { + 'lvol_name': lvol_name, + 'timeout_us': timeout_us, + 'dev_page_size': dev_page_size, + 'nmax_retries': nmax_retries, + 'nmax_flush_jobs': nmax_flush_jobs + } + return self._request("bdev_lvol_backup_snapshot", params) + + def bdev_lvol_get_snapshot_backup_status(self, lvol_name): + params = { 'lvol_name': lvol_name } + return self._request("bdev_lvol_get_snapshot_backup_status", params) + def distr_replace_id_in_map_prob(self, storage_ID_from, storage_ID_to): params = { "storage_ID_from": storage_ID_from, diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 8e36c3b0f..b11bc3d86 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -661,12 +661,13 @@ def _connect_to_remote_jm_devs(this_node, jm_ids=None): if this_node.lvstore_stack_secondary_1: org_node = db_controller.get_storage_node_by_id(this_node.lvstore_stack_secondary_1) - if org_node.jm_device and org_node.jm_device.status == JMDevice.STATUS_ONLINE: - remote_devices.append(org_node.jm_device) - for jm_id in org_node.jm_ids: - jm_dev = db_controller.get_jm_device_by_id(jm_id) - if jm_dev and jm_dev not in remote_devices: - remote_devices.append(jm_dev) + if org_node: + if org_node.jm_device and org_node.jm_device.status == JMDevice.STATUS_ONLINE: + remote_devices.append(org_node.jm_device) + for jm_id in org_node.jm_ids: + jm_dev = db_controller.get_jm_device_by_id(jm_id) + if jm_dev and jm_dev not in remote_devices: + remote_devices.append(jm_dev) if len(remote_devices) < 2: for node in db_controller.get_storage_nodes_by_cluster_id(this_node.cluster_id): @@ -2829,8 +2830,9 @@ def recreate_lvstore_on_sec(secondary_node): if lv.status not in [LVol.STATUS_IN_DELETION, LVol.STATUS_IN_CREATION]: lvol_list.append(lv) + cluster = db_controller.get_cluster_by_id(primary_node.cluster_id) ### 1- create distribs and raid - ret, err = _create_bdev_stack(secondary_node, primary_node.lvstore_stack, primary_node=primary_node) + ret, err = _create_bdev_stack(secondary_node, primary_node.lvstore_stack, primary_node=primary_node, storage_tiering=cluster.storage_tiering, endpoint=cluster.s3_endpoint, bucket_name=cluster.s3_bucket) if err: logger.error(f"Failed to recreate lvstore on node {secondary_node.get_id()}") logger.error(err) @@ -2896,9 +2898,10 @@ def recreate_lvstore(snode): snode = db_controller.get_storage_node_by_id(snode.get_id()) snode.remote_jm_devices = _connect_to_remote_jm_devs(snode) snode.write_to_db() + cluster = db_controller.get_cluster_by_id(snode.cluster_id) ### 1- create distribs and raid - ret, err = _create_bdev_stack(snode, []) + ret, err = _create_bdev_stack(snode, [], storage_tiering=cluster.storage_tiering, endpoint=cluster.s3_endpoint, bucket_name=cluster.s3_bucket) if err: logger.error(f"Failed to recreate lvstore on node {snode.get_id()}") logger.error(err) @@ -3153,10 +3156,8 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo distrib_list = [] distrib_vuids = [] size = max_size // snode.number_of_distribs - distr_page_size = page_size_in_blocks - # distr_page_size = (ndcs + npcs) * page_size_in_blocks - # cluster_sz = ndcs * page_size_in_blocks - cluster_sz = page_size_in_blocks + distr_page_size = ndcs * page_size_in_blocks + cluster_sz = ndcs * page_size_in_blocks strip_size_kb = int((ndcs + npcs) * 2048) strip_size_kb = utils.nearest_upper_power_of_2(strip_size_kb) jm_vuid = 1 @@ -3171,9 +3172,12 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo snode.jm_vuid = jm_vuid snode.write_to_db() + cluster = db_controller.get_cluster_by_id(snode.cluster_id) write_protection = False if ndcs > 1: write_protection = True + + storage_tiering_ops = [] for _ in range(snode.number_of_distribs): distrib_vuid = utils.get_random_vuid() while distrib_vuid in distrib_vuids: @@ -3200,9 +3204,31 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo } ] ) + if cluster.storage_tiering: + storage_tiering_ops.append( + { + "type": "bdev_s3_create", + "params": { + 'name': 's3_{}'.format(distrib_name), + 'local_testing': True, + 'local_endpoint': cluster.s3_endpoint, + } + } + ) distrib_list.append(distrib_name) distrib_vuids.append(distrib_vuid) + if cluster.storage_tiering: + storage_tiering_ops.append( + { + "type": "bdev_s3_add_bucket_name", + "params": { + 'name': 's3_{}'.format(distrib_name), + 'bucket_name': cluster.s3_bucket, + } + } + ) + if len(distrib_list) == 1: raid_device = distrib_list[0] else: @@ -3245,7 +3271,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo snode.lvstore_status = "in_creation" snode.write_to_db() - ret, err = _create_bdev_stack(snode, lvstore_stack) + ret, err = _create_bdev_stack(snode, lvstore_stack, storage_tiering=cluster.storage_tiering, endpoint=cluster.s3_endpoint, bucket_name=cluster.s3_bucket) if err: logger.error(f"Failed to create lvstore on node {snode.get_id()}") logger.error(err) @@ -3266,7 +3292,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo # creating lvstore on secondary sec_node.remote_jm_devices = _connect_to_remote_jm_devs(sec_node) sec_node.write_to_db() - ret, err = _create_bdev_stack(sec_node, lvstore_stack, primary_node=snode) + ret, err = _create_bdev_stack(sec_node, lvstore_stack, primary_node=snode, storage_tiering=cluster.storage_tiering, endpoint=cluster.s3_endpoint, bucket_name=cluster.s3_bucket) if err: logger.error(f"Failed to create lvstore on node {sec_node.get_id()}") logger.error(err) @@ -3295,8 +3321,52 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo return True -def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None): - rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) +def s3_bdev_create(node_id, name, local_testing, local_endpoint, bucket_name): + db_controller = DBController() + + snode = db_controller.get_storage_node_by_id(node_id) + if not snode: + logger.error(f"Can not find storage node: {node_id}") + return False + + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + + resp = rpc_client.bdev_get_bdevs(name) + if resp is None: + print("s3 bdev does not exist. creating...") + rpc_client.bdev_s3_create(name, local_testing, local_endpoint) + rpc_client.bdev_s3_add_bucket(name, bucket_name) + else: + print("bdev already exists") + print(resp) + + +def s3_bdev_delete(node_id, name): + db_controller = DBController() + + snode = db_controller.get_storage_node_by_id(node_id) + if not snode: + logger.error(f"Can not find storage node: {node_id}") + return False + + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + + return rpc_client.bdev_s3_delete(name) + +def s3_bdev_add_bucket_name(node_id, name, bucket_name): + db_controller = DBController() + + snode = db_controller.get_storage_node_by_id(node_id) + if not snode: + logger.error(f"Can not find storage node: {node_id}") + return False + + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + + return rpc_client.bdev_s3_add_bucket(name, bucket_name) + +def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None, storage_tiering=False, endpoint=None, bucket_name=None): + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password ) db_controller = DBController() cluster = db_controller.get_cluster_by_id(snode.cluster_id) created_bdevs = [] @@ -3319,13 +3389,23 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None): if name in node_bdev_names: continue - elif type == "bdev_distr": if primary_node: params['jm_names'] = get_node_jm_names(primary_node, remote_node=snode) else: params['jm_names'] = get_node_jm_names(snode) + if storage_tiering: + snode_id = snode.get_id() + params['secondary_stg_name'] = 's3_{}'.format(snode_id.split("-")[0]) + params['support_storage_tiering'] = True + params['ghost_capacity'] = 1 + params['fifo_main_capacity'] = 1 + params['fifo_small_capacity'] = 1 + # params['ghost_capacity'] = (snode.total_capacity/snode.number_of_distrib) * 0.25 + # params['fifo_main_capacity'] = 0.1 * params['ghost_capacity'] + # params['fifo_small_capacity'] = 0.2 * params['ghost_capacity'] + s3_bdev_create(snode.get_id(), params['secondary_stg_name'], local_testing=True, local_endpoint=endpoint, bucket_name=bucket_name) if snode.distrib_cpu_cores: distrib_cpu_mask = utils.decimal_to_hex_power_of_2(snode.distrib_cpu_cores[snode.distrib_cpu_index]) params['distrib_cpu_mask'] = distrib_cpu_mask diff --git a/simplyblock_web/blueprints/snode_ops.py b/simplyblock_web/blueprints/snode_ops.py index 711c99344..d2ce7acbf 100644 --- a/simplyblock_web/blueprints/snode_ops.py +++ b/simplyblock_web/blueprints/snode_ops.py @@ -184,6 +184,8 @@ def spdk_process_start(body: SPDKParams): f"ssd_pcie={ssd_pcie_params}", f"PCI_ALLOWED={ssd_pcie_list}", f"TOTAL_HP={total_mem_mib}", + "AWS_ACCESS_KEY_ID=foobar", + "AWS_SECRET_ACCESS_KEY=barfoobarfoo", ] # restart_policy={"Name": "on-failure", "MaximumRetryCount": 99} ) diff --git a/simplyblock_web/node_utils.py b/simplyblock_web/node_utils.py index 2824f4803..f3f426277 100644 --- a/simplyblock_web/node_utils.py +++ b/simplyblock_web/node_utils.py @@ -6,7 +6,6 @@ import requests import boto3 import re - import jc from simplyblock_core import shell_utils