diff --git a/simplyblock_cli/cli-reference.yaml b/simplyblock_cli/cli-reference.yaml index 68592f094..ed199042b 100644 --- a/simplyblock_cli/cli-reference.yaml +++ b/simplyblock_cli/cli-reference.yaml @@ -38,13 +38,9 @@ commands: type: int required: true - name: "--max-size" - help: "Maximum amount of GB to be utilized on this storage node" + help: "Maximum amount of Huge Pages to be set on the node" description: > - Maximum amount of GB to be utilized on this storage node. This cannot be larger than the total effective - cluster capacity. A safe value is `1/n * 2.0` of effective cluster capacity. Meaning, if you have three - storage nodes, each with 100 TiB of raw capacity and a cluster with erasure coding scheme 1+1 - (two replicas), the effective cluster capacity is _100 TiB * 3 / 2 = 150 TiB_. Setting this parameter to - _150 TiB / 3 * 2 = 100TiB_ would be a safe choice. + Maximum amount of Huge Pages to be set on the node, it will override the auto-calculated value. dest: max_prov type: str required: false diff --git a/simplyblock_cli/cli.py b/simplyblock_cli/cli.py index 862cfbbd6..de11f2cd3 100644 --- a/simplyblock_cli/cli.py +++ b/simplyblock_cli/cli.py @@ -93,7 +93,7 @@ def init_storage_node__deploy(self, subparser): def init_storage_node__configure(self, subparser): subcommand = self.add_sub_command(subparser, 'configure', 'Prepare a configuration file to be used when adding the storage node') argument = subcommand.add_argument('--max-lvol', help='Max logical volume per storage node', type=int, dest='max_lvol', required=True) - argument = subcommand.add_argument('--max-size', help='Maximum amount of GB to be utilized on this storage node', type=str, dest='max_prov', required=False) + argument = subcommand.add_argument('--max-size', help='Maximum amount of Huge Pages to be set on the node', type=str, dest='max_prov', required=False) argument = subcommand.add_argument('--nodes-per-socket', help='number of each node to be added per each socket.', type=int, default=1, dest='nodes_per_socket') argument = subcommand.add_argument('--sockets-to-use', help='The system socket to use when adding the storage nodes', type=str, default='0', dest='sockets_to_use') argument = subcommand.add_argument('--cores-percentage', help='The percentage of cores to be used for spdk (0-99)', type=range_type(0, 99), default=0, dest='cores_percentage') diff --git a/simplyblock_core/cluster_ops.py b/simplyblock_core/cluster_ops.py index 799153aa3..89240ae44 100644 --- a/simplyblock_core/cluster_ops.py +++ b/simplyblock_core/cluster_ops.py @@ -1352,8 +1352,12 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None, def cluster_grace_startup(cl_id, clear_data=False, spdk_image=None) -> None: - db_controller.get_cluster_by_id(cl_id) # ensure exists + get_cluster = db_controller.get_cluster_by_id(cl_id) # ensure exists + st = db_controller.get_storage_nodes_by_cluster_id(cl_id) + for node in st: + logger.info(f"Shutting down node: {node.get_id()}") + storage_node_ops.shutdown_storage_node(node.get_id(), force=True) st = db_controller.get_storage_nodes_by_cluster_id(cl_id) for node in st: logger.info(f"Restarting node: {node.get_id()}") @@ -1362,6 +1366,19 @@ def cluster_grace_startup(cl_id, clear_data=False, spdk_image=None) -> None: get_node = db_controller.get_storage_node_by_id(node.get_id()) if get_node.status != StorageNode.STATUS_ONLINE: raise ValueError("failed to restart node") + if get_cluster.status == Cluster.STATUS_UNREADY: + logger.info("Cluster is not activated yet, please manually activate it") + + else: + while True: + get_cluster = db_controller.get_cluster_by_id(cl_id) + if get_cluster.status != Cluster.STATUS_ACTIVE: + logger.info(f"wait for cluster to be active, current status is: {get_cluster.status}") + time.sleep(5) + else: + break + logger.info("Cluster gracefully started") + def cluster_grace_shutdown(cl_id) -> None: @@ -1369,11 +1386,10 @@ def cluster_grace_shutdown(cl_id) -> None: st = db_controller.get_storage_nodes_by_cluster_id(cl_id) for node in st: - if node.status == StorageNode.STATUS_ONLINE: - logger.info(f"Suspending node: {node.get_id()}") - storage_node_ops.suspend_storage_node(node.get_id()) - logger.info(f"Shutting down node: {node.get_id()}") - storage_node_ops.shutdown_storage_node(node.get_id()) + logger.info(f"Suspending node: {node.get_id()}") + storage_node_ops.suspend_storage_node(node.get_id(), force=True) + logger.info(f"Shutting down node: {node.get_id()}") + storage_node_ops.shutdown_storage_node(node.get_id(), force=True) def delete_cluster(cl_id) -> None: diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 3f9e8ccb5..e06bd26e6 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -1650,11 +1650,12 @@ def restart_storage_node( snode.mgmt_ip = node_ip.split(":")[0] data_nics = [] for nic in snode.data_nics: - device = node_info['network_interface'][nic.if_name] + if_name = nic["if_name"] + device = node_info['network_interface'][if_name] data_nics.append( IFace({ 'uuid': str(uuid.uuid4()), - 'if_name': nic, + 'if_name': if_name, 'ip4_address': device['ip'], 'status': device['status'], 'net_type': device['net_type']}))