diff --git a/simplyblock_cli/cli-reference.yaml b/simplyblock_cli/cli-reference.yaml index 1855384bc..dbee8e4ac 100644 --- a/simplyblock_cli/cli-reference.yaml +++ b/simplyblock_cli/cli-reference.yaml @@ -923,6 +923,11 @@ commands: min: 0 max: 128 default: 3 + - name: "--client-data-nic" + help: > + Network interface name from client to use for LVol connection. + dest: client_data_nic + type: str - name: add help: "Adds a new cluster" arguments: @@ -1050,6 +1055,11 @@ commands: aliases: - "-n" type: str + - name: "--client-data-nic" + help: > + Network interface name from client to use for LVol connection. + dest: client_data_nic + type: str - name: activate help: > Activates a cluster. diff --git a/simplyblock_cli/cli.py b/simplyblock_cli/cli.py index ef5449e53..093fd0759 100644 --- a/simplyblock_cli/cli.py +++ b/simplyblock_cli/cli.py @@ -386,6 +386,7 @@ def init_cluster__create(self, subparser): argument = subcommand.add_argument('--name', '-n', help='Assigns a name to the newly created cluster.', type=str, dest='name') argument = subcommand.add_argument('--qpair-count', help='NVMe/TCP transport qpair count per logical volume', type=range_type(0, 128), default=32, dest='qpair_count') argument = subcommand.add_argument('--client-qpair-count', help='default NVMe/TCP transport qpair count per logical volume for client', type=range_type(0, 128), default=3, dest='client_qpair_count') + argument = subcommand.add_argument('--client-data-nic', help='Network interface name from client to use for LVol connection.', type=str, dest='client_data_nic') def init_cluster__add(self, subparser): subcommand = self.add_sub_command(subparser, 'add', 'Adds a new cluster') @@ -413,6 +414,7 @@ def init_cluster__add(self, subparser): argument = subcommand.add_argument('--inflight-io-threshold', help='The number of inflight IOs allowed before the IO queuing starts', type=int, default=4, dest='inflight_io_threshold') argument = subcommand.add_argument('--strict-node-anti-affinity', 
help='Enable strict node anti affinity for storage nodes. Never more than one chunk is placed on a node. This requires a minimum of _data-chunks-in-stripe + parity-chunks-in-stripe + 1_ nodes in the cluster."', dest='strict_node_anti_affinity', action='store_true') argument = subcommand.add_argument('--name', '-n', help='Assigns a name to the newly created cluster.', type=str, dest='name') + argument = subcommand.add_argument('--client-data-nic', help='Network interface name from client to use for LVol connection.', type=str, dest='client_data_nic') def init_cluster__activate(self, subparser): subcommand = self.add_sub_command(subparser, 'activate', 'Activates a cluster.') diff --git a/simplyblock_cli/clibase.py b/simplyblock_cli/clibase.py index 58f01cae3..efd50c154 100644 --- a/simplyblock_cli/clibase.py +++ b/simplyblock_cli/clibase.py @@ -701,11 +701,13 @@ def cluster_add(self, args): inflight_io_threshold = args.inflight_io_threshold strict_node_anti_affinity = args.strict_node_anti_affinity is_single_node = args.is_single_node + client_data_nic = args.client_data_nic return cluster_ops.add_cluster( blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity, - qpair_count, max_queue_size, inflight_io_threshold, strict_node_anti_affinity, is_single_node, name, fabric) + qpair_count, max_queue_size, inflight_io_threshold, strict_node_anti_affinity, is_single_node, name, fabric=fabric, + client_data_nic=client_data_nic) def cluster_create(self, args): page_size_in_blocks = args.page_size @@ -740,6 +742,7 @@ def cluster_create(self, args): dns_name = args.dns_name is_single_node = args.is_single_node fabric = args.fabric + client_data_nic = args.client_data_nic return cluster_ops.create_cluster( blk_size, page_size_in_blocks, @@ -747,7 +750,7 @@ def cluster_create(self, args): ifname, mgmt_ip, log_del_interval, metrics_retention_period, contact_point, grafana_endpoint, distr_ndcs, 
distr_npcs, distr_bs, distr_chunk_bs, ha_type, mode, enable_node_affinity, qpair_count, client_qpair_count, max_queue_size, inflight_io_threshold, disable_monitoring, - strict_node_anti_affinity, name, tls_secret, ingress_host_source, dns_name, fabric, is_single_node) + strict_node_anti_affinity, name, tls_secret, ingress_host_source, dns_name, fabric, is_single_node, client_data_nic) def query_yes_no(self, question, default="yes"): """Ask a yes/no question via raw_input() and return their answer. diff --git a/simplyblock_core/cluster_ops.py b/simplyblock_core/cluster_ops.py index 0e1d2e2b2..799153aa3 100644 --- a/simplyblock_core/cluster_ops.py +++ b/simplyblock_core/cluster_ops.py @@ -223,7 +223,7 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, mgmt_ip, log_del_interval, metrics_retention_period, contact_point, grafana_endpoint, distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, mode, enable_node_affinity, qpair_count, client_qpair_count, max_queue_size, inflight_io_threshold, disable_monitoring, strict_node_anti_affinity, name, - tls_secret, ingress_host_source, dns_name, fabric, is_single_node) -> str: + tls_secret, ingress_host_source, dns_name, fabric, is_single_node, client_data_nic) -> str: if distr_ndcs == 0 and distr_npcs == 0: raise ValueError("both distr_ndcs and distr_npcs cannot be 0") @@ -335,6 +335,7 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass, cluster.disable_monitoring = disable_monitoring cluster.mode = mode cluster.full_page_unmap = False + cluster.client_data_nic = client_data_nic or "" if mode == "docker": if not disable_monitoring: @@ -441,7 +442,7 @@ def _run_fio(mount_point) -> None: def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity, qpair_count, max_queue_size, inflight_io_threshold, strict_node_anti_affinity, 
is_single_node, name, cr_name=None, - cr_namespace=None, cr_plural=None, fabric="tcp", cluster_ip=None, grafana_secret=None) -> str: + cr_namespace=None, cr_plural=None, fabric="tcp", cluster_ip=None, grafana_secret=None, client_data_nic="") -> str: default_cluster = None @@ -527,6 +528,7 @@ def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn cluster.fabric_tcp = protocols["tcp"] cluster.fabric_rdma = protocols["rdma"] cluster.full_page_unmap = False + cluster.client_data_nic = client_data_nic or "" cluster.status = Cluster.STATUS_UNREADY cluster.create_dt = str(datetime.datetime.now()) diff --git a/simplyblock_core/controllers/lvol_controller.py b/simplyblock_core/controllers/lvol_controller.py index 26140dcca..a543b089d 100644 --- a/simplyblock_core/controllers/lvol_controller.py +++ b/simplyblock_core/controllers/lvol_controller.py @@ -1302,6 +1302,9 @@ def connect_lvol(uuid, ctrl_loss_tmo=constants.LVOL_NVME_CONNECT_CTRL_LOSS_TMO): else: keep_alive_to = constants.LVOL_NVME_KEEP_ALIVE_TO + client_data_nic_str = "" + if cluster.client_data_nic: + client_data_nic_str = f"--host-iface={cluster.client_data_nic}" out.append({ "ns_id": lvol.ns_id, "transport": transport, @@ -1312,11 +1315,13 @@ def connect_lvol(uuid, ctrl_loss_tmo=constants.LVOL_NVME_CONNECT_CTRL_LOSS_TMO): "ctrl-loss-tmo": ctrl_loss_tmo, "nr-io-queues": cluster.client_qpair_count, "keep-alive-tmo": keep_alive_to, + "host-iface": cluster.client_data_nic, "connect": f"sudo nvme connect --reconnect-delay={constants.LVOL_NVME_CONNECT_RECONNECT_DELAY} " f"--ctrl-loss-tmo={ctrl_loss_tmo} " f"--nr-io-queues={cluster.client_qpair_count} " f"--keep-alive-tmo={keep_alive_to} " - f"--transport={transport} --traddr={ip} --trsvcid={port} --nqn={lvol.nqn}", + f"--transport={transport} --traddr={ip} --trsvcid={port} --nqn={lvol.nqn} " + f"{client_data_nic_str}", }) return out diff --git a/simplyblock_core/models/cluster.py b/simplyblock_core/models/cluster.py index f85be6e06..a9a47c9d7 
100644 --- a/simplyblock_core/models/cluster.py +++ b/simplyblock_core/models/cluster.py @@ -72,6 +72,7 @@ class Cluster(BaseModel): is_re_balancing: bool = False full_page_unmap: bool = True is_single_node: bool = False + client_data_nic: str = "" def get_status_code(self): if self.status in self.STATUS_CODE_MAP: diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index eba4a30f3..e79c15914 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -236,12 +236,14 @@ def transport_create(self, trtype, qpair_count=6, shared_bufs=24576): params.update({"c2h_success": True,"sock_priority": 0}) return self._request("nvmf_create_transport", params) - def sock_impl_set_options(self): - method = "sock_impl_set_options" - params = {"impl_name": "posix", "enable_quickack": True, - "enable_zerocopy_send_server": True, - "enable_zerocopy_send_client": True} - return self._request(method, params) + def sock_impl_set_options(self, bind_to_device=None): + params = { + "impl_name": "posix", "enable_quickack": True, + "enable_zerocopy_send_server": True, + "enable_zerocopy_send_client": True} + if bind_to_device: + params["bind_to_device"] = bind_to_device + return self._request("sock_impl_set_options", params) def transport_create_caching(self, trtype): params = { diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 8124170ae..6ab4488f6 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -1274,7 +1274,10 @@ def add_node(cluster_id, node_addr, iface_name, data_nics_list, logger.warning(f"Failed to set nvmf max subsystems {constants.NVMF_MAX_SUBSYSTEMS}") # 2- set socket implementation options - ret = rpc_client.sock_impl_set_options() + bind_to_device = None + if snode.data_nics and len(snode.data_nics) == 1: + bind_to_device = snode.data_nics[0].if_name + ret = rpc_client.sock_impl_set_options(bind_to_device) if not ret: logger.error("Failed to 
set optimized socket options") return False @@ -1838,7 +1841,10 @@ def restart_storage_node( rpc_client.accel_set_options() # 2- set socket implementation options - ret = rpc_client.sock_impl_set_options() + bind_to_device = None + if snode.data_nics and len(snode.data_nics) == 1: + bind_to_device = snode.data_nics[0].if_name + ret = rpc_client.sock_impl_set_options(bind_to_device) if not ret: logger.error("Failed socket implement set options") return False diff --git a/simplyblock_web/api/v1/cluster.py b/simplyblock_web/api/v1/cluster.py index 9bb191602..c3a5fab7c 100644 --- a/simplyblock_web/api/v1/cluster.py +++ b/simplyblock_web/api/v1/cluster.py @@ -55,12 +55,14 @@ def add_cluster(): inflight_io_threshold = cl_data.get('inflight_io_threshold', 4) strict_node_anti_affinity = cl_data.get('strict_node_anti_affinity', False) is_single_node = cl_data.get('is_single_node', False) + client_data_nic = cl_data.get('client_data_nic', "") return utils.get_response(cluster_ops.add_cluster( blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity, - qpair_count, max_queue_size, inflight_io_threshold, strict_node_anti_affinity, is_single_node, name, - cr_name, cr_namespace, cr_plural, fabric + qpair_count, max_queue_size, inflight_io_threshold, strict_node_anti_affinity, is_single_node, name, + cr_name, cr_namespace, cr_plural, fabric, + client_data_nic=client_data_nic )) diff --git a/simplyblock_web/api/v2/cluster.py b/simplyblock_web/api/v2/cluster.py index 009e4cd96..283b7d80d 100644 --- a/simplyblock_web/api/v2/cluster.py +++ b/simplyblock_web/api/v2/cluster.py @@ -48,6 +48,8 @@ class ClusterParams(BaseModel): cr_plural: str = "" cluster_ip: str = "" grafana_secret: str = "" + client_data_nic: str = "" + @api.get('/', name='clusters:list') def list() -> List[ClusterDTO]: