diff --git a/lightllm/server/api_cli.py b/lightllm/server/api_cli.py index 96126744af..a61f4ea517 100644 --- a/lightllm/server/api_cli.py +++ b/lightllm/server/api_cli.py @@ -615,6 +615,19 @@ def make_argument_parser() -> argparse.ArgumentParser: default=None, help="""Directory used to persist disk cache data. Defaults to a temp directory when not set.""", ) + parser.add_argument( + "--redis_endpoint", + type=str, + default="", + help="""Redis endpoint used by disk cache index service (LightMem).""", + ) + parser.add_argument( + "--num_node_in_disk_cache", + type=int, + default=1, + help="""Number of nodes participating in disk cache index sharding. + Only used when --redis_endpoint is set. All nodes must use the same value.""", + ) parser.add_argument( "--enable_dp_prompt_cache_fetch", action="store_true", diff --git a/lightllm/server/core/objs/start_args_type.py b/lightllm/server/core/objs/start_args_type.py index a369cf7f7f..18ce51840e 100644 --- a/lightllm/server/core/objs/start_args_type.py +++ b/lightllm/server/core/objs/start_args_type.py @@ -148,6 +148,8 @@ class StartArgs: enable_disk_cache: bool = field(default=False) disk_cache_storage_size: float = field(default=10) disk_cache_dir: Optional[str] = field(default=None) + redis_endpoint: str = field(default="") + num_node_in_disk_cache: int = field(default=1) enable_dp_prompt_cache_fetch: bool = field(default=False) # zmp ports router_port: int = field(default=None) diff --git a/lightllm/server/multi_level_kv_cache/disk_cache_worker.py b/lightllm/server/multi_level_kv_cache/disk_cache_worker.py index b1e5fcf6f2..6297ee9c96 100644 --- a/lightllm/server/multi_level_kv_cache/disk_cache_worker.py +++ b/lightllm/server/multi_level_kv_cache/disk_cache_worker.py @@ -6,7 +6,7 @@ from typing import List, Optional import torch -from lightllm.utils.envs_utils import get_unique_server_name +from lightllm.utils.envs_utils import get_disk_cache_index_prefix, get_unique_server_name from lightllm.utils.log_utils import init_logger from .cpu_cache_client import CpuKvCacheClient @@ -36,6 +36,8 @@ def __init__( disk_cache_storage_size: float, cpu_cache_client: CpuKvCacheClient, disk_cache_dir: Optional[str] = None, + redis_endpoint: str = "", + num_node_in_disk_cache: int = 1, ): self.cpu_cache_client = cpu_cache_client self._pages_all_idle = False @@ -43,8 +45,9 @@ def __init__( assert disk_cache_storage_size > 0 storage_size = int(disk_cache_storage_size * (1024 ** 3)) # num_shard与KVCACHE_MAX_BLOCK_SIZE相关,KVCACHE_MAX_BLOCK_SIZE默认64MB前提下, - # num_shard设置32, 能使disk cache的容量利用率达到90%,继续增大num_shard会导致容量利用率下降 - num_shard = 32 + if num_node_in_disk_cache <= 0: + raise ValueError(f"num_node_in_disk_cache must be >= 1, got {num_node_in_disk_cache}") + num_shard = 64 * num_node_in_disk_cache if redis_endpoint else 64 num_worker = 48 # 读写同时进行时,分配16线程用来写,32线程用来读 max_concurrent_write_tasks = 16 @@ -64,6 +67,9 @@ def __init__( storage_size=storage_size, num_shard=num_shard, num_worker=num_worker, + index_endpoint=redis_endpoint, + index_prefix=get_disk_cache_index_prefix(), + bandwidth_log=True, ) logger.info( diff --git a/lightllm/server/multi_level_kv_cache/manager.py b/lightllm/server/multi_level_kv_cache/manager.py index 1de1b502c9..6a8b596fb0 100644 --- a/lightllm/server/multi_level_kv_cache/manager.py +++ b/lightllm/server/multi_level_kv_cache/manager.py @@ -55,6 +55,8 @@ def __init__( disk_cache_storage_size=self.args.disk_cache_storage_size, cpu_cache_client=self.cpu_cache_client, disk_cache_dir=self.args.disk_cache_dir, + redis_endpoint=self.args.redis_endpoint, + num_node_in_disk_cache=self.args.num_node_in_disk_cache, ) self.disk_cache_thread = threading.Thread(target=self.disk_cache_worker.run, daemon=True) self.disk_cache_thread.start() diff --git a/lightllm/utils/envs_utils.py b/lightllm/utils/envs_utils.py index 7a7a9be121..9623eb4b8a 100644 --- a/lightllm/utils/envs_utils.py +++ b/lightllm/utils/envs_utils.py @@ -210,6 +210,11 @@ def get_disk_cache_prompt_limit_length(): return int(os.getenv("LIGHTLLM_DISK_CACHE_PROMPT_LIMIT_LENGTH", 2048)) +@lru_cache(maxsize=None) +def get_disk_cache_index_prefix() -> str: + return os.getenv("LIGHTLLM_DISK_CACHE_INDEX_PREFIX", "lightmem") + + @lru_cache(maxsize=None) def enable_huge_page(): """