Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions lightllm/server/api_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,19 @@ def make_argument_parser() -> argparse.ArgumentParser:
default=None,
help="""Directory used to persist disk cache data. Defaults to a temp directory when not set.""",
)
parser.add_argument(
"--redis_endpoint",
type=str,
default="",
help="""Redis endpoint used by disk cache index service (LightMem).""",
)
parser.add_argument(
"--num_node_in_disk_cache",
type=int,
default=1,
help="""Number of nodes participating in disk cache index sharding.
Only used when --redis_endpoint is set. All nodes must use the same value.""",
)
parser.add_argument(
"--enable_dp_prompt_cache_fetch",
action="store_true",
Expand Down
2 changes: 2 additions & 0 deletions lightllm/server/core/objs/start_args_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ class StartArgs:
enable_disk_cache: bool = field(default=False)
disk_cache_storage_size: float = field(default=10)
disk_cache_dir: Optional[str] = field(default=None)
redis_endpoint: str = field(default="")
num_node_in_disk_cache: int = field(default=1)
enable_dp_prompt_cache_fetch: bool = field(default=False)
# zmp ports
router_port: int = field(default=None)
Expand Down
12 changes: 9 additions & 3 deletions lightllm/server/multi_level_kv_cache/disk_cache_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import List, Optional

import torch
from lightllm.utils.envs_utils import get_unique_server_name
from lightllm.utils.envs_utils import get_disk_cache_index_prefix, get_unique_server_name
from lightllm.utils.log_utils import init_logger
from .cpu_cache_client import CpuKvCacheClient

Expand Down Expand Up @@ -36,15 +36,18 @@ def __init__(
disk_cache_storage_size: float,
cpu_cache_client: CpuKvCacheClient,
disk_cache_dir: Optional[str] = None,
redis_endpoint: str = "",
num_node_in_disk_cache: int = 1,
):
self.cpu_cache_client = cpu_cache_client
self._pages_all_idle = False

assert disk_cache_storage_size > 0
storage_size = int(disk_cache_storage_size * (1024 ** 3))
# num_shard与KVCACHE_MAX_BLOCK_SIZE相关,KVCACHE_MAX_BLOCK_SIZE默认64MB前提下,
# num_shard设置32, 能使disk cache的容量利用率达到90%,继续增大num_shard会导致容量利用率下降
num_shard = 32
if num_node_in_disk_cache <= 0:
raise ValueError(f"num_node_in_disk_cache must be >= 1, got {num_node_in_disk_cache}")
num_shard = 64 * num_node_in_disk_cache if redis_endpoint else 64
Comment on lines 45 to +50
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The comment on lines 45-46 states that num_shard is set to 32 for 90% utilization. However, the new logic on line 50 changes num_shard to 64 * num_node_in_disk_cache or 64. This change in the calculation of num_shard should be reflected in the comment to avoid confusion and explain the new rationale behind the value, especially the change from 32 to 64 as a base.

Suggested change
assert disk_cache_storage_size > 0
storage_size = int(disk_cache_storage_size * (1024 ** 3))
# num_shard与KVCACHE_MAX_BLOCK_SIZE相关,KVCACHE_MAX_BLOCK_SIZE默认64MB前提下,
# num_shard设置32, 能使disk cache的容量利用率达到90%,继续增大num_shard会导致容量利用率下降
num_shard = 32
if num_node_in_disk_cache <= 0:
raise ValueError(f"num_node_in_disk_cache must be >= 1, got {num_node_in_disk_cache}")
num_shard = 64 * num_node_in_disk_cache if redis_endpoint else 64
# num_shard与KVCACHE_MAX_BLOCK_SIZE相关,KVCACHE_MAX_BLOCK_SIZE默认64MB前提下,
# num_shard设置为64, 能使disk cache的容量利用率达到90%以上,继续增大num_shard会导致容量利用率下降
if num_node_in_disk_cache <= 0:
raise ValueError(f"num_node_in_disk_cache must be >= 1, got {num_node_in_disk_cache}")
num_shard = 64 * num_node_in_disk_cache if redis_endpoint else 64

num_worker = 48
# 读写同时进行时,分配16线程用来写,32线程用来读
max_concurrent_write_tasks = 16
Expand All @@ -64,6 +67,9 @@ def __init__(
storage_size=storage_size,
num_shard=num_shard,
num_worker=num_worker,
index_endpoint=redis_endpoint,
index_prefix=get_disk_cache_index_prefix(),
bandwidth_log=True,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The bandwidth_log parameter is hardcoded to True. If this is a configurable option for PyLocalCacheService, it might be beneficial to expose it as an argument in the DiskCacheWorker constructor or via an environment variable, similar to index_prefix. If it's always intended to be True, a comment explaining why it's hardcoded would be helpful for future maintainability.

)

logger.info(
Expand Down
2 changes: 2 additions & 0 deletions lightllm/server/multi_level_kv_cache/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def __init__(
disk_cache_storage_size=self.args.disk_cache_storage_size,
cpu_cache_client=self.cpu_cache_client,
disk_cache_dir=self.args.disk_cache_dir,
redis_endpoint=self.args.redis_endpoint,
num_node_in_disk_cache=self.args.num_node_in_disk_cache,
)
self.disk_cache_thread = threading.Thread(target=self.disk_cache_worker.run, daemon=True)
self.disk_cache_thread.start()
Expand Down
5 changes: 5 additions & 0 deletions lightllm/utils/envs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ def get_disk_cache_prompt_limit_length():
return int(os.getenv("LIGHTLLM_DISK_CACHE_PROMPT_LIMIT_LENGTH", 2048))


@lru_cache(maxsize=None)
def get_disk_cache_index_prefix() -> str:
    """Return the key prefix for the disk cache index service (LightMem).

    The prefix is read from the ``LIGHTLLM_DISK_CACHE_INDEX_PREFIX``
    environment variable and falls back to ``"lightmem"`` when unset.
    The result is cached, so the environment is consulted only once per
    process.
    """
    configured = os.environ.get("LIGHTLLM_DISK_CACHE_INDEX_PREFIX")
    return configured if configured is not None else "lightmem"


@lru_cache(maxsize=None)
def enable_huge_page():
"""
Expand Down