Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lightllm/server/api_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ def make_argument_parser() -> argparse.ArgumentParser:
action="store_true",
help="Whether or not to allow for custom models defined on the Hub in their own modeling files.",
)
parser.add_argument("--detail_log", action="store_true", help="enable to print input infos in requests.")
parser.add_argument("--disable_log_stats", action="store_true", help="disable logging throughput stats.")
parser.add_argument("--log_stats_interval", type=int, default=10, help="log stats interval in second.")
parser.add_argument(
Expand Down
1 change: 1 addition & 0 deletions lightllm/server/core/objs/start_args_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class StartArgs:
nccl_port: int = field(default=None)
use_config_server_to_init_nccl: bool = field(default=False)
trust_remote_code: bool = field(default=False)
detail_log: bool = field(default=False)
disable_log_stats: bool = field(default=False)
log_stats_interval: int = field(default=10)
router_token_ratio: float = field(default=0.0)
Expand Down
16 changes: 15 additions & 1 deletion lightllm/server/httpserver/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ async def generate(

alloc_req_index = await self.shm_req_manager.async_alloc_req_index()
alloced_req_indexes.append(alloc_req_index)
req_objs = []
req_objs: List[Req] = []
for i, req_index in enumerate(alloced_req_indexes):
req_obj = await self.shm_req_manager.async_get_req_obj_by_index(req_index)
req_obj.init(
Expand All @@ -348,6 +348,12 @@ async def generate(
)
req_objs.append(req_obj)

logger.debug(
f"alloc shm_req for req_id {group_request_id}, "
f"shm_req num: {sampling_params.n} details (req_id, index_in_shm_mem): "
f"{[(req_obj.request_id, req_obj.index_in_shm_mem) for req_obj in req_objs]}"
)

req_status = ReqStatus(group_request_id, multimodal_params, req_objs, start_time)
self.req_id_to_out_inf[group_request_id] = req_status

Expand Down Expand Up @@ -437,6 +443,13 @@ async def _encode(
)
else:
prompt_ids = self.tokenizer.encode(prompt, add_special_tokens=sampling_params.add_special_tokens)

if self.args.detail_log:
logger.debug(
f"req_id: {sampling_params.group_request_id} prompt: {prompt},\n"
f"samplingparmas: {sampling_params.to_dict()}\n"
f"token_ids: {prompt_ids}"
)
Comment on lines +447 to +452
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

When the --detail_log flag is enabled, the server logs the full content of the user-provided prompt and its corresponding token IDs at the DEBUG level. LLM prompts frequently contain sensitive information, including Personally Identifiable Information (PII), proprietary data, or even secrets. Logging this information to persistent storage poses a significant privacy and security risk, as it could lead to unauthorized exposure of sensitive user data if log files are compromised or improperly handled. Additionally, there's a typo in the log message: samplingparmas should be samplingparams.

                    f"samplingparams: {sampling_params.to_dict()}\n"

return prompt_ids

    # TODO: the validation here is not sufficient for multimodal inputs
Expand Down Expand Up @@ -686,6 +699,7 @@ async def recycle_resource_loop(self):
for req_status in release_req_status:
self.req_id_to_out_inf.pop(req_status.group_req_objs.group_req_id, None)
for req in req_status.group_req_objs.shm_req_objs:
logger.debug(f"httpserver release req_id {req.request_id}, index {req.index_in_shm_mem}")
await self.shm_req_manager.async_put_back_req_obj(req)
await self.shm_req_manager.async_release_req_index(req.index_in_shm_mem)
await self._release_multimodal_resources(req_status.group_req_objs.multimodal_params)
Expand Down
3 changes: 2 additions & 1 deletion lightllm/server/router/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,5 @@ def __repr__(self):
return f"batch_id={self.batch_id}, " f"reqs={self.reqs}, "

def simple_log(self):
return f"batch_id={self.batch_id}, time:{time.time()}s req_ids:{[req.request_id for req in self.reqs]}"
details_tuples = [(req.request_id, req.index_in_shm_mem) for req in self.reqs]
return f"batch_id={self.batch_id}, time:{time.time()}s (req_id, shm_index)s:{details_tuples}"
2 changes: 2 additions & 0 deletions lightllm/server/router/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,8 @@ def _generate_new_batch(self):
Batch.merge_two_batch(self.running_batch, self.schedule_new_batch)
)
self.schedule_new_batch = Batch.merge_two_batch(self.schedule_new_batch, new_batch)
if self.schedule_new_batch is not None:
logger.info(f"gen new batch, {self.schedule_new_batch.simple_log()}")
return

def _multinode_tp_generate_new_batch(self):
Expand Down
5 changes: 5 additions & 0 deletions lightllm/server/router/req_queue/chunked_prefill/beam_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
from typing import List
from ...batch import Batch, Req
from lightllm.server.router.req_queue.base_queue import BaseQueue
from lightllm.utils.log_utils import init_logger

logger = init_logger(__name__)


class ChunkedBeamContinuesBatchQueue(BaseQueue):
Expand Down Expand Up @@ -119,6 +122,8 @@ def generate_new_batch(self, current_batch: Batch):
new_batch = Batch(uuid.uuid4().int, can_run_list, dp_size_in_node=self.dp_size_in_node)

for req in abort_req_list:
req: Req = req
logger.debug(f"router abort req id {req.request_id} shm_index: {req.index_in_shm_mem}")
Comment on lines +125 to +126
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The line req: Req = req is a redundant type hint. The type of req is already inferred from the abort_req_list iterable, so this line can be removed for better code clarity. This redundant pattern appears in a few other files in this pull request.

Suggested change
req: Req = req
logger.debug(f"router abort req id {req.request_id} shm_index: {req.index_in_shm_mem}")
logger.debug(f"router abort req id {req.request_id} shm_index: {req.index_in_shm_mem}")

self.free_aborted_req_cpu_cache_pages(req)
self.router.shm_req_manager.put_back_req_obj(req)
self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
Expand Down
5 changes: 5 additions & 0 deletions lightllm/server/router/req_queue/chunked_prefill/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
from ...batch import Batch, Req
from lightllm.server.router.req_queue.base_queue import BaseQueue
from lightllm.common.basemodel.infer_lock import g_router_lock
from lightllm.utils.log_utils import init_logger

logger = init_logger(__name__)


class ChunkedPrefillQueue(BaseQueue):
Expand Down Expand Up @@ -96,6 +99,8 @@ def generate_new_batch(self, current_batch: Batch):
if len(can_run_list) != 0:
new_batch = Batch(uuid.uuid4().int, can_run_list, dp_size_in_node=self.dp_size_in_node)
for req in abort_req_list:
req: Req = req
logger.debug(f"router abort req id {req.request_id} shm_index: {req.index_in_shm_mem}")
Comment on lines +102 to +103
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The line req: Req = req is a redundant type hint. The type of req is already inferred from the abort_req_list iterable, so this line can be removed for better code clarity.

Suggested change
req: Req = req
logger.debug(f"router abort req id {req.request_id} shm_index: {req.index_in_shm_mem}")
logger.debug(f"router abort req id {req.request_id} shm_index: {req.index_in_shm_mem}")

self.free_aborted_req_cpu_cache_pages(req)
self.router.shm_req_manager.put_back_req_obj(req)
self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from ...batch import Batch, Req
from lightllm.server.router.req_queue.base_queue import BaseQueue
from lightllm.common.basemodel.infer_lock import g_router_lock
from lightllm.utils.log_utils import init_logger

logger = init_logger(__name__)


class NIXLPDQueue(BaseQueue):
Expand Down Expand Up @@ -87,6 +90,8 @@ def generate_new_batch(self, current_batch: Batch):
if len(can_run_list) != 0:
new_batch = Batch(uuid.uuid4().int, can_run_list, dp_size_in_node=self.dp_size_in_node)
for req in abort_req_list:
req: Req = req
logger.debug(f"router abort req id {req.request_id} shm_index: {req.index_in_shm_mem}")
Comment on lines +93 to +94
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The line req: Req = req is a redundant type hint. The type of req is already inferred from the abort_req_list iterable, so this line can be removed for better code clarity.

Suggested change
req: Req = req
logger.debug(f"router abort req id {req.request_id} shm_index: {req.index_in_shm_mem}")
logger.debug(f"router abort req id {req.request_id} shm_index: {req.index_in_shm_mem}")

self.free_aborted_req_cpu_cache_pages(req)
self.router.shm_req_manager.put_back_req_obj(req)
self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
from ...batch import Batch, Req
from lightllm.server.router.req_queue.base_queue import BaseQueue
from lightllm.common.basemodel.infer_lock import g_router_lock
from lightllm.utils.log_utils import init_logger

logger = init_logger(__name__)


class QueueForPDDecode(BaseQueue):
Expand Down Expand Up @@ -52,6 +55,8 @@ def generate_new_batch(self, current_batch: Batch):
if len(can_run_list) != 0:
new_batch = Batch(uuid.uuid4().int, can_run_list, dp_size_in_node=self.dp_size_in_node)
for req in abort_req_list:
req: Req = req
logger.debug(f"router abort req id {req.request_id} shm_index: {req.index_in_shm_mem}")
Comment on lines +58 to +59
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The line req: Req = req is a redundant type hint. The type of req is already inferred from the abort_req_list iterable, so this line can be removed for better code clarity.

Suggested change
req: Req = req
logger.debug(f"router abort req id {req.request_id} shm_index: {req.index_in_shm_mem}")
logger.debug(f"router abort req id {req.request_id} shm_index: {req.index_in_shm_mem}")

self.free_aborted_req_cpu_cache_pages(req)
self.router.shm_req_manager.put_back_req_obj(req)
self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
Expand Down