Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions lightllm/server/api_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@ class ImageURL(BaseModel):
url: str


class AudioURL(BaseModel):
    """Audio reference inside a multimodal chat message.

    `url` is either an http(s) URL or a ``data:audio/...;base64,`` URI —
    those are the two forms accepted by the chat-completions handler.
    """

    url: str


class MessageContent(BaseModel):
    """One element of a multimodal message's content list.

    `type` selects which optional payload is populated (e.g. "text",
    "image_url", "audio_url"); the unused fields stay ``None``.
    """

    type: str
    text: Optional[str] = None
    image_url: Optional[ImageURL] = None
    audio_url: Optional[AudioURL] = None


class Message(BaseModel):
Expand Down
15 changes: 14 additions & 1 deletion lightllm/server/api_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Req

created_time = int(time.time())

multimodal_params_dict = {"images": []}
multimodal_params_dict = {"images": [], "audios": []}
for message in request.messages:
if isinstance(message.content, list):
texts = []
Expand All @@ -197,6 +197,19 @@ async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Req
raise ValueError(
"Unrecognized image input. Supports local path, http url, base64, and PIL.Image."
)
elif content.type == "audio_url" and content.audio_url is not None:
audio = content.audio_url.url
if audio.startswith("http://") or audio.startswith("https://"):
multimodal_params_dict["audios"].append({"type": "url", "data": audio})
Comment on lines +200 to +203
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

The application introduces a new feature to handle audio_url in chat completions. When an audio_url starting with http:// or https:// is provided, the server subsequently fetches the resource using the fetch_resource utility function without any validation of the target host. This allows an attacker to perform Server-Side Request Forgery (SSRF) attacks, potentially accessing internal network resources such as cloud metadata services (e.g., http://169.254.169.254/latest/meta-data/) or internal APIs.

To remediate this, implement strict validation for the audio_url. This should include maintaining an allow-list of trusted domains and ensuring that the resolved IP address is not a private or reserved IP address.

elif audio.startswith("data:audio"):
data_str = audio.split(";", 1)[1]
if data_str.startswith("base64,"):
data = data_str[7:]
multimodal_params_dict["audios"].append({"type": "base64", "data": data})
else:
raise ValueError("Unrecognized audio input.")
else:
raise ValueError("Unrecognized audio input. Supports local path, http url, base64.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The error message "Unrecognized audio input. Supports local path, http url, base64." is misleading because the current implementation does not handle local file paths for audio inputs. It only supports URLs and base64-encoded data. To avoid confusion, the error message should be updated to reflect the actual supported formats.

Suggested change
raise ValueError("Unrecognized audio input. Supports local path, http url, base64.")
raise ValueError("Unrecognized audio input. Supports http url and base64.")


tools = None
if request.tools and request.tool_choice != "none":
Expand Down
31 changes: 30 additions & 1 deletion lightllm/server/build_prompt.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,45 @@
import os
import json
from lightllm.server.tokenizer import get_tokenizer
from lightllm.utils.log_utils import init_logger

logger = init_logger(__name__)

tokenizer = None


def init_tokenizer(args):
    """Initialize the module-level ``tokenizer`` and resolve its chat template.

    Template resolution order:
      1. An explicit template file passed via ``args.chat_template`` (always wins).
      2. ``chat_template.json`` in ``args.model_dir``, but only when no
         ``chat_template.jinja`` exists there (a ``.jinja`` file is presumably
         already picked up by the tokenizer itself — confirm against the
         tokenizer loading code).

    Args:
        args: Parsed server arguments; must provide ``model_dir``,
            ``tokenizer_mode``, ``trust_remote_code`` and ``chat_template``.
    """
    global tokenizer
    # Imported lazily — presumably to avoid a circular import at module load;
    # verify before hoisting to the top of the file.
    from lightllm.server.tokenizer import get_tokenizer

    tokenizer = get_tokenizer(args.model_dir, args.tokenizer_mode, trust_remote_code=args.trust_remote_code)

    # An explicitly supplied template file always takes precedence.
    chat_path = args.chat_template
    if chat_path is not None:
        with open(chat_path, "r", encoding="utf-8") as f:
            tokenizer.chat_template = f.read()
        return

    # Fall back to chat_template.json in the model dir, but only when no
    # chat_template.jinja is present alongside it.
    jinja_path = os.path.join(args.model_dir, "chat_template.jinja")
    json_path = os.path.join(args.model_dir, "chat_template.json")
    if os.path.exists(jinja_path) or not os.path.exists(json_path):
        return

    try:
        with open(json_path, "r", encoding="utf-8") as f:
            template_data = json.load(f)
        if "chat_template" in template_data:
            # Set it directly on the tokenizer object so apply_chat_template can use it.
            if hasattr(tokenizer, "tokenizer"):
                # Multimodal tokenizers wrap the real text tokenizer.
                tokenizer.tokenizer.chat_template = template_data["chat_template"]
            else:
                tokenizer.chat_template = template_data["chat_template"]
            # Log success only when a template was actually installed.
            logger.info(f"Loaded chat_template.json from {json_path}")
    except Exception as e:
        # Best effort: a broken template file must not prevent server startup.
        logger.warning(f"Failed to load chat_template.json from {json_path}: {e}")


async def build_prompt(request, tools) -> str:
Expand Down
10 changes: 10 additions & 0 deletions lightllm/server/multimodal_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ def to_dict(self):
ret["start_index_in_embed_cache"] = self.start_index_in_embed_cache
return ret

def to_origin_dict(self):
    """Convert this item back to its original-request form.

    Mainly used when forwarding the request to another server.
    """
    return {"type": self._type, "data": self._data}


class ImageItem:
def __init__(self, **kwargs):
Expand Down Expand Up @@ -173,4 +182,5 @@ def to_origin_dict(self):
"""
ret = {}
ret["images"] = [i.to_origin_dict() for i in self.images]
ret["audios"] = [a.to_origin_dict() for a in self.audios]
return ret
Loading