-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Open
Labels
enhancement: New feature or request
Description
为什么 swift infer 无法传入 RequestConfig 中的参数 n,从而针对数据集中的每个 query 生成多条 response?
#!/bin/bash
CUDA_VISIBLE_DEVICES=0 \
swift infer \
--model /home/gpu1/SAIT_HQ_XIAN/share/SOLVE/xys/clap25/models/Qwen3-0.6B \
--infer_backend vllm \
--val_dataset VerilogEval-v2-spec-to-rtl.jsonl \
--vllm_gpu_memory_utilization 0.9 \
--vllm_max_model_len 8192 \
--max_new_tokens 4096 \
--response_prefix '<think>\n\n</think>\n\n' \
--n 5
报错信息如下:
run sh: `/home/gpu1/SAIT_HQ_XIAN/share/SOLVE/xys/supercom_envs/ms_swift/bin/python /home/gpu1/SAIT_HQ_XIAN/share/SOLVE/xys/supercom_envs/ms_swift/lib/python3.11/site-packages/swift/cli/infer.py --model /home/gpu1/SAIT_HQ_XIAN/share/SOLVE/xys/clap25/models/Qwen3-0.6B --infer_backend vllm --val_dataset /home/gpu1/SAIT_HQ_XIAN/share/SOLVE/xys/clap25/scripts/swift_train/data_process/VerilogEval-v2-spec-to-rtl.jsonl --vllm_gpu_memory_utilization 0.9 --vllm_max_model_len 8192 --max_new_tokens 4096 --n 5 --response_prefix <think>\n\n</think>\n\n`
[INFO:swift] Successfully registered `/home/gpu1/SAIT_HQ_XIAN/share/SOLVE/xys/supercom_envs/ms_swift/lib/python3.11/site-packages/swift/llm/dataset/data/dataset_info.json`.
usage: infer.py [-h] [--use_ray [USE_RAY]] [--ray_exp_name RAY_EXP_NAME]
[--device_groups DEVICE_GROUPS] [--model MODEL]
[--model_type MODEL_TYPE] [--model_revision MODEL_REVISION]
[--task_type {causal_lm,seq_cls,embedding,reranker,generative_reranker}]
[--torch_dtype {bfloat16,float16,float32,None}]
[--attn_impl ATTN_IMPL]
[--new_special_tokens NEW_SPECIAL_TOKENS [NEW_SPECIAL_TOKENS ...]]
[--num_labels NUM_LABELS]
[--problem_type {regression,single_label_classification,multi_label_classification}]
[--rope_scaling ROPE_SCALING] [--device_map DEVICE_MAP]
[--max_memory MAX_MEMORY] [--max_model_len MAX_MODEL_LEN]
[--local_repo_path LOCAL_REPO_PATH]
[--init_strategy {zero,uniform,normal,xavier_uniform,xavier_normal,kaiming_uniform,kaiming_normal,orthogonal}]
[--template TEMPLATE] [--system SYSTEM]
[--max_length MAX_LENGTH]
[--truncation_strategy {delete,left,right,split,None}]
[--max_pixels MAX_PIXELS] [--agent_template AGENT_TEMPLATE]
[--norm_bbox {norm1000,none,None}]
[--use_chat_template USE_CHAT_TEMPLATE]
[--padding_free [PADDING_FREE]]
[--padding_side {left,right,None}] [--loss_scale LOSS_SCALE]
[--sequence_parallel_size SEQUENCE_PARALLEL_SIZE]
[--response_prefix RESPONSE_PREFIX]
[--template_backend {swift,jinja}]
[--dataset DATASET [DATASET ...]]
[--val_dataset VAL_DATASET [VAL_DATASET ...]]
[--cached_dataset CACHED_DATASET [CACHED_DATASET ...]]
[--cached_val_dataset CACHED_VAL_DATASET [CACHED_VAL_DATASET ...]]
[--split_dataset_ratio SPLIT_DATASET_RATIO]
[--data_seed DATA_SEED] [--dataset_num_proc DATASET_NUM_PROC]
[--load_from_cache_file [LOAD_FROM_CACHE_FILE]]
[--dataset_shuffle [DATASET_SHUFFLE]] [--no_dataset_shuffle]
[--val_dataset_shuffle [VAL_DATASET_SHUFFLE]]
[--streaming [STREAMING]]
[--interleave_prob INTERLEAVE_PROB [INTERLEAVE_PROB ...]]
[--stopping_strategy {first_exhausted,all_exhausted}]
[--shuffle_buffer_size SHUFFLE_BUFFER_SIZE]
[--download_mode {force_redownload,reuse_dataset_if_exists}]
[--columns COLUMNS] [--strict [STRICT]]
[--remove_unused_columns [REMOVE_UNUSED_COLUMNS]]
[--no_remove_unused_columns]
[--model_name MODEL_NAME [MODEL_NAME ...]]
[--model_author MODEL_AUTHOR [MODEL_AUTHOR ...]]
[--custom_dataset_info CUSTOM_DATASET_INFO [CUSTOM_DATASET_INFO ...]]
[--quant_method {bnb,hqq,eetq,quanto,fp8}]
[--quant_bits {1,2,3,4,8,float8}] [--hqq_axis HQQ_AXIS]
[--bnb_4bit_compute_dtype {float16,bfloat16,float32,None}]
[--bnb_4bit_quant_type {fp4,nf4}]
[--bnb_4bit_use_double_quant [BNB_4BIT_USE_DOUBLE_QUANT]]
[--no_bnb_4bit_use_double_quant]
[--bnb_4bit_quant_storage BNB_4BIT_QUANT_STORAGE]
[--max_new_tokens MAX_NEW_TOKENS] [--temperature TEMPERATURE]
[--top_k TOP_K] [--top_p TOP_P]
[--repetition_penalty REPETITION_PENALTY]
[--num_beams NUM_BEAMS] [--stream STREAM]
[--stop_words STOP_WORDS [STOP_WORDS ...]]
[--logprobs [LOGPROBS]] [--top_logprobs TOP_LOGPROBS]
[--ckpt_dir CKPT_DIR]
[--lora_modules LORA_MODULES [LORA_MODULES ...]]
[--tuner_backend {peft,unsloth}] [--train_type TRAIN_TYPE]
[--adapters ADAPTERS [ADAPTERS ...]]
[--external_plugins EXTERNAL_PLUGINS [EXTERNAL_PLUGINS ...]]
[--seed SEED] [--model_kwargs MODEL_KWARGS]
[--load_args [LOAD_ARGS]] [--no_load_args]
[--load_data_args [LOAD_DATA_ARGS]] [--packing [PACKING]]
[--packing_length PACKING_LENGTH]
[--packing_num_proc PACKING_NUM_PROC]
[--lazy_tokenize LAZY_TOKENIZE]
[--custom_register_path CUSTOM_REGISTER_PATH [CUSTOM_REGISTER_PATH ...]]
[--use_hf [USE_HF]] [--hub_token HUB_TOKEN]
[--ddp_timeout DDP_TIMEOUT] [--ddp_backend DDP_BACKEND]
[--ignore_args_error [IGNORE_ARGS_ERROR]]
[--use_swift_lora [USE_SWIFT_LORA]]
[--vllm_gpu_memory_utilization VLLM_GPU_MEMORY_UTILIZATION]
[--vllm_tensor_parallel_size VLLM_TENSOR_PARALLEL_SIZE]
[--vllm_pipeline_parallel_size VLLM_PIPELINE_PARALLEL_SIZE]
[--vllm_enable_expert_parallel [VLLM_ENABLE_EXPERT_PARALLEL]]
[--vllm_max_num_seqs VLLM_MAX_NUM_SEQS]
[--vllm_max_model_len VLLM_MAX_MODEL_LEN]
[--vllm_disable_custom_all_reduce [VLLM_DISABLE_CUSTOM_ALL_REDUCE]]
[--no_vllm_disable_custom_all_reduce]
[--vllm_enforce_eager [VLLM_ENFORCE_EAGER]]
[--vllm_limit_mm_per_prompt VLLM_LIMIT_MM_PER_PROMPT]
[--vllm_max_lora_rank VLLM_MAX_LORA_RANK]
[--vllm_enable_prefix_caching VLLM_ENABLE_PREFIX_CACHING]
[--vllm_use_async_engine [VLLM_USE_ASYNC_ENGINE]]
[--vllm_quantization VLLM_QUANTIZATION]
[--vllm_reasoning_parser VLLM_REASONING_PARSER]
[--vllm_disable_cascade_attn [VLLM_DISABLE_CASCADE_ATTN]]
[--vllm_mm_processor_cache_gb VLLM_MM_PROCESSOR_CACHE_GB]
[--vllm_speculative_config VLLM_SPECULATIVE_CONFIG]
[--vllm_engine_kwargs VLLM_ENGINE_KWARGS]
[--vllm_data_parallel_size VLLM_DATA_PARALLEL_SIZE]
[--sglang_tp_size SGLANG_TP_SIZE]
[--sglang_pp_size SGLANG_PP_SIZE]
[--sglang_dp_size SGLANG_DP_SIZE]
[--sglang_ep_size SGLANG_EP_SIZE]
[--sglang_enable_ep_moe [SGLANG_ENABLE_EP_MOE]]
[--sglang_mem_fraction_static SGLANG_MEM_FRACTION_STATIC]
[--sglang_context_length SGLANG_CONTEXT_LENGTH]
[--sglang_disable_cuda_graph [SGLANG_DISABLE_CUDA_GRAPH]]
[--sglang_quantization SGLANG_QUANTIZATION]
[--sglang_kv_cache_dtype SGLANG_KV_CACHE_DTYPE]
[--sglang_enable_dp_attention [SGLANG_ENABLE_DP_ATTENTION]]
[--sglang_disable_custom_all_reduce [SGLANG_DISABLE_CUSTOM_ALL_REDUCE]]
[--no_sglang_disable_custom_all_reduce]
[--sglang_speculative_algorithm SGLANG_SPECULATIVE_ALGORITHM]
[--sglang_speculative_num_steps SGLANG_SPECULATIVE_NUM_STEPS]
[--sglang_speculative_eagle_topk SGLANG_SPECULATIVE_EAGLE_TOPK]
[--sglang_speculative_num_draft_tokens SGLANG_SPECULATIVE_NUM_DRAFT_TOKENS]
[--lmdeploy_tp LMDEPLOY_TP]
[--lmdeploy_session_len LMDEPLOY_SESSION_LEN]
[--lmdeploy_cache_max_entry_count LMDEPLOY_CACHE_MAX_ENTRY_COUNT]
[--lmdeploy_quant_policy LMDEPLOY_QUANT_POLICY]
[--lmdeploy_vision_batch_size LMDEPLOY_VISION_BATCH_SIZE]
[--merge_lora [MERGE_LORA]]
[--safe_serialization [SAFE_SERIALIZATION]]
[--no_safe_serialization] [--max_shard_size MAX_SHARD_SIZE]
[--infer_backend {vllm,pt,sglang,lmdeploy}]
[--result_path RESULT_PATH]
[--write_batch_size WRITE_BATCH_SIZE] [--metric {acc,rouge}]
[--max_batch_size MAX_BATCH_SIZE]
[--val_dataset_sample VAL_DATASET_SAMPLE]
[--reranker_use_activation [RERANKER_USE_ACTIVATION]]
[--no_reranker_use_activation]
infer.py: error: ambiguous option: --n could match --new_special_tokens, --new-special-tokens, --num_labels, --num-labels, --norm_bbox, --norm-bbox, --no_dataset_shuffle, --no-dataset-shuffle, --no_remove_unused_columns, --no-remove-unused-columns, --no_bnb_4bit_use_double_quant, --no-bnb-4bit-use-double-quant, --num_beams, --num-beams, --no_load_args, --no-load-args, --no_vllm_disable_custom_all_reduce, --no-vllm-disable-custom-all-reduce, --no_sglang_disable_custom_all_reduce, --no-sglang-disable-custom-all-reduce, --no_safe_serialization, --no-safe-serialization, --no_reranker_use_activation, --no-reranker-use-activation
Metadata
Assignees
Labels
enhancement: New feature or request