Skip to content

infer dataset 情况下,如何实现一次输出多条 response? #7257

@NoviceStone

Description

@NoviceStone

swift infer 为什么无法传入 RequestConfig 中的参数 n,从而实现针对数据集中的每个 query 生成多条 response?

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 \
swift infer \
    --model /home/gpu1/SAIT_HQ_XIAN/share/SOLVE/xys/clap25/models/Qwen3-0.6B \
    --infer_backend vllm \
    --val_dataset VerilogEval-v2-spec-to-rtl.jsonl \
    --vllm_gpu_memory_utilization 0.9 \
    --vllm_max_model_len 8192 \
    --max_new_tokens 4096 \
    --response_prefix '<think>\n\n</think>\n\n' \
    --n 5

报错信息如下:

run sh: `/home/gpu1/SAIT_HQ_XIAN/share/SOLVE/xys/supercom_envs/ms_swift/bin/python /home/gpu1/SAIT_HQ_XIAN/share/SOLVE/xys/supercom_envs/ms_swift/lib/python3.11/site-packages/swift/cli/infer.py --model /home/gpu1/SAIT_HQ_XIAN/share/SOLVE/xys/clap25/models/Qwen3-0.6B --infer_backend vllm --val_dataset /home/gpu1/SAIT_HQ_XIAN/share/SOLVE/xys/clap25/scripts/swift_train/data_process/VerilogEval-v2-spec-to-rtl.jsonl --vllm_gpu_memory_utilization 0.9 --vllm_max_model_len 8192 --max_new_tokens 4096 --n 5 --response_prefix <think>\n\n</think>\n\n`
[INFO:swift] Successfully registered `/home/gpu1/SAIT_HQ_XIAN/share/SOLVE/xys/supercom_envs/ms_swift/lib/python3.11/site-packages/swift/llm/dataset/data/dataset_info.json`.
usage: infer.py [-h] [--use_ray [USE_RAY]] [--ray_exp_name RAY_EXP_NAME]
                [--device_groups DEVICE_GROUPS] [--model MODEL]
                [--model_type MODEL_TYPE] [--model_revision MODEL_REVISION]
                [--task_type {causal_lm,seq_cls,embedding,reranker,generative_reranker}]
                [--torch_dtype {bfloat16,float16,float32,None}]
                [--attn_impl ATTN_IMPL]
                [--new_special_tokens NEW_SPECIAL_TOKENS [NEW_SPECIAL_TOKENS ...]]
                [--num_labels NUM_LABELS]
                [--problem_type {regression,single_label_classification,multi_label_classification}]
                [--rope_scaling ROPE_SCALING] [--device_map DEVICE_MAP]
                [--max_memory MAX_MEMORY] [--max_model_len MAX_MODEL_LEN]
                [--local_repo_path LOCAL_REPO_PATH]
                [--init_strategy {zero,uniform,normal,xavier_uniform,xavier_normal,kaiming_uniform,kaiming_normal,orthogonal}]
                [--template TEMPLATE] [--system SYSTEM]
                [--max_length MAX_LENGTH]
                [--truncation_strategy {delete,left,right,split,None}]
                [--max_pixels MAX_PIXELS] [--agent_template AGENT_TEMPLATE]
                [--norm_bbox {norm1000,none,None}]
                [--use_chat_template USE_CHAT_TEMPLATE]
                [--padding_free [PADDING_FREE]]
                [--padding_side {left,right,None}] [--loss_scale LOSS_SCALE]
                [--sequence_parallel_size SEQUENCE_PARALLEL_SIZE]
                [--response_prefix RESPONSE_PREFIX]
                [--template_backend {swift,jinja}]
                [--dataset DATASET [DATASET ...]]
                [--val_dataset VAL_DATASET [VAL_DATASET ...]]
                [--cached_dataset CACHED_DATASET [CACHED_DATASET ...]]
                [--cached_val_dataset CACHED_VAL_DATASET [CACHED_VAL_DATASET ...]]
                [--split_dataset_ratio SPLIT_DATASET_RATIO]
                [--data_seed DATA_SEED] [--dataset_num_proc DATASET_NUM_PROC]
                [--load_from_cache_file [LOAD_FROM_CACHE_FILE]]
                [--dataset_shuffle [DATASET_SHUFFLE]] [--no_dataset_shuffle]
                [--val_dataset_shuffle [VAL_DATASET_SHUFFLE]]
                [--streaming [STREAMING]]
                [--interleave_prob INTERLEAVE_PROB [INTERLEAVE_PROB ...]]
                [--stopping_strategy {first_exhausted,all_exhausted}]
                [--shuffle_buffer_size SHUFFLE_BUFFER_SIZE]
                [--download_mode {force_redownload,reuse_dataset_if_exists}]
                [--columns COLUMNS] [--strict [STRICT]]
                [--remove_unused_columns [REMOVE_UNUSED_COLUMNS]]
                [--no_remove_unused_columns]
                [--model_name MODEL_NAME [MODEL_NAME ...]]
                [--model_author MODEL_AUTHOR [MODEL_AUTHOR ...]]
                [--custom_dataset_info CUSTOM_DATASET_INFO [CUSTOM_DATASET_INFO ...]]
                [--quant_method {bnb,hqq,eetq,quanto,fp8}]
                [--quant_bits {1,2,3,4,8,float8}] [--hqq_axis HQQ_AXIS]
                [--bnb_4bit_compute_dtype {float16,bfloat16,float32,None}]
                [--bnb_4bit_quant_type {fp4,nf4}]
                [--bnb_4bit_use_double_quant [BNB_4BIT_USE_DOUBLE_QUANT]]
                [--no_bnb_4bit_use_double_quant]
                [--bnb_4bit_quant_storage BNB_4BIT_QUANT_STORAGE]
                [--max_new_tokens MAX_NEW_TOKENS] [--temperature TEMPERATURE]
                [--top_k TOP_K] [--top_p TOP_P]
                [--repetition_penalty REPETITION_PENALTY]
                [--num_beams NUM_BEAMS] [--stream STREAM]
                [--stop_words STOP_WORDS [STOP_WORDS ...]]
                [--logprobs [LOGPROBS]] [--top_logprobs TOP_LOGPROBS]
                [--ckpt_dir CKPT_DIR]
                [--lora_modules LORA_MODULES [LORA_MODULES ...]]
                [--tuner_backend {peft,unsloth}] [--train_type TRAIN_TYPE]
                [--adapters ADAPTERS [ADAPTERS ...]]
                [--external_plugins EXTERNAL_PLUGINS [EXTERNAL_PLUGINS ...]]
                [--seed SEED] [--model_kwargs MODEL_KWARGS]
                [--load_args [LOAD_ARGS]] [--no_load_args]
                [--load_data_args [LOAD_DATA_ARGS]] [--packing [PACKING]]
                [--packing_length PACKING_LENGTH]
                [--packing_num_proc PACKING_NUM_PROC]
                [--lazy_tokenize LAZY_TOKENIZE]
                [--custom_register_path CUSTOM_REGISTER_PATH [CUSTOM_REGISTER_PATH ...]]
                [--use_hf [USE_HF]] [--hub_token HUB_TOKEN]
                [--ddp_timeout DDP_TIMEOUT] [--ddp_backend DDP_BACKEND]
                [--ignore_args_error [IGNORE_ARGS_ERROR]]
                [--use_swift_lora [USE_SWIFT_LORA]]
                [--vllm_gpu_memory_utilization VLLM_GPU_MEMORY_UTILIZATION]
                [--vllm_tensor_parallel_size VLLM_TENSOR_PARALLEL_SIZE]
                [--vllm_pipeline_parallel_size VLLM_PIPELINE_PARALLEL_SIZE]
                [--vllm_enable_expert_parallel [VLLM_ENABLE_EXPERT_PARALLEL]]
                [--vllm_max_num_seqs VLLM_MAX_NUM_SEQS]
                [--vllm_max_model_len VLLM_MAX_MODEL_LEN]
                [--vllm_disable_custom_all_reduce [VLLM_DISABLE_CUSTOM_ALL_REDUCE]]
                [--no_vllm_disable_custom_all_reduce]
                [--vllm_enforce_eager [VLLM_ENFORCE_EAGER]]
                [--vllm_limit_mm_per_prompt VLLM_LIMIT_MM_PER_PROMPT]
                [--vllm_max_lora_rank VLLM_MAX_LORA_RANK]
                [--vllm_enable_prefix_caching VLLM_ENABLE_PREFIX_CACHING]
                [--vllm_use_async_engine [VLLM_USE_ASYNC_ENGINE]]
                [--vllm_quantization VLLM_QUANTIZATION]
                [--vllm_reasoning_parser VLLM_REASONING_PARSER]
                [--vllm_disable_cascade_attn [VLLM_DISABLE_CASCADE_ATTN]]
                [--vllm_mm_processor_cache_gb VLLM_MM_PROCESSOR_CACHE_GB]
                [--vllm_speculative_config VLLM_SPECULATIVE_CONFIG]
                [--vllm_engine_kwargs VLLM_ENGINE_KWARGS]
                [--vllm_data_parallel_size VLLM_DATA_PARALLEL_SIZE]
                [--sglang_tp_size SGLANG_TP_SIZE]
                [--sglang_pp_size SGLANG_PP_SIZE]
                [--sglang_dp_size SGLANG_DP_SIZE]
                [--sglang_ep_size SGLANG_EP_SIZE]
                [--sglang_enable_ep_moe [SGLANG_ENABLE_EP_MOE]]
                [--sglang_mem_fraction_static SGLANG_MEM_FRACTION_STATIC]
                [--sglang_context_length SGLANG_CONTEXT_LENGTH]
                [--sglang_disable_cuda_graph [SGLANG_DISABLE_CUDA_GRAPH]]
                [--sglang_quantization SGLANG_QUANTIZATION]
                [--sglang_kv_cache_dtype SGLANG_KV_CACHE_DTYPE]
                [--sglang_enable_dp_attention [SGLANG_ENABLE_DP_ATTENTION]]
                [--sglang_disable_custom_all_reduce [SGLANG_DISABLE_CUSTOM_ALL_REDUCE]]
                [--no_sglang_disable_custom_all_reduce]
                [--sglang_speculative_algorithm SGLANG_SPECULATIVE_ALGORITHM]
                [--sglang_speculative_num_steps SGLANG_SPECULATIVE_NUM_STEPS]
                [--sglang_speculative_eagle_topk SGLANG_SPECULATIVE_EAGLE_TOPK]
                [--sglang_speculative_num_draft_tokens SGLANG_SPECULATIVE_NUM_DRAFT_TOKENS]
                [--lmdeploy_tp LMDEPLOY_TP]
                [--lmdeploy_session_len LMDEPLOY_SESSION_LEN]
                [--lmdeploy_cache_max_entry_count LMDEPLOY_CACHE_MAX_ENTRY_COUNT]
                [--lmdeploy_quant_policy LMDEPLOY_QUANT_POLICY]
                [--lmdeploy_vision_batch_size LMDEPLOY_VISION_BATCH_SIZE]
                [--merge_lora [MERGE_LORA]]
                [--safe_serialization [SAFE_SERIALIZATION]]
                [--no_safe_serialization] [--max_shard_size MAX_SHARD_SIZE]
                [--infer_backend {vllm,pt,sglang,lmdeploy}]
                [--result_path RESULT_PATH]
                [--write_batch_size WRITE_BATCH_SIZE] [--metric {acc,rouge}]
                [--max_batch_size MAX_BATCH_SIZE]
                [--val_dataset_sample VAL_DATASET_SAMPLE]
                [--reranker_use_activation [RERANKER_USE_ACTIVATION]]
                [--no_reranker_use_activation]
infer.py: error: ambiguous option: --n could match --new_special_tokens, --new-special-tokens, --num_labels, --num-labels, --norm_bbox, --norm-bbox, --no_dataset_shuffle, --no-dataset-shuffle, --no_remove_unused_columns, --no-remove-unused-columns, --no_bnb_4bit_use_double_quant, --no-bnb-4bit-use-double-quant, --num_beams, --num-beams, --no_load_args, --no-load-args, --no_vllm_disable_custom_all_reduce, --no-vllm-disable-custom-all-reduce, --no_sglang_disable_custom_all_reduce, --no-sglang-disable-custom-all-reduce, --no_safe_serialization, --no-safe-serialization, --no_reranker_use_activation, --no-reranker-use-activation

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement: New feature or request

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions