pip install modelscope
mkdir /home/model/Qwen
mkdir /home/model/Qwen/Qwen2.5-VL-7B-Instruct
modelscope download --model Qwen/Qwen2.5-VL-7B-Instruct --local_dir /home/model/Qwen/Qwen2.5-VL-7B-Instruct

根据以上方法生成镜像名为 vllm-ascend_v0.10.0rc1:v1.0_250901 的新镜像,后续使用该镜像进行推理部署。
#!/bin/sh
# Launcher for a vllm-ascend docker container.
# Every setting below starts out empty and is filled in from the command
# line by the getopts loop; empty optional settings get defaults later on.

# -m: local model path on the host
LOCAL_MODEL_PATH=''
# -i: docker image name or id
DOCKER_IMAGE=''
# -n: name given to the new container
CONTAINER_NAME=''
# -d: value exported to the container as ASCEND_RT_VISIBLE_DEVICES
ASCEND_DEVICES=''
# -p: host port published to container port 8000
SERVICE_PORT=''
#######################################
# Print the command-line help for this script and terminate.
# Arguments: none
# Outputs:   help text on stdout
# Returns:   never returns; exits the script with status 1
#######################################
usage() {
  # The synopsis lists every mandatory option, including -i.
  printf 'Usage: %s -m LOCAL_MODEL_PATH -i DOCKER_IMAGE -n CONTAINER_NAME [-d ASCEND_DEVICES] [-p SERVICE_PORT]\n' "$0"
  # printf with single-quoted literals prints the text verbatim, so no
  # stray backslashes leak into the output.
  printf '%s\n' \
    'Options:' \
    ' -m, Set local model path (MUST), e.g., "/root/.cache/Qwen/Qwen3-4B"' \
    ' -i, Set docker image name or id (MUST)' \
    ' -n, Set new container name (MUST)' \
    ' -d, Set visible ascend devices (optional), default "0,1,2,3,4,5,6,7"' \
    ' -p, Set service port (optional), default "8000"' \
    ' -h, Show this help and exit'
  exit 1
}
# Parse the command line. The leading ':' in the option string selects
# getopts' silent mode: an unknown option yields '?' and a missing option
# argument yields ':', with the offending option letter in OPTARG.
while getopts ":i:p:m:n:d:h" opt; do
  case "$opt" in
    p)
      SERVICE_PORT="$OPTARG"
      printf 'arg -p provided, service port set with : %s\n' "$SERVICE_PORT"
      ;;
    m)
      LOCAL_MODEL_PATH="$OPTARG"
      printf 'arg -m provided, local model path set with : %s\n' "$LOCAL_MODEL_PATH"
      ;;
    n)
      CONTAINER_NAME="$OPTARG"
      printf 'arg -n provided, container name set with : %s\n' "$CONTAINER_NAME"
      ;;
    d)
      ASCEND_DEVICES="$OPTARG"
      printf 'arg -d provided, ASCEND_RT_VISIBLE_DEVICES set with : %s\n' "$ASCEND_DEVICES"
      ;;
    i)
      DOCKER_IMAGE="$OPTARG"
      printf 'arg -i provided, docker image set with : %s\n' "$DOCKER_IMAGE"
      ;;
    h)
      # usage itself always exits with status 1; running it in a subshell
      # confines that exit so an explicit help request can succeed with 0.
      (usage)
      exit 0
      ;;
    \?)
      echo "Error: invalid arg -$OPTARG" >&2
      # usage terminates the script with status 1.
      usage
      ;;
    :)
      echo "Error: arg -$OPTARG needs a value" >&2
      # usage terminates the script with status 1.
      usage
      ;;
  esac
done
# Abort with the help text unless every mandatory option (-m, -i, -n) was
# supplied. The expansions are quoted so that a value containing spaces or
# glob characters is tested as one word instead of confusing `[`.
if [ -z "$LOCAL_MODEL_PATH" ] || [ -z "$DOCKER_IMAGE" ] || [ -z "$CONTAINER_NAME" ]; then
  # Diagnostics go to stderr, like the other error messages in this script.
  echo "Error: required args missing" >&2
  usage
  exit 1
fi
# Fill in defaults for optional settings that are unset or empty.
SERVICE_PORT="${SERVICE_PORT:-8000}"
ASCEND_DEVICES="${ASCEND_DEVICES:-0,1,2,3,4,5,6,7}"
# LOCAL_CACHE_PATH has no command-line option; it is taken from the
# environment when present and falls back to /root/.cache otherwise.
LOCAL_CACHE_PATH="${LOCAL_CACHE_PATH:-/root/.cache}"

# Last path component of the model directory. The argument is quoted so a
# path containing spaces is not split into a name plus a suffix operand,
# and '--' keeps a path starting with '-' from being read as an option.
MODEL_NAME=$(basename -- "$LOCAL_MODEL_PATH")
# Start the container detached (-d) with an interactive TTY (-it), running
# `bash` as root (-u 0) in privileged mode with the host IPC namespace.
#   * ASCEND_RT_VISIBLE_DEVICES carries the -d device list into the container.
#   * The Ascend device nodes, driver libraries and npu-smi are passed
#     through from the host.
#   * The cache directory is mounted at /root/.cache and the model
#     directory at /model.
#   * Host port SERVICE_PORT is published to container port 8000.
# Every expansion is quoted so paths or names containing spaces stay single
# arguments. The original trailing `-it` duplicated the flags already given
# by `-itd` and has been dropped.
# NOTE(review): the variable is spelled SERVIC_MODEL_NAME; presumably the
# image reads that exact name - confirm before renaming it.
docker run -itd -u 0 --ipc=host --privileged \
  -e VLLM_USE_MODELSCOPE=True \
  -e PYTORCH_NPU_ALLOC_CONF=max_split_size_mb:256 \
  -e ASCEND_RT_VISIBLE_DEVICES="${ASCEND_DEVICES}" \
  -e SERVIC_MODEL_NAME="${MODEL_NAME}" \
  --name "${CONTAINER_NAME}" \
  --device /dev/davinci_manager \
  --device /dev/devmm_svm \
  --device /dev/hisi_hdc \
  -v /usr/local/dcmi:/usr/local/dcmi \
  -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
  -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \
  -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
  -v /etc/ascend_install.info:/etc/ascend_install.info \
  -v "${LOCAL_CACHE_PATH}:/root/.cache" \
  -v "${LOCAL_MODEL_PATH}:/model" \
  -v /opt:/opt \
  -p "${SERVICE_PORT}:8000" \
  "${DOCKER_IMAGE}" bash
# The script is used as follows:
bash run-vllm-ascend_v1.sh -i <镜像名称> -m <本地模型路径> -n <容器实例名称> -d <NPU设备ID> -p <监听端口>

例如:
bash run-vllm-ascend_v1.sh -m /home/model/Qwen/Qwen2.5-VL-7B-Instruct -i vllm-ascend_v0.10.0rc1:v1.0_250901 -n qwen25vl7b -p 8901 -d "0"

该命令表示将:
docker logs -f qwen25vl7b

使用以上命令跟踪容器启动日志,输出“Application startup complete.”说明启动完成:
INFO 09-02 02:48:05 [launcher.py:37] Route: /v1/rerank, Methods: POST
INFO 09-02 02:48:05 [launcher.py:37] Route: /v2/rerank, Methods: POST
INFO 09-02 02:48:05 [launcher.py:37] Route: /scale_elastic_ep, Methods: POST
INFO 09-02 02:48:05 [launcher.py:37] Route: /is_scaling_elastic_ep, Methods: POST
INFO 09-02 02:48:05 [launcher.py:37] Route: /invocations, Methods: POST
INFO 09-02 02:48:05 [launcher.py:37] Route: /metrics, Methods: GET
INFO: Started server process [591]
INFO: Waiting for application startup.
INFO: Application startup complete.