环境准备:Atlas 800I/800T A2 (64G),单卡。
执行以下 Shell 命令,拉取MindSpore MiniCPM 推理容器镜像:
docker pull swr.cn-central-221.ovaijisuan.com/mindsporelab/minicpm:v4
安装 openmind_hub,并设置权重存放路径:
pip install openmind_hub
export HUB_WHITE_LIST_PATHS=/mnt/data/MiniCPM8B # 该路径存放权重
执行以下 Python 代码,下载模型权重:
from openmind_hub import snapshot_download
snapshot_download(
repo_id="MindSpore-Lab/MiniCPM4-8B",
local_dir="/mnt/data/MiniCPM8B",
local_dir_use_symlinks=False
)
执行以下命令创建容器,name 设置为 MiniCPM8B:
docker run -it --privileged --name=MiniCPM8B --net=host \
--shm-size 500g \
--device=/dev/davinci0 \
--device=/dev/davinci1 \
--device=/dev/davinci2 \
--device=/dev/davinci3 \
--device=/dev/davinci4 \
--device=/dev/davinci5 \
--device=/dev/davinci6 \
--device=/dev/davinci7 \
--device=/dev/davinci_manager \
--device=/dev/hisi_hdc \
--device /dev/devmm_svm \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \
-v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \
-v /usr/local/sbin:/usr/local/sbin \
-v /etc/hccn.conf:/etc/hccn.conf \
-v /mnt/data/MiniCPM8B/:/mnt/data/MiniCPM8B/ \
swr.cn-central-221.ovaijisuan.com/mindsporelab/minicpm:v4 \
bash
进入容器后,执行以下命令启动推理服务:
python3 -m vllm_mindspore.entrypoints vllm.entrypoints.openai.api_server --model "/mnt/data/MiniCPM8B" --trust_remote_code --tensor_parallel_size=1 --max-num-seqs=256 --block-size=32 --max_model_len=8192 --max-num-batched-tokens=8192
服务启动后,执行以下命令发送推理请求:
curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "/mnt/data/MiniCPM8B",
"messages": [
{"role": "user", "content": "帮我介绍一下上海"}
],
"temperature": 0.0,
"top_p": 0.95,
"top_k": 20,
"min_p": 0,
"max_tokens": 4096,
"add_special_tokens": true,
"repetition_penalty": 1.2
}'