前往昇腾社区/开发资源下载适配本模型的镜像包:mindie_1.0.T71.*-800I-A2-arm64-py3.11.tar.gz
完成加载镜像后,请使用docker images命令查找并确认具体的镜像名称与标签。
docker load -i mindie:1.0.T71.*-800I-A2-py311-ubuntu22.04-arm64(下载的镜像名称与标签)

目前提供的MindIE镜像预置了Baichuan-M1-14B-Base模型推理脚本,无需再额外下载魔乐仓库承载的模型适配代码,直接新建容器即可。
执行以下启动命令(参考):
docker run -itd --privileged --name=容器名称 --net=host \
--shm-size 500g \
--device=/dev/davinci0 \
--device=/dev/davinci1 \
--device=/dev/davinci2 \
--device=/dev/davinci3 \
--device=/dev/davinci4 \
--device=/dev/davinci5 \
--device=/dev/davinci6 \
--device=/dev/davinci7 \
--device=/dev/davinci_manager \
--device=/dev/hisi_hdc \
--device /dev/devmm_svm \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \
-v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \
-v /usr/local/sbin:/usr/local/sbin \
-v /etc/hccn.conf:/etc/hccn.conf \
-v /权重路径:/权重路径 \
mindie:1.0.0-XXX-800I-A2-arm64-py3.11(根据加载的镜像名称修改) \
bash

docker exec -it ${容器名称} bash
source /usr/local/Ascend/atb-models/set_env.sh

进入llm_model路径:
cd $ATB_SPEED_HOME_PATH

执行对话测试:
torchrun --nproc_per_node 2 \
--master_port 20037 \
-m examples.run_pa \
--block_size 64 \
--model_path {权重路径} \
--input_texts 'I have recently recovered from my cold.' \
--max_output_length 20 \
--trust_remote_code

进入ModelTest路径:
cd $ATB_SPEED_HOME_PATH/tests/modeltest/

运行测试脚本:
bash run.sh pa_[data_type] performance [case_pair] [batch_size] ([prefill_batch_size]) [model_name] ([is_chat_model]) (lora [lora_data_path]) [weight_dir] ([trust_remote_code]) [chip_num] ([parallel_params]) ([max_position_embedding/max_sequence_length])

具体执行batch=1, 输入长度256, 输出长度256用例的4卡并行性能测试命令为:
bash run.sh pa_bf16 performance [[256,256]] 1 baichuan_m1 ${weight_path} trust_remote_code 4

注:ModelTest为大模型的性能和精度提供测试功能。使用文档请参考:
${ATB_SPEED_HOME_PATH}/tests/modeltest/README.md
vim /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json

{
...
"ServerConfig" :
{
...
"port" : 1040, #自定义
"managementPort" : 1041, #自定义
"metricsPort" : 1042, #自定义
...
"httpsEnabled" : false,
...
},
"BackendConfig": {
...
"npuDeviceIds" : [[0,1,2,3]],
...
"ModelDeployConfig":
{
"maxSeqLen" : 32768,
"maxInputTokenLen" : 32768,
"truncation" : false,
"ModelConfig" : [
{
...
"modelName" : "baichuan",
"modelWeightPath" : "/data/datasets/Baichuan-M1-14B-Base",
"worldSize" : 4,
...
"trustRemoteCode" : true, #baichuan模型运行需要信任本地代码
}
]
},
"ScheduleConfig" :
{
...
"cacheBlockSize" : 64,
"maxPrefillBatchSize" : 1,
"maxPrefillTokens" : 32768,
"maxBatchSize" : 1,
...
}
}
}

cd /usr/local/Ascend/mindie/latest/mindie-service/bin
./mindieservice_daemon

curl 127.0.0.1:1040/generate -d '{
"prompt": "I have recently recovered from my cold.",
"max_tokens": 32,
"stream": false,
"do_sample":true,
"repetition_penalty": 1.05,
"temperature": 0.3,
"top_p": 0.85,
"top_k": 5,
"model": "baichuan"
}'

注: 服务化推理的更多信息请参考MindIE Service用户指南
pip install transformers==4.46.3 --force-reinstall
pip install numpy==1.26.4 --force-reinstall