MindIE 2.1.RC2版本:
docker pull --platform=arm64 swr.cn-south-1.myhuaweicloud.com/ascendhub/mindie:2.1.RC2-800I-A2-py311-openeuler24.03-lts
后续工作目录文件夹为workdir,两台机器均挂载持久卷到workdir目录下,可将权重以及其他所需文件一并放在该文件夹中。
| 环境配置 | 配置说明 |
|---|---|
| 硬件配置 | Atlas 800T A2 910B2(64G) |
| 驱动版本 | 23.0.5.1 |
| CANN版本 | 8.2.RC2 |
| Python版本 | 3.11.6 |
| torch版本 | 2.1.0 |
| torch-npu版本 | 2.1.0.post13 |
| 推理框架 | mindie |
| 推理镜像 | MindIE 2.1.RC2 |
| 部署方式 | 双机 |
如果环境中无hccn_tool,需要上传hccn_tool到/usr/bin/下,添加可执行权限:chmod +x /usr/bin/hccn_tool
每个节点执行如下命令进行检查:
# Check the remote switch ports
for i in {0..7}; do hccn_tool -i $i -lldp -g | grep Ifname; done
# Get the link status of the Ethernet ports (UP or DOWN)
for i in {0..7}; do hccn_tool -i $i -link -g ; done
# Check the network health status
for i in {0..7}; do hccn_tool -i $i -net_health -g ; done
# View the network detected IP configuration
for i in {0..7}; do hccn_tool -i $i -netdetect -g ; done
# View gateway configuration
for i in {0..7}; do hccn_tool -i $i -gateway -g ; done
# View NPU network configuration
cat /etc/hccn.conf
检查NPU底层TLS校验行为:每台机器需要是一样的值,建议全0
for i in {0..7}; do hccn_tool -i $i -tls -g ; done | grep switch
如果不一致,可以通过以下命令,将NPU底层tls校验行为置0(执行完可以使用上面的命令进行检查,确认是否设置成功)
for i in {0..7};do hccn_tool -i $i -tls -s enable 0;done
机器间互联检测:可以通过本机每张NPU卡ping其他主机的NPU卡IP地址,能ping通表示正常。
获取每张NPU卡的ip地址
for i in {0..7}; do hccn_tool -i $i -ip -g | grep ipaddr; done
卡与卡之间互相ping的指令如下:
# Execute on the target node (replace with actual IP)
hccn_tool -i 0 -ping -g address x.x.x.x
提前准备DeepSeek R1 W8A8量化的权重,并将模型文件放到workdir下,示例路径:/workdir/DeepSeek-R1-0528-w8a8/,修改模型文件夹权限:chmod 750 -R /workdir/DeepSeek-R1-0528-w8a8/
原始权重下载链接,W8A8量化方式参考:DeepSeek R1系列
获取当前机器8张卡的IP地址,双机需要每台机器都执行一遍如下指令获取:
for i in {0..7};do hccn_tool -i $i -ip -g | grep ipaddr;done
以如下两台机器为例,示例ranktable.json格式:
机器1 Pod IP:197.166.232.231
机器2 Pod IP:197.166.70.223
{
"version": "1.0",
"server_count": "2",
"server_list": [
{
"server_id": "197.166.232.231",
"container_ip": "197.166.232.231",
"device": [
{"device_id": "0", "device_ip": "192.168.56.17", "rank_id": "0" },
{"device_id": "1", "device_ip": "192.168.56.18", "rank_id": "1" },
{"device_id": "2", "device_ip": "192.168.56.19", "rank_id": "2" },
{"device_id": "3", "device_ip": "192.168.56.20", "rank_id": "3" },
{"device_id": "4", "device_ip": "192.168.56.21", "rank_id": "4" },
{"device_id": "5", "device_ip": "192.168.56.22", "rank_id": "5" },
{"device_id": "6", "device_ip": "192.168.56.23", "rank_id": "6" },
{"device_id": "7", "device_ip": "192.168.56.24", "rank_id": "7" }
]
},
{
"server_id": "197.166.70.223",
"container_ip": "197.166.70.223",
"device": [
{"device_id": "0", "device_ip": "192.168.55.241", "rank_id": "8" },
{"device_id": "1", "device_ip": "192.168.55.242", "rank_id": "9" },
{"device_id": "2", "device_ip": "192.168.55.243", "rank_id": "10" },
{"device_id": "3", "device_ip": "192.168.55.244", "rank_id": "11" },
{"device_id": "4", "device_ip": "192.168.55.245", "rank_id": "12" },
{"device_id": "5", "device_ip": "192.168.55.246", "rank_id": "13" },
{"device_id": "6", "device_ip": "192.168.55.247", "rank_id": "14" },
{"device_id": "7", "device_ip": "192.168.55.248", "rank_id": "15" }
]
}
],
"status": "completed"
}
准备好ranktable.json后,修改文件权限:chmod 640 /workdir/ranktable.json
env_set.sh脚本内容如下所示,需指定主节点IP(MASTER_IP)以及当前机器所在容器IP(MIES_CONTAINER_IP)。如无法通过环境变量$MY_POD_IP获取当前容器IP,则需要将env_set.sh复制并分别重命名为env_set_master.sh和env_set_worker.sh,并将两台容器的IP分别填到这两个shell脚本的MIES_CONTAINER_IP处,后续在两台机器中分别执行source env_set_master.sh和source env_set_worker.sh。
source /usr/local/Ascend/mindie/set_env.sh
source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/nnal/atb/set_env.sh
source /usr/local/Ascend/atb-models/set_env.sh
export RANK_TABLE_FILE=/workdir/ranktable.json
export MIES_CONTAINER_IP=$MY_POD_IP
export MASTER_IP=197.166.232.231
export MIES_SERVICE_MONITOR_MODE=1
export MINDIE_LOG_TO_STDOUT=1
export MINDIE_LOG_LEVEL="llm:warn; rt:warn; sd:warn; torch:warn"
export ASDOPS_LOG_TO_FILE=1
export ASDOPS_LOG_LEVEL=ERROR
export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export INF_NAN_MODE_ENABLE=1
export ATB_LLM_HCCL_ENABLE=1
export ATB_LLM_ENABLE_AUTO_TRANSPOSE=0
export ATB_OPERATION_EXECUTE_ASYNC=1
export ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE=3
export ATB_WORKSPACE_MEM_ALLOC_GLOBAL=1
export ATB_LAYER_INTERNAL_TENSOR_REUSE=1
export TASK_QUEUE_ENABLE=2
export OMP_NUM_THREADS=10
export MINDIE_ASYNC_SCHEDULING_ENABLE=1
export NPU_MEMORY_FRACTION=0.96
export HCCL_BUFFER=64
export HCCL_EXEC_TIMEOUT=0
export HCCL_CONNECT_TIMEOUT=3600
export HCCL_OP_EXPANSION_MODE="AIV"
export HCCL_RDMA_PCIE_DIRECT_POST_NOSTRICT=TRUE
export HCCL_ALGO="level0:NA;level1:pipeline"
config.json 内容如下所示:
{
"Version" : "1.0.0",
"ServerConfig" :
{
"ipAddress" : "127.0.0.1",
"managementIpAddress" : "127.0.0.2",
"port" : 1025,
"managementPort" : 1026,
"metricsPort" : 1027,
"allowAllZeroIpListening" : false,
"maxLinkNum" : 100,
"httpsEnabled" : false,
"fullTextEnabled" : false,
"tlsCaPath" : "security/ca/",
"tlsCaFile" : ["ca.pem"],
"tlsCert" : "security/certs/server.pem",
"tlsPk" : "security/keys/server.key.pem",
"tlsPkPwd" : "security/pass/key_pwd.txt",
"tlsCrlPath" : "security/certs/",
"tlsCrlFiles" : ["server_crl.pem"],
"managementTlsCaFile" : ["management_ca.pem"],
"managementTlsCert" : "security/certs/management/server.pem",
"managementTlsPk" : "security/keys/management/server.key.pem",
"managementTlsPkPwd" : "security/pass/management/key_pwd.txt",
"managementTlsCrlPath" : "security/management/certs/",
"managementTlsCrlFiles" : ["server_crl.pem"],
"kmcKsfMaster" : "tools/pmt/master/ksfa",
"kmcKsfStandby" : "tools/pmt/standby/ksfb",
"inferMode" : "standard",
"interCommTLSEnabled" : false,
"interCommPort" : 1121,
"interCommTlsCaPath" : "security/grpc/ca/",
"interCommTlsCaFiles" : ["ca.pem"],
"interCommTlsCert" : "security/grpc/certs/server.pem",
"interCommPk" : "security/grpc/keys/server.key.pem",
"interCommPkPwd" : "security/grpc/pass/key_pwd.txt",
"interCommTlsCrlPath" : "security/grpc/certs/",
"interCommTlsCrlFiles" : ["server_crl.pem"],
"openAiSupport" : "vllm",
"tokenTimeout" : 3600,
"e2eTimeout" : 3600,
"distDPServerEnabled":false
},
"BackendConfig" : {
"backendName" : "mindieservice_llm_engine",
"modelInstanceNumber" : 1,
"npuDeviceIds" : [[0,1,2,3,4,5,6,7]],
"tokenizerProcessNumber" : 8,
"multiNodesInferEnabled" : true,
"multiNodesInferPort" : 1120,
"interNodeTLSEnabled" : false,
"interNodeTlsCaPath" : "security/grpc/ca/",
"interNodeTlsCaFiles" : ["ca.pem"],
"interNodeTlsCert" : "security/grpc/certs/server.pem",
"interNodeTlsPk" : "security/grpc/keys/server.key.pem",
"interNodeTlsPkPwd" : "security/grpc/pass/mindie_server_key_pwd.txt",
"interNodeTlsCrlPath" : "security/grpc/certs/",
"interNodeTlsCrlFiles" : ["server_crl.pem"],
"interNodeKmcKsfMaster" : "tools/pmt/master/ksfa",
"interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb",
"ModelDeployConfig" :
{
"maxSeqLen" : 131072,
"maxInputTokenLen" : 131072,
"truncation" : false,
"ModelConfig" : [
{
"modelInstanceType" : "Standard",
"modelName" : "DeepSeek-R1-0528",
"modelWeightPath" : "/workdir/DeepSeek-R1-0528-w8a8/",
"worldSize" : 8,
"cpuMemSize" : 5,
"npuMemSize" : -1,
"backendType" : "atb",
"trustRemoteCode" : false,
"dp": 1,
"tp": 8,
"sp": 8,
"cp": 2,
"moe_tp": 1,
"moe_ep": 16,
"models": {
"deepseekv2": {
"ep_level": 1,
"enable_init_routing_cutoff": true,
"topk_scaling_factor": 0.25
}
},
"async_scheduler_wait_time": 120,
"kv_trans_timeout": 10,
"kv_link_timeout": 1080
}
]
},
"ScheduleConfig" :
{
"templateType" : "Standard",
"templateName" : "Standard_LLM",
"cacheBlockSize" : 128,
"maxPrefillBatchSize" : 2,
"maxPrefillTokens" : 131072,
"prefillTimeMsPerReq" : 150,
"prefillPolicyType" : 0,
"decodeTimeMsPerReq" : 50,
"decodePolicyType" : 0,
"maxBatchSize" : 200,
"maxIterTimes" : 131072,
"maxPreemptCount" : 0,
"supportSelectBatch" : false,
"maxQueueDelayMicroseconds" : 5000
}
}
}
注意点:
在两个容器中执行以下指令,将启动脚本日志重定向到持久卷:/workdir/mindie.log
source /workdir/env_set.sh
cp -f /workdir/config.json /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json
nohup /usr/local/Ascend/mindie/latest/mindie-service/bin/mindieservice_daemon > /workdir/mindie.log 2>&1 &
tail -f /workdir/mindie.log
服务启动成功后,可通过如下请求进行验证:
curl -X POST -d '{
"model":"DeepSeek-R1-0528",
"messages": [{
"role": "user",
"content": "介绍一下北京"
}],
"max_tokens": 64 ,
"stream": false
}' http://197.166.232.231:1025/v1/chat/completions