DeepSeek-V3是一款拥有6710亿参数的MoE语言模型,单Token激活370亿参数。该模型采用了经过V2版本验证的MLA潜在注意力机制与DeepSeekMoE结构,实现了高吞吐、低消耗的训练过程。其首创无辅助损失负载均衡策略,并引入多Token预测目标,有效提升了模型的泛化能力与生成质量。该模型在14.8T Token的多领域高质量数据上完成预训练,并辅以监督微调与强化学习,综合评测结果超越主流开源模型,性能可对标顶级闭源方案。
w8a8权重下载链接:https://modelers.cn/models/Modelers_Park/DeepSeek-V3-0324-w8a8-function_call

| 组件 | 版本 |
|---|---|
| 硬件环境 | 910C(64卡) |

| 组件 | 版本 |
|---|---|
| MindIE | 2.2.RC1 |
| HDK | Ascend HDK 25.2.3 |
| CANN | 8.3.RC2 |
| 模型 | DeepSeek V3 |

https://www.hiascend.com/developer/download/community/result?module=ie+pt+cann

docker load -i mindie_dev-2.2.RC1.B150-800I-A3-py311-ubuntu22.04-aarch64.tar.gz

bash start_docker.sh <image id> <container name>

start_docker.sh如下:
# start_docker.sh - start a detached container from the given image, with the
# host device nodes and directories listed below passed through to it.
#
# Usage: bash start_docker.sh <image id> <container name>
#   $1  image to start the container from
#   $2  name assigned to the new container

# Both arguments are mandatory; report on stderr and stop before using them.
if [ $# -ne 2 ]; then
    echo "error: need two arguments: <image id> <container name>." >&2
    exit 1
fi

IMAGES_ID=$1
NAME=$2

# The container runs in the background (-d) with bash as its entrypoint and
# uses the host network. /usr/share/zoneinfo/Asia/Shanghai is mounted over
# /etc/localtime inside the container.
docker run --name "${NAME}" -it -d --net=host --shm-size=500g \
    --privileged=true \
    -w /home \
    --device=/dev/davinci_manager \
    --device=/dev/hisi_hdc \
    --device=/dev/devmm_svm \
    --entrypoint=bash \
    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
    -v /usr/local/dcmi:/usr/local/dcmi \
    -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
    -v /usr/local/sbin:/usr/local/sbin \
    -v /home:/home \
    -v /data:/data \
    -v /mnt:/mnt \
    -v /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime \
    "${IMAGES_ID}"

# Not part of start_docker.sh: once the container is up, open an interactive
# bash shell in it as root.
docker exec -itu root <container name> bash
cd /usr/local/Ascend/mindie/latest/mindie-service/examples/
cp -r ./kubernetes_deploy_scripts <your path>

user_config_base_A3.json修改(以下脚本可直接用于性能测试)

user_config文件可在附件中直接下载,注意修改各个文件路径。
{
"version": "v1.0",
"deploy_config": {
"p_instances_num": 4,
"d_instances_num": 1,
"single_p_instance_pod_num": 1,
"single_d_instance_pod_num": 4,
"p_pod_npu_num": 16,
"d_pod_npu_num": 16,
"p_instances_scale_num": 0,
"d_instances_scale_num": 0,
"model_id": "mindie_20251105150806",
"prefill_distribute_enable": 0,
"decode_distribute_enable": 1,
"image_name": "mindie:dev-2.2.RC1.B150-800I-A3-py311-ubuntu22.04-aarch64",
"job_id": "mindie",
"hardware_type": "800I_A3",
"mindie_env_path": "./conf/mindie_env_a3.json",
"mindie_host_log_path": "/home/log/ascend_log",
"mindie_container_log_path": "/root/mindie",
"weight_mount_path": "{weight_path}",
"coordinator_backup_cfg": {
"function_enable": false
},
"controller_backup_cfg": {
"function_sw": false
},
"deploy_mount_path": {
"ms_controller_mount": {
"${user_define_host_path}": "${user_define_container_path}"
},
"ms_coordinator_mount": {
"${user_define_host_path}": "${user_define_container_path}"
},
"prefill_server_mount": {
"${user_define_host_path1}": "${user_define_container_path}",
"${user_define_host_path2}": "${user_define_container_path}"
},
"decode_server_mount": {
"${user_define_host_path1}": "${user_define_container_path}",
"${user_define_host_path2}": "${user_define_container_path}"
}
},
"tls_config": {
"tls_enable": false,
"kmc_ksf_master": "./security/master/tools/pmt/master/ksfa",
"kmc_ksf_standby": "./security/standby/tools/pmt/standby/ksfb",
"infer_tls_enable": false,
"infer_tls_items": {
"ca_cert": "./security/infer/security/certs/ca.pem",
"tls_cert": "./security/infer/security/certs/cert.pem",
"tls_key": "./security/infer/security/keys/cert.key.pem",
"tls_passwd": "./security/infer/security/pass/key_pwd.txt",
"tls_crl": "infer"
},
"management_tls_enable": false,
"management_tls_items": {
"ca_cert": "./security/management/security/certs/ca.pem",
"tls_cert": "./security/management/security/certs/cert.pem",
"tls_key": "./security/management/security/keys/cert.key.pem",
"tls_passwd": "./security/management/security/pass/key_pwd.txt",
"tls_crl": "management"
},
"ccae_tls_enable": false,
"ccae_tls_items": {
"ca_cert": "./security/ccae/security/certs/ca.pem",
"tls_cert": "./security/ccae/security/certs/cert.pem",
"tls_key": "./security/ccae/security/keys/cert.key.pem",
"tls_passwd": "./security/ccae/security/pass/key_pwd.txt",
"tls_crl": "ccae"
},
"cluster_tls_enable": false,
"cluster_tls_items": {
"ca_cert": "./security/clusterd/security/certs/ca.pem",
"tls_cert": "./security/clusterd/security/certs/cert.pem",
"tls_key": "./security/clusterd/security/keys/cert.key.pem",
"tls_passwd": "./security/clusterd/security/pass/key_pwd.txt",
"tls_crl": "clusterd"
},
"etcd_server_tls_enable": false,
"etcd_server_tls_items": {
"ca_cert": "./security/etcd_server/security/certs/ca.pem",
"tls_cert": "./security/etcd_server/security/certs/cert.pem",
"tls_key": "./security/etcd_server/security/keys/cert.key.pem",
"tls_passwd": "./security/etcd_server/security/pass/key_pwd.txt",
"kmc_ksf_master": "./security/etcd_server/tools/pmt/master/ksfa",
"kmc_ksf_standby": "./security/etcd_server/tools/pmt/standby/ksfb",
"tls_crl": ""
}
}
},
"mindie_ms_controller_config": {
"deploy_mode": "pd_separate",
"digs_prefill_slo": 1000,
"digs_decode_slo": 50,
"multi_node_infer_config": {
"multi_node_infer_enable": true
}
},
"mindie_ms_coordinator_config": {
"http_config": {
"predict_ip": "127.0.0.1",
"predict_port": "1025",
"manage_ip": "127.0.0.1",
"manage_port": "1026",
"alarm_port": "1027",
"server_thread_num": 10,
"client_thread_num": 10,
"http_timeout_seconds": 600,
"keep_alive_seconds": 3600
},
"request_limit": {
"single_node_max_requests": 4096,
"max_requests": 90000
},
"exception_config": {
"first_token_timeout": 3600,
"schedule_timeout": 3600,
"infer_timeout": 65535
}
},
"mindie_server_prefill_config": {
"ServerConfig": {
"maxLinkNum": 4096,
"inferMode": "dmi",
"tokenTimeout": 3600,
"e2eTimeout": 65535,
"distDPServerEnabled": false
},
"BackendConfig": {
"npuDeviceIds": [
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15
]
],
"tokenizerProcessNumber": 1,
"multiNodesInferEnabled": false,
"kvPoolConfig": {
"backend": "",
"configPath": ""
},
"ModelDeployConfig": {
"maxSeqLen": 18000,
"maxInputTokenLen": 18000,
"ModelConfig": [
{
"modelInstanceType": "Standard",
"modelName": "{model_name}",
"modelWeightPath": "{weight_path}",
"worldSize": 16,
"cpuMemSize": 5,
"npuMemSize": -1,
"backendType": "atb",
"trustRemoteCode": false,
"dp": 2,
"cp": 1,
"tp": 8,
"sp": 1,
"moe_ep": 16,
"pp": 1,
"moe_tp": 1,
"kv_link_timeout": 1080,
"modelCutPolicy": "custom",
"plugin_params": "{\"plugin_type\":\"mtp\",\"num_speculative_tokens\": 1}",
"models": {
"deepseekv2": {
"eplb": {
"level": 1,
"expert_map_file": "/{path_to_eplb_file}/gsm8k_prefill_1nodep.json"
},
"kv_cache_options": {
"enable_nz": true
},
"ep_level": 2
}
}
}
]
},
"ScheduleConfig": {
"distributedEnable": false,
"maxPrefillBatchSize": 16,
"maxPrefillTokens": 18000,
"dpScheduling": true
}
}
},
"mindie_server_decode_config": {
"ServerConfig": {
"maxLinkNum": 256,
"fullTextEnabled": false,
"inferMode": "dmi",
"tokenTimeout": 3600,
"e2eTimeout": 65535,
"distDPServerEnabled": true
},
"BackendConfig": {
"npuDeviceIds": [
[
0
]
],
"tokenizerProcessNumber": 1,
"multiNodesInferEnabled": false,
"kvPoolConfig": {
"backend": "",
"configPath": ""
},
"ModelDeployConfig": {
"maxSeqLen": 18000,
"maxInputTokenLen": 18000,
"truncation": false,
"ModelConfig": [
{
"modelInstanceType": "Standard",
"modelName": "{model_name}",
"modelWeightPath": "{weight_path}",
"worldSize": 1,
"cpuMemSize": 5,
"npuMemSize": -1,
"backendType": "atb",
"trustRemoteCode": false,
"dp": 64,
"cp": 1,
"tp": 1,
"sp": 1,
"moe_ep": 64,
"pp": 1,
"moe_tp": 1,
"kv_trans_timeout": 10,
"kv_link_timeout": 3600,
"modelCutPolicy": "custom",
"plugin_params": "{\"plugin_type\":\"mtp\",\"num_speculative_tokens\": 1}",
"models": {
"deepseekv2": {
"mix_shared_routing": false,
"enable_dispatch_combine_v2": true,
"kv_cache_options": {
"enable_nz": true
},
"ep_level": 2,
"eplb": {
"level": 1,
"expert_map_file": "/{path_to_eplb_file}/decode_global_deployment.json"
},
"parallel_options": {
"lm_head_local_tp": 16,
"o_proj_local_tp": 2,
"hccl_moe_ep_buffer": 4096,
"hccl_moe_tp_buffer": 64
},
"enable_oproj_prefetch": false,
"enable_mlapo_prefetch": false
}
}
}
]
},
"ScheduleConfig": {
"distributedEnable": true,
"maxPrefillBatchSize": 4,
"maxPrefillTokens": 18000,
"maxBatchSize": 40,
"maxIterTimes": 18000,
"maxQueueDelayMicroseconds": 5000
}
}
}
}

{
"version": "v1.0",
"deploy_config": {
"p_instances_num": 4,
"d_instances_num": 1,
"single_p_instance_pod_num": 1,
"single_d_instance_pod_num": 4,
"p_pod_npu_num": 16,
"d_pod_npu_num": 16,
"p_instances_scale_num": 0,
"d_instances_scale_num": 0,
"model_id": "mindie_20251105150806",
"prefill_distribute_enable": 0,
"decode_distribute_enable": 1,
"image_name": "mindie:dev-2.2.RC1.B150-800I-A3-py311-ubuntu22.04-aarch64",
"job_id": "mindie",
"hardware_type": "800I_A3",
"mindie_env_path": "./conf/mindie_env_a3.json",
"mindie_host_log_path": "/home/log/ascend_log",
"mindie_container_log_path": "/root/mindie",
"weight_mount_path": "{weight_path}",
"coordinator_backup_cfg": {
"function_enable": false
},
"controller_backup_cfg": {
"function_sw": false
},
"deploy_mount_path": {
"ms_controller_mount": {
"${user_define_host_path}": "${user_define_container_path}"
},
"ms_coordinator_mount": {
"${user_define_host_path}": "${user_define_container_path}"
},
"prefill_server_mount": {
"${user_define_host_path1}": "${user_define_container_path}",
"${user_define_host_path2}": "${user_define_container_path}"
},
"decode_server_mount": {
"${user_define_host_path1}": "${user_define_container_path}",
"${user_define_host_path2}": "${user_define_container_path}"
}
},
"tls_config": {
"tls_enable": false,
"kmc_ksf_master": "./security/master/tools/pmt/master/ksfa",
"kmc_ksf_standby": "./security/standby/tools/pmt/standby/ksfb",
"infer_tls_enable": false,
"infer_tls_items": {
"ca_cert": "./security/infer/security/certs/ca.pem",
"tls_cert": "./security/infer/security/certs/cert.pem",
"tls_key": "./security/infer/security/keys/cert.key.pem",
"tls_passwd": "./security/infer/security/pass/key_pwd.txt",
"tls_crl": "infer"
},
"management_tls_enable": false,
"management_tls_items": {
"ca_cert": "./security/management/security/certs/ca.pem",
"tls_cert": "./security/management/security/certs/cert.pem",
"tls_key": "./security/management/security/keys/cert.key.pem",
"tls_passwd": "./security/management/security/pass/key_pwd.txt",
"tls_crl": "management"
},
"ccae_tls_enable": false,
"ccae_tls_items": {
"ca_cert": "./security/ccae/security/certs/ca.pem",
"tls_cert": "./security/ccae/security/certs/cert.pem",
"tls_key": "./security/ccae/security/keys/cert.key.pem",
"tls_passwd": "./security/ccae/security/pass/key_pwd.txt",
"tls_crl": "ccae"
},
"cluster_tls_enable": false,
"cluster_tls_items": {
"ca_cert": "./security/clusterd/security/certs/ca.pem",
"tls_cert": "./security/clusterd/security/certs/cert.pem",
"tls_key": "./security/clusterd/security/keys/cert.key.pem",
"tls_passwd": "./security/clusterd/security/pass/key_pwd.txt",
"tls_crl": "clusterd"
},
"etcd_server_tls_enable": false,
"etcd_server_tls_items": {
"ca_cert": "./security/etcd_server/security/certs/ca.pem",
"tls_cert": "./security/etcd_server/security/certs/cert.pem",
"tls_key": "./security/etcd_server/security/keys/cert.key.pem",
"tls_passwd": "./security/etcd_server/security/pass/key_pwd.txt",
"kmc_ksf_master": "./security/etcd_server/tools/pmt/master/ksfa",
"kmc_ksf_standby": "./security/etcd_server/tools/pmt/standby/ksfb",
"tls_crl": ""
}
}
},
"mindie_ms_controller_config": {
"deploy_mode": "pd_separate",
"digs_prefill_slo": 1000,
"digs_decode_slo": 50,
"multi_node_infer_config": {
"multi_node_infer_enable": true
}
},
"mindie_ms_coordinator_config": {
"http_config": {
"predict_ip": "127.0.0.1",
"predict_port": "1025",
"manage_ip": "127.0.0.1",
"manage_port": "1026",
"alarm_port": "1027",
"server_thread_num": 10,
"client_thread_num": 10,
"http_timeout_seconds": 600,
"keep_alive_seconds": 3600
},
"request_limit": {
"single_node_max_requests": 4096,
"max_requests": 90000
},
"exception_config": {
"first_token_timeout": 3600,
"schedule_timeout": 3600,
"infer_timeout": 65535
}
},
"mindie_server_prefill_config": {
"ServerConfig": {
"maxLinkNum": 4096,
"inferMode": "dmi",
"tokenTimeout": 3600,
"e2eTimeout": 65535,
"distDPServerEnabled": false
},
"BackendConfig": {
"npuDeviceIds": [
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15
]
],
"tokenizerProcessNumber": 1,
"multiNodesInferEnabled": false,
"kvPoolConfig": {
"backend": "",
"configPath": ""
},
"ModelDeployConfig": {
"maxSeqLen": 67000,
"maxInputTokenLen": 67000,
"ModelConfig": [
{
"modelInstanceType": "Standard",
"modelName": "{model_name}",
"modelWeightPath": "{weight_path}",
"worldSize": 16,
"cpuMemSize": 5,
"npuMemSize": -1,
"backendType": "atb",
"trustRemoteCode": false,
"dp": 1,
"cp": 2,
"tp": 8,
"sp": 8,
"moe_ep": 16,
"pp": 1,
"moe_tp": 1,
"kv_link_timeout": 1080,
"modelCutPolicy": "custom",
"plugin_params": "{\"plugin_type\":\"mtp\",\"num_speculative_tokens\": 1}",
"models": {
"deepseekv2": {
"eplb": {
"level": 1,
"expert_map_file": "/{path_to_eplb_file}/gsm8k_prefill_1nodep.json"
},
"kv_cache_options": {
"enable_nz": true
},
"ep_level": 2
}
}
}
]
},
"ScheduleConfig": {
"distributedEnable": false,
"maxPrefillBatchSize": 16,
"maxPrefillTokens": 67000,
"dpScheduling": true
}
}
},
"mindie_server_decode_config": {
"ServerConfig": {
"maxLinkNum": 256,
"fullTextEnabled": false,
"inferMode": "dmi",
"tokenTimeout": 3600,
"e2eTimeout": 65535,
"distDPServerEnabled": true
},
"BackendConfig": {
"npuDeviceIds": [
[
0
]
],
"tokenizerProcessNumber": 1,
"multiNodesInferEnabled": false,
"kvPoolConfig": {
"backend": "",
"configPath": ""
},
"ModelDeployConfig": {
"maxSeqLen": 67000,
"maxInputTokenLen": 67000,
"truncation": false,
"ModelConfig": [
{
"modelInstanceType": "Standard",
"modelName": "{model_name}",
"modelWeightPath": "{weight_path}",
"worldSize": 1,
"cpuMemSize": 5,
"npuMemSize": -1,
"backendType": "atb",
"trustRemoteCode": false,
"dp": 64,
"cp": 1,
"tp": 1,
"sp": 1,
"moe_ep": 64,
"pp": 1,
"moe_tp": 1,
"kv_trans_timeout": 10,
"kv_link_timeout": 3600,
"modelCutPolicy": "custom",
"plugin_params": "{\"plugin_type\":\"mtp\",\"num_speculative_tokens\": 1}",
"models": {
"deepseekv2": {
"mix_shared_routing": false,
"enable_dispatch_combine_v2": true,
"kv_cache_options": {
"enable_nz": true
},
"ep_level": 2,
"eplb": {
"level": 1,
"expert_map_file": "/{path_to_eplb_file}/decode_global_deployment.json"
},
"parallel_options": {
"lm_head_local_tp": 16,
"o_proj_local_tp": 2,
"hccl_moe_ep_buffer": 4096,
"hccl_moe_tp_buffer": 64
},
"enable_oproj_prefetch": false,
"enable_mlapo_prefetch": false
}
}
}
]
},
"ScheduleConfig": {
"distributedEnable": true,
"maxPrefillBatchSize": 4,
"maxPrefillTokens": 67000,
"maxBatchSize": 32,
"maxIterTimes": 67000,
"maxQueueDelayMicroseconds": 5000
}
}
}
}

{
"version": "v1.0",
"deploy_config": {
"p_instances_num": 4,
"d_instances_num": 1,
"single_p_instance_pod_num": 1,
"single_d_instance_pod_num": 4,
"p_pod_npu_num": 16,
"d_pod_npu_num": 16,
"p_instances_scale_num": 0,
"d_instances_scale_num": 0,
"model_id": "mindie_20251105150806",
"prefill_distribute_enable": 0,
"decode_distribute_enable": 1,
"image_name": "mindie:dev-2.2.RC1.B150-800I-A3-py311-ubuntu22.04-aarch64",
"job_id": "mindie",
"hardware_type": "800I_A3",
"mindie_env_path": "./conf/mindie_env_a3.json",
"mindie_host_log_path": "/home/log/ascend_log",
"mindie_container_log_path": "/root/mindie",
"weight_mount_path": "{weight_path}",
"coordinator_backup_cfg": {
"function_enable": false
},
"controller_backup_cfg": {
"function_sw": false
},
"deploy_mount_path": {
"ms_controller_mount": {
"${user_define_host_path}": "${user_define_container_path}"
},
"ms_coordinator_mount": {
"${user_define_host_path}": "${user_define_container_path}"
},
"prefill_server_mount": {
"${user_define_host_path1}": "${user_define_container_path}",
"${user_define_host_path2}": "${user_define_container_path}"
},
"decode_server_mount": {
"${user_define_host_path1}": "${user_define_container_path}",
"${user_define_host_path2}": "${user_define_container_path}"
}
},
"tls_config": {
"tls_enable": false,
"kmc_ksf_master": "./security/master/tools/pmt/master/ksfa",
"kmc_ksf_standby": "./security/standby/tools/pmt/standby/ksfb",
"infer_tls_enable": false,
"infer_tls_items": {
"ca_cert": "./security/infer/security/certs/ca.pem",
"tls_cert": "./security/infer/security/certs/cert.pem",
"tls_key": "./security/infer/security/keys/cert.key.pem",
"tls_passwd": "./security/infer/security/pass/key_pwd.txt",
"tls_crl": "infer"
},
"management_tls_enable": false,
"management_tls_items": {
"ca_cert": "./security/management/security/certs/ca.pem",
"tls_cert": "./security/management/security/certs/cert.pem",
"tls_key": "./security/management/security/keys/cert.key.pem",
"tls_passwd": "./security/management/security/pass/key_pwd.txt",
"tls_crl": "management"
},
"ccae_tls_enable": false,
"ccae_tls_items": {
"ca_cert": "./security/ccae/security/certs/ca.pem",
"tls_cert": "./security/ccae/security/certs/cert.pem",
"tls_key": "./security/ccae/security/keys/cert.key.pem",
"tls_passwd": "./security/ccae/security/pass/key_pwd.txt",
"tls_crl": "ccae"
},
"cluster_tls_enable": false,
"cluster_tls_items": {
"ca_cert": "./security/clusterd/security/certs/ca.pem",
"tls_cert": "./security/clusterd/security/certs/cert.pem",
"tls_key": "./security/clusterd/security/keys/cert.key.pem",
"tls_passwd": "./security/clusterd/security/pass/key_pwd.txt",
"tls_crl": "clusterd"
},
"etcd_server_tls_enable": false,
"etcd_server_tls_items": {
"ca_cert": "./security/etcd_server/security/certs/ca.pem",
"tls_cert": "./security/etcd_server/security/certs/cert.pem",
"tls_key": "./security/etcd_server/security/keys/cert.key.pem",
"tls_passwd": "./security/etcd_server/security/pass/key_pwd.txt",
"kmc_ksf_master": "./security/etcd_server/tools/pmt/master/ksfa",
"kmc_ksf_standby": "./security/etcd_server/tools/pmt/standby/ksfb",
"tls_crl": ""
}
}
},
"mindie_ms_controller_config": {
"deploy_mode": "pd_separate",
"digs_prefill_slo": 1000,
"digs_decode_slo": 50,
"multi_node_infer_config": {
"multi_node_infer_enable": true
}
},
"mindie_ms_coordinator_config": {
"http_config": {
"predict_ip": "127.0.0.1",
"predict_port": "1025",
"manage_ip": "127.0.0.1",
"manage_port": "1026",
"alarm_port": "1027",
"server_thread_num": 10,
"client_thread_num": 10,
"http_timeout_seconds": 600,
"keep_alive_seconds": 3600
},
"request_limit": {
"single_node_max_requests": 4096,
"max_requests": 90000
},
"exception_config": {
"first_token_timeout": 3600,
"schedule_timeout": 3600,
"infer_timeout": 65535
}
},
"mindie_server_prefill_config": {
"ServerConfig": {
"maxLinkNum": 4096,
"inferMode": "dmi",
"tokenTimeout": 3600,
"e2eTimeout": 65535,
"distDPServerEnabled": false
},
"BackendConfig": {
"npuDeviceIds": [
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15
]
],
"tokenizerProcessNumber": 1,
"multiNodesInferEnabled": false,
"kvPoolConfig": {
"backend": "",
"configPath": ""
},
"ModelDeployConfig": {
"maxSeqLen": 133000,
"maxInputTokenLen": 133000,
"ModelConfig": [
{
"modelInstanceType": "Standard",
"modelName": "{model_name}",
"modelWeightPath": "{weight_path}",
"worldSize": 16,
"cpuMemSize": 5,
"npuMemSize": -1,
"backendType": "atb",
"trustRemoteCode": false,
"dp": 1,
"cp": 2,
"tp": 8,
"sp": 8,
"moe_ep": 16,
"pp": 1,
"moe_tp": 1,
"kv_link_timeout": 1080,
"modelCutPolicy": "custom",
"models": {
"deepseekv2": {
"eplb": {
"level": 1,
"expert_map_file": "/{path_to_eplb_file}/gsm8k_prefill_1nodep.json"
},
"kv_cache_options": {
"enable_nz": true
},
"ep_level": 2
}
}
}
]
},
"ScheduleConfig": {
"distributedEnable": false,
"maxPrefillBatchSize": 16,
"maxPrefillTokens": 133000,
"dpScheduling": true
}
}
},
"mindie_server_decode_config": {
"ServerConfig": {
"maxLinkNum": 256,
"fullTextEnabled": false,
"inferMode": "dmi",
"tokenTimeout": 3600,
"e2eTimeout": 65535,
"distDPServerEnabled": true
},
"BackendConfig": {
"npuDeviceIds": [
[
0
]
],
"tokenizerProcessNumber": 1,
"multiNodesInferEnabled": false,
"kvPoolConfig": {
"backend": "",
"configPath": ""
},
"ModelDeployConfig": {
"maxSeqLen": 133000,
"maxInputTokenLen": 133000,
"truncation": false,
"ModelConfig": [
{
"modelInstanceType": "Standard",
"modelName": "{model_name}",
"modelWeightPath": "{weight_path}",
"worldSize": 1,
"cpuMemSize": 5,
"npuMemSize": -1,
"backendType": "atb",
"trustRemoteCode": false,
"dp": 64,
"cp": 1,
"tp": 1,
"sp": 1,
"moe_ep": 64,
"pp": 1,
"moe_tp": 1,
"kv_trans_timeout": 10,
"kv_link_timeout": 3600,
"modelCutPolicy": "custom",
"models": {
"deepseekv2": {
"mix_shared_routing": false,
"enable_dispatch_combine_v2": true,
"kv_cache_options": {
"enable_nz": true
},
"ep_level": 2,
"eplb": {
"level": 1,
"expert_map_file": "/{path_to_eplb_file}/decode_global_deployment.json"
},
"parallel_options": {
"lm_head_local_tp": 16,
"o_proj_local_tp": 2,
"hccl_moe_ep_buffer": 4096,
"hccl_moe_tp_buffer": 64
},
"enable_oproj_prefetch": false,
"enable_mlapo_prefetch": false
}
}
}
]
},
"ScheduleConfig": {
"distributedEnable": true,
"maxPrefillBatchSize": 4,
"maxPrefillTokens": 133000,
"maxBatchSize": 32,
"maxIterTimes": 133000,
"maxQueueDelayMicroseconds": 5000
}
}
}
}

python3 deploy_ac_job.py --user_config_path user_config_base_A3.json

curl -X POST http://xxx:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "m_model",
"messages": [
{
"role": "user",
"content": "你是谁?"
}
],
"max_tokens": 100,
"ignore_eos": false,
"stream": false
}'
curl -X POST http://xxx:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "m_model",
"prompt": "<|begin▁of▁sentence|><|User|>hello<|Assistant|></think>Hello! I am DeepSeek.<|end▁of▁sentence|><|User|>自我介绍<|Assistant|>