Ascend-SACT/DeepSeek-v3
模型介绍文件和版本Pull Requests讨论分析
下载使用量0

DeepSeek v3模型在MindIE框架下大EP部署指导

1. 模型概述及场景

DeepSeek-V3是一款拥有6710亿参数的MoE语言模型,单Token激活370亿参数。该模型采用了经过V2版本验证的MLA潜在注意力机制与DeepSeekMoE结构,实现了高吞吐、低消耗的训练过程。其首创无辅助损失负载均衡策略,并引入多Token预测目标,有效提升了模型的泛化能力与生成质量。该模型在14.8T多领域高质量数据上完成预训练,并辅以监督微调与强化学习,综合评测结果超越主流开源模型,性能可对标顶级闭源方案。

w8a8权重下载链接:https://modelers.cn/models/Modelers_Park/DeepSeek-V3-0324-w8a8-function_call

2. 准备运行环境

2.1 硬件版本

| 组件 | 版本 |
| --- | --- |
| 硬件环境 | 910C(64卡) |

2.2 软件版本

| 组件 | 版本 |
| --- | --- |
| MindIE | 2.2.RC1 |
| HDK | Ascend HDK 25.2.3 |
| CANN | 8.3.RC2 |
| 模型 | DeepSeek V3 |

3. 运行指导

3.1 获取镜像

https://www.hiascend.com/developer/download/community/result?module=ie+pt+cann

3.2 获取大EP部署脚本

3.2.1 下载镜像,将镜像传到共享存储中,所有节点加载镜像

docker load -i mindie_dev-2.2.RC1.B150-800I-A3-py311-ubuntu22.04-aarch64.tar.gz

3.2.2 主节点创建容器

bash start_docker.sh <image id> <container name>

start_docker.sh如下:

#!/bin/bash
# start_docker.sh - create a detached container from a loaded image.
#
# Usage: bash start_docker.sh <image id> <container name>
#   $1  image id (or image reference) to start the container from
#   $2  name to give the new container
# Exits with status 1 when the argument count is not exactly two.

# Validate the argument count before reading the positional parameters.
if [ $# -ne 2 ]; then
    echo "error: need two arguments: <image id> <container name>." >&2
    exit 1
fi
IMAGES_ID=$1
NAME=$2

# The container runs detached on the host network in privileged mode, with the
# listed device nodes passed through and the host driver, tool, data and
# timezone paths bind-mounted. Both variables are quoted so that values
# containing spaces or glob characters are passed to docker as single arguments.
docker run --name "${NAME}" -it -d --net=host --shm-size=500g \
    --privileged=true \
    -w /home \
    --device=/dev/davinci_manager \
    --device=/dev/hisi_hdc \
    --device=/dev/devmm_svm \
    --entrypoint=bash \
    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
    -v /usr/local/dcmi:/usr/local/dcmi \
    -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
    -v /usr/local/sbin:/usr/local/sbin \
    -v /home:/home \
    -v /data:/data \
    -v /mnt:/mnt \
    -v /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime \
    "${IMAGES_ID}"

3.2.3 进入容器,将部署脚本拷贝到裸机中

# Open an interactive root shell inside the container.
docker exec -itu root <container name> bash
# Change to the MindIE service examples directory inside the container.
cd /usr/local/Ascend/mindie/latest/mindie-service/examples/
# Copy the Kubernetes deploy scripts to a path of your choice.
# NOTE(review): presumably <your path> should sit under a host-mounted
# directory (e.g. /home, /data, /mnt) so the scripts are visible on the host — confirm.
cp -r ./kubernetes_deploy_scripts <your path>

3.3 修改部署脚本

修改 user_config_base_A3.json(以下配置可直接用于性能测试)。user_config 文件可在附件中直接下载,注意修改其中的各个文件路径。

3.3.1 user_config.json_16k配置:

{
  "version": "v1.0",
  "deploy_config": {
    "p_instances_num": 4,
    "d_instances_num": 1,
    "single_p_instance_pod_num": 1,
    "single_d_instance_pod_num": 4,
    "p_pod_npu_num": 16,
    "d_pod_npu_num": 16,
    "p_instances_scale_num": 0,
    "d_instances_scale_num": 0,
    "model_id": "mindie_20251105150806",
    "prefill_distribute_enable": 0,
    "decode_distribute_enable": 1,
    "image_name": "mindie:dev-2.2.RC1.B150-800I-A3-py311-ubuntu22.04-aarch64",
    "job_id": "mindie",
    "hardware_type": "800I_A3",
    "mindie_env_path": "./conf/mindie_env_a3.json",
    "mindie_host_log_path": "/home/log/ascend_log",
    "mindie_container_log_path": "/root/mindie",
    "weight_mount_path": "{weight_path}",
    "coordinator_backup_cfg": {
      "function_enable": false
    },
    "controller_backup_cfg": {
      "function_sw": false
    },
    "deploy_mount_path": {
      "ms_controller_mount": {
        "${user_define_host_path}": "${user_define_container_path}"
      },
      "ms_coordinator_mount": {
        "${user_define_host_path}": "${user_define_container_path}"
      },
      "prefill_server_mount": {
        "${user_define_host_path1}": "${user_define_container_path}",
        "${user_define_host_path2}": "${user_define_container_path}"
      },
      "decode_server_mount": {
        "${user_define_host_path1}": "${user_define_container_path}",
        "${user_define_host_path2}": "${user_define_container_path}"
      }
    },
    "tls_config": {
      "tls_enable": false,
      "kmc_ksf_master": "./security/master/tools/pmt/master/ksfa",
      "kmc_ksf_standby": "./security/standby/tools/pmt/standby/ksfb",
      "infer_tls_enable": false,
      "infer_tls_items": {
        "ca_cert": "./security/infer/security/certs/ca.pem",
        "tls_cert": "./security/infer/security/certs/cert.pem",
        "tls_key": "./security/infer/security/keys/cert.key.pem",
        "tls_passwd": "./security/infer/security/pass/key_pwd.txt",
        "tls_crl": "infer"
      },
      "management_tls_enable": false,
      "management_tls_items": {
        "ca_cert": "./security/management/security/certs/ca.pem",
        "tls_cert": "./security/management/security/certs/cert.pem",
        "tls_key": "./security/management/security/keys/cert.key.pem",
        "tls_passwd": "./security/management/security/pass/key_pwd.txt",
        "tls_crl": "management"
      },
      "ccae_tls_enable": false,
      "ccae_tls_items": {
        "ca_cert": "./security/ccae/security/certs/ca.pem",
        "tls_cert": "./security/ccae/security/certs/cert.pem",
        "tls_key": "./security/ccae/security/keys/cert.key.pem",
        "tls_passwd": "./security/ccae/security/pass/key_pwd.txt",
        "tls_crl": "ccae"
      },
      "cluster_tls_enable": false,
      "cluster_tls_items": {
        "ca_cert": "./security/clusterd/security/certs/ca.pem",
        "tls_cert": "./security/clusterd/security/certs/cert.pem",
        "tls_key": "./security/clusterd/security/keys/cert.key.pem",
        "tls_passwd": "./security/clusterd/security/pass/key_pwd.txt",
        "tls_crl": "clusterd"
      },
      "etcd_server_tls_enable": false,
      "etcd_server_tls_items": {
        "ca_cert": "./security/etcd_server/security/certs/ca.pem",
        "tls_cert": "./security/etcd_server/security/certs/cert.pem",
        "tls_key": "./security/etcd_server/security/keys/cert.key.pem",
        "tls_passwd": "./security/etcd_server/security/pass/key_pwd.txt",
        "kmc_ksf_master": "./security/etcd_server/tools/pmt/master/ksfa",
        "kmc_ksf_standby": "./security/etcd_server/tools/pmt/standby/ksfb",
        "tls_crl": ""
      }
    }
  },
  "mindie_ms_controller_config": {
    "deploy_mode": "pd_separate",
    "digs_prefill_slo": 1000,
    "digs_decode_slo": 50,
    "multi_node_infer_config": {
      "multi_node_infer_enable": true
    }
  },
  "mindie_ms_coordinator_config": {
    "http_config": {
      "predict_ip": "127.0.0.1",
      "predict_port": "1025",
      "manage_ip": "127.0.0.1",
      "manage_port": "1026",
      "alarm_port": "1027",
      "server_thread_num": 10,
      "client_thread_num": 10,
      "http_timeout_seconds": 600,
      "keep_alive_seconds": 3600
    },
    "request_limit": {
      "single_node_max_requests": 4096,
      "max_requests": 90000
    },
    "exception_config": {
      "first_token_timeout": 3600,
      "schedule_timeout": 3600,
      "infer_timeout": 65535
    }
  },
  "mindie_server_prefill_config": {
    "ServerConfig": {
      "maxLinkNum": 4096,
      "inferMode": "dmi",
      "tokenTimeout": 3600,
      "e2eTimeout": 65535,
      "distDPServerEnabled": false
    },
    "BackendConfig": {
      "npuDeviceIds": [
        [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10,
          11,
          12,
          13,
          14,
          15
        ]
      ],
      "tokenizerProcessNumber": 1,
      "multiNodesInferEnabled": false,
      "kvPoolConfig": {
        "backend": "",
        "configPath": ""
      },
      "ModelDeployConfig": {
        "maxSeqLen": 18000,
        "maxInputTokenLen": 18000,
        "ModelConfig": [
          {
            "modelInstanceType": "Standard",
            "modelName": "{model_name}",
            "modelWeightPath": "{weight_path}",
            "worldSize": 16,
            "cpuMemSize": 5,
            "npuMemSize": -1,
            "backendType": "atb",
            "trustRemoteCode": false,
            "dp": 2,
            "cp": 1,
            "tp": 8,
            "sp": 1,
            "moe_ep": 16,
            "pp": 1,
            "moe_tp": 1,
            "kv_link_timeout": 1080,
            "modelCutPolicy": "custom",
            "plugin_params": "{\"plugin_type\":\"mtp\",\"num_speculative_tokens\": 1}",
            "models": {
              "deepseekv2": {
                "eplb": {
                  "level": 1,
                  "expert_map_file": "/{path_to_eplb_file}/gsm8k_prefill_1nodep.json"
                },
                "kv_cache_options": {
                  "enable_nz": true
                },
                "ep_level": 2
              }
            }
          }
        ]
      },
      "ScheduleConfig": {
        "distributedEnable": false,
        "maxPrefillBatchSize": 16,
        "maxPrefillTokens": 18000,
        "dpScheduling": true
      }
    }
  },
  "mindie_server_decode_config": {
    "ServerConfig": {
      "maxLinkNum": 256,
      "fullTextEnabled": false,
      "inferMode": "dmi",
      "tokenTimeout": 3600,
      "e2eTimeout": 65535,
      "distDPServerEnabled": true
    },
    "BackendConfig": {
      "npuDeviceIds": [
        [
          0
        ]
      ],
      "tokenizerProcessNumber": 1,
      "multiNodesInferEnabled": false,
      "kvPoolConfig": {
        "backend": "",
        "configPath": ""
      },
      "ModelDeployConfig": {
        "maxSeqLen": 18000,
        "maxInputTokenLen": 18000,
        "truncation": false,
        "ModelConfig": [
          {
            "modelInstanceType": "Standard",
            "modelName": "{model_name}",
            "modelWeightPath": "{weight_path}",
            "worldSize": 1,
            "cpuMemSize": 5,
            "npuMemSize": -1,
            "backendType": "atb",
            "trustRemoteCode": false,
            "dp": 64,
            "cp": 1,
            "tp": 1,
            "sp": 1,
            "moe_ep": 64,
            "pp": 1,
            "moe_tp": 1,
            "kv_trans_timeout": 10,
            "kv_link_timeout": 3600,
            "modelCutPolicy": "custom",
            "plugin_params": "{\"plugin_type\":\"mtp\",\"num_speculative_tokens\": 1}",
            "models": {
              "deepseekv2": {
                "mix_shared_routing": false,
                "enable_dispatch_combine_v2": true,
                "kv_cache_options": {
                  "enable_nz": true
                },
                "ep_level": 2,
                "eplb": {
                  "level": 1,
                  "expert_map_file": "/{path_to_eplb_file}/decode_global_deployment.json"
                },
                "parallel_options": {
                  "lm_head_local_tp": 16,
                  "o_proj_local_tp": 2,
                  "hccl_moe_ep_buffer": 4096,
                  "hccl_moe_tp_buffer": 64
                },
                "enable_oproj_prefetch": false,
                "enable_mlapo_prefetch": false
              }
            }
          }
        ]
      },
      "ScheduleConfig": {
        "distributedEnable": true,
        "maxPrefillBatchSize": 4,
        "maxPrefillTokens": 18000,
        "maxBatchSize": 40,
        "maxIterTimes": 18000,
        "maxQueueDelayMicroseconds": 5000
      }
    }
  }
}

3.3.2 user_config.json_64k配置:

{
  "version": "v1.0",
  "deploy_config": {
    "p_instances_num": 4,
    "d_instances_num": 1,
    "single_p_instance_pod_num": 1,
    "single_d_instance_pod_num": 4,
    "p_pod_npu_num": 16,
    "d_pod_npu_num": 16,
    "p_instances_scale_num": 0,
    "d_instances_scale_num": 0,
    "model_id": "mindie_20251105150806",
    "prefill_distribute_enable": 0,
    "decode_distribute_enable": 1,
    "image_name": "mindie:dev-2.2.RC1.B150-800I-A3-py311-ubuntu22.04-aarch64",
    "job_id": "mindie",
    "hardware_type": "800I_A3",
    "mindie_env_path": "./conf/mindie_env_a3.json",
    "mindie_host_log_path": "/home/log/ascend_log",
    "mindie_container_log_path": "/root/mindie",
    "weight_mount_path": "{weight_path}",
    "coordinator_backup_cfg": {
      "function_enable": false
    },
    "controller_backup_cfg": {
      "function_sw": false
    },
    "deploy_mount_path": {
      "ms_controller_mount": {
        "${user_define_host_path}": "${user_define_container_path}"
      },
      "ms_coordinator_mount": {
        "${user_define_host_path}": "${user_define_container_path}"
      },
      "prefill_server_mount": {
        "${user_define_host_path1}": "${user_define_container_path}",
        "${user_define_host_path2}": "${user_define_container_path}"
      },
      "decode_server_mount": {
        "${user_define_host_path1}": "${user_define_container_path}",
        "${user_define_host_path2}": "${user_define_container_path}"
      }
    },
    "tls_config": {
      "tls_enable": false,
      "kmc_ksf_master": "./security/master/tools/pmt/master/ksfa",
      "kmc_ksf_standby": "./security/standby/tools/pmt/standby/ksfb",
      "infer_tls_enable": false,
      "infer_tls_items": {
        "ca_cert": "./security/infer/security/certs/ca.pem",
        "tls_cert": "./security/infer/security/certs/cert.pem",
        "tls_key": "./security/infer/security/keys/cert.key.pem",
        "tls_passwd": "./security/infer/security/pass/key_pwd.txt",
        "tls_crl": "infer"
      },
      "management_tls_enable": false,
      "management_tls_items": {
        "ca_cert": "./security/management/security/certs/ca.pem",
        "tls_cert": "./security/management/security/certs/cert.pem",
        "tls_key": "./security/management/security/keys/cert.key.pem",
        "tls_passwd": "./security/management/security/pass/key_pwd.txt",
        "tls_crl": "management"
      },
      "ccae_tls_enable": false,
      "ccae_tls_items": {
        "ca_cert": "./security/ccae/security/certs/ca.pem",
        "tls_cert": "./security/ccae/security/certs/cert.pem",
        "tls_key": "./security/ccae/security/keys/cert.key.pem",
        "tls_passwd": "./security/ccae/security/pass/key_pwd.txt",
        "tls_crl": "ccae"
      },
      "cluster_tls_enable": false,
      "cluster_tls_items": {
        "ca_cert": "./security/clusterd/security/certs/ca.pem",
        "tls_cert": "./security/clusterd/security/certs/cert.pem",
        "tls_key": "./security/clusterd/security/keys/cert.key.pem",
        "tls_passwd": "./security/clusterd/security/pass/key_pwd.txt",
        "tls_crl": "clusterd"
      },
      "etcd_server_tls_enable": false,
      "etcd_server_tls_items": {
        "ca_cert": "./security/etcd_server/security/certs/ca.pem",
        "tls_cert": "./security/etcd_server/security/certs/cert.pem",
        "tls_key": "./security/etcd_server/security/keys/cert.key.pem",
        "tls_passwd": "./security/etcd_server/security/pass/key_pwd.txt",
        "kmc_ksf_master": "./security/etcd_server/tools/pmt/master/ksfa",
        "kmc_ksf_standby": "./security/etcd_server/tools/pmt/standby/ksfb",
        "tls_crl": ""
      }
    }
  },
  "mindie_ms_controller_config": {
    "deploy_mode": "pd_separate",
    "digs_prefill_slo": 1000,
    "digs_decode_slo": 50,
    "multi_node_infer_config": {
      "multi_node_infer_enable": true
    }
  },
  "mindie_ms_coordinator_config": {
    "http_config": {
      "predict_ip": "127.0.0.1",
      "predict_port": "1025",
      "manage_ip": "127.0.0.1",
      "manage_port": "1026",
      "alarm_port": "1027",
      "server_thread_num": 10,
      "client_thread_num": 10,
      "http_timeout_seconds": 600,
      "keep_alive_seconds": 3600
    },
    "request_limit": {
      "single_node_max_requests": 4096,
      "max_requests": 90000
    },
    "exception_config": {
      "first_token_timeout": 3600,
      "schedule_timeout": 3600,
      "infer_timeout": 65535
    }
  },
  "mindie_server_prefill_config": {
    "ServerConfig": {
      "maxLinkNum": 4096,
      "inferMode": "dmi",
      "tokenTimeout": 3600,
      "e2eTimeout": 65535,
      "distDPServerEnabled": false
    },
    "BackendConfig": {
      "npuDeviceIds": [
        [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10,
          11,
          12,
          13,
          14,
          15
        ]
      ],
      "tokenizerProcessNumber": 1,
      "multiNodesInferEnabled": false,
      "kvPoolConfig": {
        "backend": "",
        "configPath": ""
      },
      "ModelDeployConfig": {
        "maxSeqLen": 67000,
        "maxInputTokenLen": 67000,
        "ModelConfig": [
          {
            "modelInstanceType": "Standard",
            "modelName": "{model_name}",
            "modelWeightPath": "{weight_path}",
            "worldSize": 16,
            "cpuMemSize": 5,
            "npuMemSize": -1,
            "backendType": "atb",
            "trustRemoteCode": false,
            "dp": 1,
            "cp": 2,
            "tp": 8,
            "sp": 8,
            "moe_ep": 16,
            "pp": 1,
            "moe_tp": 1,
            "kv_link_timeout": 1080,
            "modelCutPolicy": "custom",
            "plugin_params": "{\"plugin_type\":\"mtp\",\"num_speculative_tokens\": 1}",
            "models": {
              "deepseekv2": {
                "eplb": {
                  "level": 1,
                  "expert_map_file": "/{path_to_eplb_path}/gsm8k_prefill_1nodep.json"
                },
                "kv_cache_options": {
                  "enable_nz": true
                },
                "ep_level": 2
              }
            }
          }
        ]
      },
      "ScheduleConfig": {
        "distributedEnable": false,
        "maxPrefillBatchSize": 16,
        "maxPrefillTokens": 67000,
        "dpScheduling": true
      }
    }
  },
  "mindie_server_decode_config": {
    "ServerConfig": {
      "maxLinkNum": 256,
      "fullTextEnabled": false,
      "inferMode": "dmi",
      "tokenTimeout": 3600,
      "e2eTimeout": 65535,
      "distDPServerEnabled": true
    },
    "BackendConfig": {
      "npuDeviceIds": [
        [
          0
        ]
      ],
      "tokenizerProcessNumber": 1,
      "multiNodesInferEnabled": false,
      "kvPoolConfig": {
        "backend": "",
        "configPath": ""
      },
      "ModelDeployConfig": {
        "maxSeqLen": 67000,
        "maxInputTokenLen": 67000,
        "truncation": false,
        "ModelConfig": [
          {
            "modelInstanceType": "Standard",
            "modelName": "{model_name}",
            "modelWeightPath": "{weight_path}",
            "worldSize": 1,
            "cpuMemSize": 5,
            "npuMemSize": -1,
            "backendType": "atb",
            "trustRemoteCode": false,
            "dp": 64,
            "cp": 1,
            "tp": 1,
            "sp": 1,
            "moe_ep": 64,
            "pp": 1,
            "moe_tp": 1,
            "kv_trans_timeout": 10,
            "kv_link_timeout": 3600,
            "modelCutPolicy": "custom",
            "plugin_params": "{\"plugin_type\":\"mtp\",\"num_speculative_tokens\": 1}",
            "models": {
              "deepseekv2": {
                "mix_shared_routing": false,
                "enable_dispatch_combine_v2": true,
                "kv_cache_options": {
                  "enable_nz": true
                },
                "ep_level": 2,
                "eplb": {
                  "level": 1,
                  "expert_map_file": "/{path_to_eplb_file}/decode_global_deployment.json"
                },
                "parallel_options": {
                  "lm_head_local_tp": 16,
                  "o_proj_local_tp": 2,
                  "hccl_moe_ep_buffer": 4096,
                  "hccl_moe_tp_buffer": 64
                },
                "enable_oproj_prefetch": false,
                "enable_mlapo_prefetch": false
              }
            }
          }
        ]
      },
      "ScheduleConfig": {
        "distributedEnable": true,
        "maxPrefillBatchSize": 4,
        "maxPrefillTokens": 67000,
        "maxBatchSize": 32,
        "maxIterTimes": 67000,
        "maxQueueDelayMicroseconds": 5000
      }
    }
  }
}

3.3.3 user_config.json_128k配置:

{
  "version": "v1.0",
  "deploy_config": {
    "p_instances_num": 4,
    "d_instances_num": 1,
    "single_p_instance_pod_num": 1,
    "single_d_instance_pod_num": 4,
    "p_pod_npu_num": 16,
    "d_pod_npu_num": 16,
    "p_instances_scale_num": 0,
    "d_instances_scale_num": 0,
    "model_id": "mindie_20251105150806",
    "prefill_distribute_enable": 0,
    "decode_distribute_enable": 1,
    "image_name": "mindie:dev-2.2.RC1.B150-800I-A3-py311-ubuntu22.04-aarch64",
    "job_id": "mindie",
    "hardware_type": "800I_A3",
    "mindie_env_path": "./conf/mindie_env_a3.json",
    "mindie_host_log_path": "/home/log/ascend_log",
    "mindie_container_log_path": "/root/mindie",
    "weight_mount_path": "{weight_path}",
    "coordinator_backup_cfg": {
      "function_enable": false
    },
    "controller_backup_cfg": {
      "function_sw": false
    },
    "deploy_mount_path": {
      "ms_controller_mount": {
        "${user_define_host_path}": "${user_define_container_path}"
      },
      "ms_coordinator_mount": {
        "${user_define_host_path}": "${user_define_container_path}"
      },
      "prefill_server_mount": {
        "${user_define_host_path1}": "${user_define_container_path}",
        "${user_define_host_path2}": "${user_define_container_path}"
      },
      "decode_server_mount": {
        "${user_define_host_path1}": "${user_define_container_path}",
        "${user_define_host_path2}": "${user_define_container_path}"
      }
    },
    "tls_config": {
      "tls_enable": false,
      "kmc_ksf_master": "./security/master/tools/pmt/master/ksfa",
      "kmc_ksf_standby": "./security/standby/tools/pmt/standby/ksfb",
      "infer_tls_enable": false,
      "infer_tls_items": {
        "ca_cert": "./security/infer/security/certs/ca.pem",
        "tls_cert": "./security/infer/security/certs/cert.pem",
        "tls_key": "./security/infer/security/keys/cert.key.pem",
        "tls_passwd": "./security/infer/security/pass/key_pwd.txt",
        "tls_crl": "infer"
      },
      "management_tls_enable": false,
      "management_tls_items": {
        "ca_cert": "./security/management/security/certs/ca.pem",
        "tls_cert": "./security/management/security/certs/cert.pem",
        "tls_key": "./security/management/security/keys/cert.key.pem",
        "tls_passwd": "./security/management/security/pass/key_pwd.txt",
        "tls_crl": "management"
      },
      "ccae_tls_enable": false,
      "ccae_tls_items": {
        "ca_cert": "./security/ccae/security/certs/ca.pem",
        "tls_cert": "./security/ccae/security/certs/cert.pem",
        "tls_key": "./security/ccae/security/keys/cert.key.pem",
        "tls_passwd": "./security/ccae/security/pass/key_pwd.txt",
        "tls_crl": "ccae"
      },
      "cluster_tls_enable": false,
      "cluster_tls_items": {
        "ca_cert": "./security/clusterd/security/certs/ca.pem",
        "tls_cert": "./security/clusterd/security/certs/cert.pem",
        "tls_key": "./security/clusterd/security/keys/cert.key.pem",
        "tls_passwd": "./security/clusterd/security/pass/key_pwd.txt",
        "tls_crl": "clusterd"
      },
      "etcd_server_tls_enable": false,
      "etcd_server_tls_items": {
        "ca_cert": "./security/etcd_server/security/certs/ca.pem",
        "tls_cert": "./security/etcd_server/security/certs/cert.pem",
        "tls_key": "./security/etcd_server/security/keys/cert.key.pem",
        "tls_passwd": "./security/etcd_server/security/pass/key_pwd.txt",
        "kmc_ksf_master": "./security/etcd_server/tools/pmt/master/ksfa",
        "kmc_ksf_standby": "./security/etcd_server/tools/pmt/standby/ksfb",
        "tls_crl": ""
      }
    }
  },
  "mindie_ms_controller_config": {
    "deploy_mode": "pd_separate",
    "digs_prefill_slo": 1000,
    "digs_decode_slo": 50,
    "multi_node_infer_config": {
      "multi_node_infer_enable": true
    }
  },
  "mindie_ms_coordinator_config": {
    "http_config": {
      "predict_ip": "127.0.0.1",
      "predict_port": "1025",
      "manage_ip": "127.0.0.1",
      "manage_port": "1026",
      "alarm_port": "1027",
      "server_thread_num": 10,
      "client_thread_num": 10,
      "http_timeout_seconds": 600,
      "keep_alive_seconds": 3600
    },
    "request_limit": {
      "single_node_max_requests": 4096,
      "max_requests": 90000
    },
    "exception_config": {
      "first_token_timeout": 3600,
      "schedule_timeout": 3600,
      "infer_timeout": 65535
    }
  },
  "mindie_server_prefill_config": {
    "ServerConfig": {
      "maxLinkNum": 4096,
      "inferMode": "dmi",
      "tokenTimeout": 3600,
      "e2eTimeout": 65535,
      "distDPServerEnabled": false
    },
    "BackendConfig": {
      "npuDeviceIds": [
        [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10,
          11,
          12,
          13,
          14,
          15
        ]
      ],
      "tokenizerProcessNumber": 1,
      "multiNodesInferEnabled": false,
      "kvPoolConfig": {
        "backend": "",
        "configPath": ""
      },
      "ModelDeployConfig": {
        "maxSeqLen": 133000,
        "maxInputTokenLen": 133000,
        "ModelConfig": [
          {
            "modelInstanceType": "Standard",
            "modelName": "{model_name}",
            "modelWeightPath": "{weight_path}",
            "worldSize": 16,
            "cpuMemSize": 5,
            "npuMemSize": -1,
            "backendType": "atb",
            "trustRemoteCode": false,
            "dp": 1,
            "cp": 2,
            "tp": 8,
            "sp": 8,
            "moe_ep": 16,
            "pp": 1,
            "moe_tp": 1,
            "kv_link_timeout": 1080,
            "modelCutPolicy": "custom",
            "models": {
              "deepseekv2": {
                "eplb": {
                  "level": 1,
                  "expert_map_file": "/{path_to_eplb_path}/gsm8k_prefill_1nodep.json"
                },
                "kv_cache_options": {
                  "enable_nz": true
                },
                "ep_level": 2
              }
            }
          }
        ]
      },
      "ScheduleConfig": {
        "distributedEnable": false,
        "maxPrefillBatchSize": 16,
        "maxPrefillTokens": 133000,
        "dpScheduling": true
      }
    }
  },
  "mindie_server_decode_config": {
    "ServerConfig": {
      "maxLinkNum": 256,
      "fullTextEnabled": false,
      "inferMode": "dmi",
      "tokenTimeout": 3600,
      "e2eTimeout": 65535,
      "distDPServerEnabled": true
    },
    "BackendConfig": {
      "npuDeviceIds": [
        [
          0
        ]
      ],
      "tokenizerProcessNumber": 1,
      "multiNodesInferEnabled": false,
      "kvPoolConfig": {
        "backend": "",
        "configPath": ""
      },
      "ModelDeployConfig": {
        "maxSeqLen": 133000,
        "maxInputTokenLen": 133000,
        "truncation": false,
        "ModelConfig": [
          {
            "modelInstanceType": "Standard",
            "modelName": "{model_name}",
            "modelWeightPath": "{weight_path}",
            "worldSize": 1,
            "cpuMemSize": 5,
            "npuMemSize": -1,
            "backendType": "atb",
            "trustRemoteCode": false,
            "dp": 64,
            "cp": 1,
            "tp": 1,
            "sp": 1,
            "moe_ep": 64,
            "pp": 1,
            "moe_tp": 1,
            "kv_trans_timeout": 10,
            "kv_link_timeout": 3600,
            "modelCutPolicy": "custom",
            "models": {
              "deepseekv2": {
                "mix_shared_routing": false,
                "enable_dispatch_combine_v2": true,
                "kv_cache_options": {
                  "enable_nz": true
                },
                "ep_level": 2,
                "eplb": {
                  "level": 1,
                  "expert_map_file": "/{path_to_eplb_file}/decode_global_deployment.json"
                },
                "parallel_options": {
                  "lm_head_local_tp": 16,
                  "o_proj_local_tp": 2,
                  "hccl_moe_ep_buffer": 4096,
                  "hccl_moe_tp_buffer": 64
                },
                "enable_oproj_prefetch": false,
                "enable_mlapo_prefetch": false
              }
            }
          }
        ]
      },
      "ScheduleConfig": {
        "distributedEnable": true,
        "maxPrefillBatchSize": 4,
        "maxPrefillTokens": 133000,
        "maxBatchSize": 32,
        "maxIterTimes": 133000,
        "maxQueueDelayMicroseconds": 5000
      }
    }
  }
}

3.4 拉起服务

3.4.1 启动服务

python3 deploy_ac_job.py --user_config_path user_config_base_A3.json

3.4.2 发送curl请求验证

# Send a non-streaming chat-completion request (at most 100 generated tokens)
# to check that the deployed service answers.
# Replace xxx with the address of the service endpoint.
# NOTE(review): "m_model" presumably has to match the modelName set in the
# user config — confirm before running.
curl -X POST http://xxx:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
    "model": "m_model",
    "messages": [
        {
            "role": "user",
            "content": "你是谁?"
        }
    ],
    "max_tokens": 100,
    "ignore_eos": false,
    "stream": false
}'

curl -X POST http://xxx:8080/v1/completions   -H "Content-Type: application/json"   -d '{
    "model": "m_model",
    "prompt": "<|begin▁of▁sentence|><|User|>hello<|Assistant|></think>Hello! I am DeepSeek.<|end▁of▁sentence|><|User|>自我介绍<|Assistant|>