DeepSeek-V3.2是一款在高计算效率与卓越推理能力间取得突破性平衡的先进模型,基于以下关键技术突破:
DeepSeek 稀疏注意力(DSA):引入DSA高效的注意力机制,它显著降低了计算复杂性,同时保持了模型性能,特别针对长上下文场景进行了优化。
可扩展的强化学习框架:通过实施强大的RL协议并扩展后训练计算,DeepSeek-V3.2的表现与GPT-5相当。
w8a8权重下载链接:https://modelers.cn/models/Eco-Tech/DeepSeek-V3.2-w8a8-QuaRot/
| 组件 | 版本 |
|---|---|
| 硬件环境 | 910B(16卡) |
| 组件 | 版本 |
|---|---|
| sglang | main分支 |
| HDK | Ascend HDK 25.2.1 |
| CANN | 8.3.RC1 |
| 模型 | DeepSeek V3.2 |
# Image for running SGLang on Ascend NPUs.
#
# Build args declared before FROM select the CANN base image tag.
# NOTE(review): DEVICE_TYPE defaults to "a3"; this guide targets 910B hardware
# and the updated custom-ops packages it references are "910b" builds, so a
# build for that hardware presumably needs `--build-arg DEVICE_TYPE=910b`
# (confirm against the available base image tags).
ARG CANN_VERSION=8.3.rc1
ARG DEVICE_TYPE=a3
ARG OS=ubuntu22.04
ARG PYTHON_VERSION=py3.11
FROM quay.io/ascend/cann:$CANN_VERSION-$DEVICE_TYPE-$OS-$PYTHON_VERSION

# Update pip & apt sources
ARG PIP_INDEX_URL="https://pypi.org/simple/"
ARG APTMIRROR=""
ARG PYTORCH_VERSION="2.8.0"
ARG TORCHVISION_VERSION="0.23.0"
ARG PTA_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/torch_npu/torch_npu-2.8.0.post2.dev20251113-cp311-cp311-manylinux_2_28_aarch64.whl"
ARG TRITON_ASCEND_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/triton_ascend/triton_ascend-3.2.0.dev2025112116-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
ARG BISHENG_NAME="Ascend-BiSheng-toolkit_aarch64_20251121.run"
ARG BISHENG_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/triton_ascend/${BISHENG_NAME}"
ARG SGLANG_TAG=main
ARG ASCEND_CANN_PATH=/usr/local/Ascend/ascend-toolkit
ARG SGLANG_KERNEL_NPU_TAG=main
ARG PIP_INSTALL="python3 -m pip install --no-cache-dir"
# Re-declare without a value so the pre-FROM DEVICE_TYPE is visible in this stage.
ARG DEVICE_TYPE
# Custom-ops packages, split into NAME/URL like BISHENG_NAME/BISHENG_URL so that
# newer builds (including URLs that carry a query string) can be supplied with
# --build-arg. The defaults reproduce the previously hard-coded downloads.
ARG CANN_CUSTOM_OPS_NAME="CANN-custom_ops-8.2.0.0-${DEVICE_TYPE}-linux.aarch64.run"
ARG CANN_CUSTOM_OPS_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/ops/${CANN_CUSTOM_OPS_NAME}"
ARG CUSTOM_OPS_WHL_NAME="custom_ops-1.0.${DEVICE_TYPE}-cp311-cp311-linux_aarch64.whl"
ARG CUSTOM_OPS_WHL_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/ops/${CUSTOM_OPS_WHL_NAME}"

WORKDIR /workspace

# Define environments
ENV DEBIAN_FRONTEND=noninteractive

RUN pip config set global.index-url "$PIP_INDEX_URL"
# Rewrite the apt sources only when a mirror was supplied.
RUN if [ -n "$APTMIRROR" ];then sed -i "s|.*.ubuntu.com|$APTMIRROR|g" /etc/apt/sources.list ;fi

# Install development tools and utilities
# (apt-get throughout: the `apt` front end does not offer a stable CLI for scripts)
RUN apt-get update -y && apt-get upgrade -y && apt-get install -y \
    build-essential \
    cmake \
    vim \
    wget \
    curl \
    net-tools \
    zlib1g-dev \
    lld \
    clang \
    locales \
    ccache \
    openssl \
    libssl-dev \
    pkg-config \
    ca-certificates \
    && rm -rf /var/cache/apt/* \
    && rm -rf /var/lib/apt/lists/* \
    && update-ca-certificates \
    && locale-gen en_US.UTF-8

ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US:en
ENV LC_ALL=en_US.UTF-8

### Install MemFabric
RUN ${PIP_INSTALL} mf-adapter==1.0.0

### Install SGLang Model Gateway
RUN ${PIP_INSTALL} sglang-router

### Install PyTorch and PTA
RUN (${PIP_INSTALL} torch==${PYTORCH_VERSION} torchvision==${TORCHVISION_VERSION} --index-url https://download.pytorch.org/whl/cpu) \
    && (${PIP_INSTALL} ${PTA_URL})

# TODO: install from pypi released triton-ascend
RUN (${PIP_INSTALL} pybind11) \
    && (${PIP_INSTALL} ${TRITON_ASCEND_URL})

# Install SGLang
# pyproject_other.toml replaces pyproject.toml before installing the srt_npu extra;
# the checkout is removed afterwards to keep the layer small.
RUN git clone https://github.com/sgl-project/sglang --branch $SGLANG_TAG && \
    (cd sglang/python && rm -rf pyproject.toml && mv pyproject_other.toml pyproject.toml && ${PIP_INSTALL} -v ".[srt_npu]") && \
    rm -rf sglang

# Install Deep-ep
# pin wheel to 0.45.1 ref: https://github.com/pypa/wheel/issues/662
# The CANN stub libraries are put on LD_LIBRARY_PATH for the build, and the built
# deep_ep_cpp*.so is symlinked next to the installed deep_ep package directory.
RUN ${PIP_INSTALL} wheel==0.45.1 && git clone --branch $SGLANG_KERNEL_NPU_TAG https://github.com/sgl-project/sgl-kernel-npu.git \
    && export LD_LIBRARY_PATH=${ASCEND_CANN_PATH}/latest/runtime/lib64/stub:$LD_LIBRARY_PATH && \
    source ${ASCEND_CANN_PATH}/set_env.sh && \
    cd sgl-kernel-npu && \
    bash build.sh \
    && ${PIP_INSTALL} output/deep_ep*.whl output/sgl_kernel_npu*.whl \
    && cd .. && rm -rf sgl-kernel-npu \
    && cd "$(python3 -m pip show deep-ep | awk '/^Location:/ {print $2}')" && ln -s deep_ep/deep_ep_cpp*.so

# Install CustomOps
# -O pins the local file name so the step also works for URLs with a query string;
# both downloads are deleted after installation so they do not stay in the image.
RUN wget -O "${CANN_CUSTOM_OPS_NAME}" "${CANN_CUSTOM_OPS_URL}" && \
    chmod a+x "./${CANN_CUSTOM_OPS_NAME}" && \
    "./${CANN_CUSTOM_OPS_NAME}" --quiet --install-path=/usr/local/Ascend/ascend-toolkit/latest/opp && \
    wget -O "${CUSTOM_OPS_WHL_NAME}" "${CUSTOM_OPS_WHL_URL}" && \
    ${PIP_INSTALL} "./${CUSTOM_OPS_WHL_NAME}" && \
    rm -f "${CANN_CUSTOM_OPS_NAME}" "${CUSTOM_OPS_WHL_NAME}"

# Install Bisheng
RUN wget -O "${BISHENG_NAME}" "${BISHENG_URL}" && chmod a+x "${BISHENG_NAME}" && "./${BISHENG_NAME}" --install && rm "${BISHENG_NAME}"

CMD ["/bin/bash"]
注意DockerFile内需要更新两个软件包:
https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com:443/ops/CANN-custom_ops-8.3.0.1-910b-linux.aarch64.run?AccessKeyId=HPUAXT4YM0U8JNTERLST&Expires=1795868352&Signature=fpkhjfHGDNvJviVY3ezAJeavx%2BU%3D
https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com:443/ops/custom_ops-2.0.910b-cp311-cp311-linux_aarch64.whl?AccessKeyId=HPUAXT4YM0U8JNTERLST&Expires=1795868372&Signature=tYX1oA3J0NigpLYkLOkRSTBC9lY%3D
docker run -itd --privileged --name=sglang-test --net=host \
--shm-size 500g \
--device=/dev/davinci0 \
--device=/dev/davinci1 \
--device=/dev/davinci2 \
--device=/dev/davinci3 \
--device=/dev/davinci4 \
--device=/dev/davinci5 \
--device=/dev/davinci6 \
--device=/dev/davinci7 \
--device=/dev/davinci_manager \
--device=/dev/hisi_hdc \
--device /dev/devmm_svm \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \
-v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \
-v /usr/local/sbin:/usr/local/sbin \
-v /etc/hccn.conf:/etc/hccn.conf \
-v /home:/home \
-v /disk1:/disk1 \
-v /disk2:/disk2 \
-v /disk3:/disk3 \
-v /opt:/opt \
-v /home:/home \
--entrypoint /bin/bash sg-langxxx
节点1启动:
节点1:
# Environment for the node-1 SGLang server, declared in one export statement.
# Every name and value matches the one-variable-per-line form exactly.
export \
  PYTORCH_NPU_ALLOC_CONF=expandable_segments:True \
  STREAMS_PER_DEVICE=32 \
  DEEP_NORMAL_MODE_USE_INT8_QUANT=1 \
  SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=32 \
  HCCL_BUFFSIZE=200 \
  SGLANG_ENABLE_SPEC_V2=1 \
  SGLANG_ENABLE_OVERLAP_PLAN_STREAM=1 \
  SGLANG_NPU_USE_MLAPO=1 \
  SGLANG_USE_FIA_NZ=1 \
  ENABLE_MOE_NZ=1
python3 -m sglang.launch_server \
--model-path /xxx/ds-v3.2 \
--tp 16 \
--trust-remote-code \
--attention-backend ascend \
--device npu \
--quantization w8a8_int8 \
--watchdog-timeout 9000 \
--host XXX1 \
--port 6688 \
--cuda-graph-max-bs 64 \
--mem-fraction-static 0.8 \
--max-running-requests 128 \
--context-length 32000 \
--disable-radix-cache \
--chunked-prefill-size -1 \
--max-prefill-tokens 32000 \
--max-total-tokens 32000 \
--enable-dp-attention \
--dp-size 1 \
--enable-dp-lm-head \
--speculative-algorithm NEXTN \
--speculative-num-steps 3 \
--speculative-eagle-topk 1 \
--speculative-num-draft-tokens 4 \
--dtype bfloat16 \
--dist-init-addr XXX1:8891 \
--tool-call-parser deepseekv32 \
--reasoning-parser deepseek-v3 \
--nnodes 2 \
--node-rank 0
节点2:
# Environment for the node-2 SGLang server.
# PYTORCH_NPU_ALLOC_CONF was missing here although node 1 exports it; both nodes
# of the same deployment are given the same allocator setting.
export \
  PYTORCH_NPU_ALLOC_CONF=expandable_segments:True \
  STREAMS_PER_DEVICE=32 \
  DEEP_NORMAL_MODE_USE_INT8_QUANT=1 \
  SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=32 \
  HCCL_BUFFSIZE=200 \
  SGLANG_ENABLE_SPEC_V2=1 \
  SGLANG_ENABLE_OVERLAP_PLAN_STREAM=1 \
  SGLANG_NPU_USE_MLAPO=1 \
  SGLANG_USE_FIA_NZ=1 \
  ENABLE_MOE_NZ=1
python3 -m sglang.launch_server \
--model-path /xxx/ds-v3.2 \
--tp 16 \
--trust-remote-code \
--attention-backend ascend \
--device npu \
--quantization w8a8_int8 \
--watchdog-timeout 9000 \
--host XXX2 \
--port 6688 \
--cuda-graph-max-bs 64 \
--mem-fraction-static 0.8 \
--max-running-requests 128 \
--context-length 32000 \
--disable-radix-cache \
--chunked-prefill-size -1 \
--max-prefill-tokens 32000 \
--max-total-tokens 32000 \
--enable-dp-attention \
--dp-size 1 \
--enable-dp-lm-head \
--speculative-algorithm NEXTN \
--speculative-num-steps 3 \
--speculative-eagle-topk 1 \
--speculative-num-draft-tokens 4 \
--dtype bfloat16 \
--dist-init-addr XXX1:8891 \
--tool-call-parser deepseekv32 \
--reasoning-parser deepseek-v3 \
--nnodes 2 \
--node-rank 1
curl -X POST http://xxx:6688/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "xxx",
"messages": [
{
"role": "user",
"content": "你是谁?"
}
],
"max_tokens": 100,
"ignore_eos": false,
"stream": false
}'
解决方案: 更新两个软件包:
https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com:443/ops/CANN-custom_ops-8.3.0.1-910b-linux.aarch64.run?AccessKeyId=HPUAXT4YM0U8JNTERLST&Expires=1795868352&Signature=fpkhjfHGDNvJviVY3ezAJeavx%2BU%3D
https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com:443/ops/custom_ops-2.0.910b-cp311-cp311-linux_aarch64.whl?AccessKeyId=HPUAXT4YM0U8JNTERLST&Expires=1795868372&Signature=tYX1oA3J0NigpLYkLOkRSTBC9lY%3D
Attr commAlg 无效,当前仅支持 fullmesh_v1 和 fullmesh_v2,但获取到的 commAlg = fullmesh。
解决方案: 暂时关闭deepep,命令中删除:
--moe-a2a-backend deepep \
--deepep-mode auto \
deepep问题待解决。
DeepSeek-V3.2是一款在高计算效率与卓越推理能力间取得突破性平衡的先进模型,基于以下关键技术突破:
DeepSeek 稀疏注意力(DSA):引入DSA高效的注意力机制,它显著降低了计算复杂性,同时保持了模型性能,特别针对长上下文场景进行了优化。
可扩展的强化学习框架:通过实施强大的 RL 协议并扩展后训练计算,DeepSeek-V3.2 的表现与 GPT-5 相当。
w8a8权重下载链接:https://modelers.cn/models/Eco-Tech/DeepSeek-V3.2-w8a8-QuaRot/
| 组件 | 版本 |
|---|---|
| 硬件环境 | 910B(16卡) |
| 组件 | 版本 |
|---|---|
| MindIE | 2.2.T32 |
| HDK | Ascend HDK 25.2.1 |
| CANN | 8.3.RC1 |
| 模型 | DeepSeek V3.2 |
https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f
docker run -itd --privileged --name=mindie-test --net=host \
--shm-size 500g \
--device=/dev/davinci0 \
--device=/dev/davinci1 \
--device=/dev/davinci2 \
--device=/dev/davinci3 \
--device=/dev/davinci4 \
--device=/dev/davinci5 \
--device=/dev/davinci6 \
--device=/dev/davinci7 \
--device=/dev/davinci_manager \
--device=/dev/hisi_hdc \
--device /dev/devmm_svm \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \
-v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \
-v /usr/local/sbin:/usr/local/sbin \
-v /etc/hccn.conf:/etc/hccn.conf \
-v /home:/home \
-v /disk1:/disk1 \
-v /disk2:/disk2 \
-v /disk3:/disk3 \
-v /opt:/opt \
-v /home:/home \
--entrypoint /bin/bash mindiexxx
{
"version": "1.0",
"server_count": "2",
"server_list": [
{
"server_id": "xxx",
"container_ip": "xxx",
"device": [
{
"device_id": "0",
"device_ip": "xxx",
"rank_id": "0"
},
{
"device_id": "1",
"device_ip": "xxx",
"rank_id": "1"
},
{
"device_id": "2",
"device_ip": "xxx",
"rank_id": "2"
},
{
"device_id": "3",
"device_ip": "xxx",
"rank_id": "3"
},
{
"device_id": "4",
"device_ip": "xxx",
"rank_id": "4"
},
{
"device_id": "5",
"device_ip": "xxx",
"rank_id": "5"
},
{
"device_id": "6",
"device_ip": "xxx",
"rank_id": "6"
},
{
"device_id": "7",
"device_ip": "xxx",
"rank_id": "7"
}
],
"host_nic_ip": "reserve"
},
{
"server_id": "xxx",
"container_ip": "xxx",
"device": [
{
"device_id": "0",
"device_ip": "xxx",
"rank_id": "8"
},
{
"device_id": "1",
"device_ip": "xxx",
"rank_id": "9"
},
{
"device_id": "2",
"device_ip": "xxx",
"rank_id": "10"
},
{
"device_id": "3",
"device_ip": "xxx",
"rank_id": "11"
},
{
"device_id": "4",
"device_ip": "xxx",
"rank_id": "12"
},
{
"device_id": "5",
"device_ip": "xxx",
"rank_id": "13"
},
{
"device_id": "6",
"device_ip": "xxx",
"rank_id": "14"
},
{
"device_id": "7",
"device_ip": "xxx",
"rank_id": "15"
}
],
"host_nic_ip": "reserve"
}
],
"status": "completed"
}
供参考:
{
"BackendConfig": {
"ModelDeployConfig": {
"ModelConfig": [
{
"backendType": "atb",
"cpuMemSize": 5,
"dp": 1,
"enable_warmup_with_sampling": false,
"ignore_eos": true,
"modelInstanceType": "Standard",
"modelName": "m_model",
"modelWeightPath": "/disk2/models/dsv3.2",
"models": {
"deepseekv2": {
"enable_mlapo_prefetch": true,
"kv_cache_options": {
"enable_nz": true
}
}
},
"moe_ep": 1,
"moe_tp": 16,
"npuMemSize": 4,
"plugin_params": "{\"plugin_type\":\"mtp\",\"num_speculative_tokens\": 1}",
"sp": 1,
"tp": 16,
"trustRemoteCode": false,
"worldSize": 8
}
],
"maxInputTokenLen": 8384,
"maxSeqLen": 8384,
"truncation": false
},
"ScheduleConfig": {
"cacheBlockSize": 128,
"decodePolicyType": 0,
"decodeTimeMsPerReq": 50,
"maxBatchSize": 200,
"maxIterTimes": 8384,
"maxPreemptCount": 0,
"maxPrefillBatchSize": 1,
"maxPrefillTokens": 8384,
"maxQueueDelayMicroseconds": 5000,
"prefillPolicyType": 0,
"prefillTimeMsPerReq": 150,
"supportSelectBatch": false,
"templateName": "Standard_LLM",
"templateType": "Standard"
},
"backendName": "mindieservice_llm_engine",
"interNodeKmcKsfMaster": "tools/pmt/master/ksfa",
"interNodeKmcKsfStandby": "tools/pmt/standby/ksfb",
"interNodeTLSEnabled": false,
"interNodeTlsCaFiles": [
"ca.pem"
],
"interNodeTlsCaPath": "security/grpc/ca/",
"interNodeTlsCert": "security/grpc/certs/server.pem",
"interNodeTlsCrlFiles": [
"server_crl.pem"
],
"interNodeTlsCrlPath": "security/grpc/certs/",
"interNodeTlsPk": "security/grpc/keys/server.key.pem",
"interNodeTlsPkPwd": "security/grpc/pass/mindie_server_key_pwd.txt",
"modelInstanceNumber": 1,
"multiNodesInferEnabled": true,
"multiNodesInferPort": 1120,
"npuDeviceIds": [
[
0,
1,
2,
3,
4,
5,
6,
7
]
],
"tokenizerProcessNumber": 8
},
"LogConfig": {
"dynamicLogLevel": "",
"dynamicLogLevelValidHours": 2,
"dynamicLogLevelValidTime": ""
},
"ServerConfig": {
"allowAllZeroIpListening": false,
"distDPServerEnabled": false,
"e2eTimeout": 3600,
"fullTextEnabled": false,
"httpsEnabled": false,
"inferMode": "standard",
"interCommPk": "security/grpc/keys/server.key.pem",
"interCommPkPwd": "security/grpc/pass/key_pwd.txt",
"interCommPort": 1121,
"interCommTLSEnabled": false,
"interCommTlsCaFiles": [
"ca.pem"
],
"interCommTlsCaPath": "security/grpc/ca/",
"interCommTlsCert": "security/grpc/certs/server.pem",
"interCommTlsCrlFiles": [
"server_crl.pem"
],
"interCommTlsCrlPath": "security/grpc/certs/",
"ipAddress": "此处为MASTER_IP",
"kmcKsfMaster": "tools/pmt/master/ksfa",
"kmcKsfStandby": "tools/pmt/standby/ksfb",
"managementIpAddress": "127.0.0.2",
"managementPort": 1026,
"managementTlsCaFile": [
"management_ca.pem"
],
"managementTlsCert": "security/certs/management/server.pem",
"managementTlsCrlFiles": [
"server_crl.pem"
],
"managementTlsCrlPath": "security/management/certs/",
"managementTlsPk": "security/keys/management/server.key.pem",
"managementTlsPkPwd": "security/pass/management/key_pwd.txt",
"maxLinkNum": 1000,
"metricsPort": 1027,
"openAiSupport": "vllm",
"port": 8080,
"tlsCaFile": [
"ca.pem"
],
"tlsCaPath": "security/ca/",
"tlsCert": "security/certs/server.pem",
"tlsCrlFiles": [
"server_crl.pem"
],
"tlsCrlPath": "security/certs/",
"tlsPk": "security/keys/server.key.pem",
"tlsPkPwd": "security/pass/key_pwd.txt",
"tokenTimeout": 3600
},
"Version": "1.0.0"
}
{
"BackendConfig": {
"ModelDeployConfig": {
"ModelConfig": [
{
"backendType": "atb",
"cpuMemSize": 5,
"dp": 1,
"enable_warmup_with_sampling": false,
"ignore_eos": true,
"modelInstanceType": "Standard",
"modelName": "m_model",
"modelWeightPath": "/disk2/models/dsv3.2",
"models": {
"deepseekv2": {
"enable_mlapo_prefetch": true,
"kv_cache_options": {
"enable_nz": true
}
}
},
"moe_ep": 1,
"moe_tp": 16,
"npuMemSize": 4,
"plugin_params": "{\"plugin_type\":\"mtp\",\"num_speculative_tokens\": 1}",
"sp": 1,
"tp": 16,
"trustRemoteCode": false,
"worldSize": 8
}
],
"maxInputTokenLen": 8384,
"maxSeqLen": 8384,
"truncation": false
},
"ScheduleConfig": {
"cacheBlockSize": 128,
"decodePolicyType": 0,
"decodeTimeMsPerReq": 50,
"maxBatchSize": 200,
"maxIterTimes": 8384,
"maxPreemptCount": 0,
"maxPrefillBatchSize": 1,
"maxPrefillTokens": 8384,
"maxQueueDelayMicroseconds": 5000,
"prefillPolicyType": 0,
"prefillTimeMsPerReq": 150,
"supportSelectBatch": false,
"templateName": "Standard_LLM",
"templateType": "Standard"
},
"backendName": "mindieservice_llm_engine",
"interNodeKmcKsfMaster": "tools/pmt/master/ksfa",
"interNodeKmcKsfStandby": "tools/pmt/standby/ksfb",
"interNodeTLSEnabled": false,
"interNodeTlsCaFiles": [
"ca.pem"
],
"interNodeTlsCaPath": "security/grpc/ca/",
"interNodeTlsCert": "security/grpc/certs/server.pem",
"interNodeTlsCrlFiles": [
"server_crl.pem"
],
"interNodeTlsCrlPath": "security/grpc/certs/",
"interNodeTlsPk": "security/grpc/keys/server.key.pem",
"interNodeTlsPkPwd": "security/grpc/pass/mindie_server_key_pwd.txt",
"modelInstanceNumber": 1,
"multiNodesInferEnabled": true,
"multiNodesInferPort": 1120,
"npuDeviceIds": [
[
0,
1,
2,
3,
4,
5,
6,
7
]
],
"tokenizerProcessNumber": 8
},
"LogConfig": {
"dynamicLogLevel": "",
"dynamicLogLevelValidHours": 2,
"dynamicLogLevelValidTime": ""
},
"ServerConfig": {
"allowAllZeroIpListening": false,
"distDPServerEnabled": false,
"e2eTimeout": 3600,
"fullTextEnabled": false,
"httpsEnabled": false,
"inferMode": "standard",
"interCommPk": "security/grpc/keys/server.key.pem",
"interCommPkPwd": "security/grpc/pass/key_pwd.txt",
"interCommPort": 1121,
"interCommTLSEnabled": false,
"interCommTlsCaFiles": [
"ca.pem"
],
"interCommTlsCaPath": "security/grpc/ca/",
"interCommTlsCert": "security/grpc/certs/server.pem",
"interCommTlsCrlFiles": [
"server_crl.pem"
],
"interCommTlsCrlPath": "security/grpc/certs/",
"ipAddress": "127.0.0.1",
"kmcKsfMaster": "tools/pmt/master/ksfa",
"kmcKsfStandby": "tools/pmt/standby/ksfb",
"managementIpAddress": "127.0.0.2",
"managementPort": 1026,
"managementTlsCaFile": [
"management_ca.pem"
],
"managementTlsCert": "security/certs/management/server.pem",
"managementTlsCrlFiles": [
"server_crl.pem"
],
"managementTlsCrlPath": "security/management/certs/",
"managementTlsPk": "security/keys/management/server.key.pem",
"managementTlsPkPwd": "security/pass/management/key_pwd.txt",
"maxLinkNum": 1000,
"metricsPort": 1027,
"openAiSupport": "vllm",
"port": 8080,
"tlsCaFile": [
"ca.pem"
],
"tlsCaPath": "security/ca/",
"tlsCert": "security/certs/server.pem",
"tlsCrlFiles": [
"server_crl.pem"
],
"tlsCrlPath": "security/certs/",
"tlsPk": "security/keys/server.key.pem",
"tlsPkPwd": "security/pass/key_pwd.txt",
"tokenTimeout": 3600
},
"Version": "1.0.0"
}
修改模型配置文件config.json: "model_type": "deepseek_v3"
# Node 1: source the Ascend toolkit, ATB, ATB-models and MindIE environment
# scripts, export the run-time settings, then change into the mindie-service
# directory. The xxx / xxx1 values are placeholders to be filled in.
source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/nnal/atb/set_env.sh
source /usr/local/Ascend/atb-models/set_env.sh
source /usr/local/Ascend/mindie/set_env.sh
export MIES_CONTAINER_IP=xxx1
# Location of the ranktable.json file
export RANKTABLEFILE=xxx/hccl_2s_16p.json
# Master node IP and port
export MASTER_IP=xxx1
export MASTER_PORT=8088
# Enable the memory-pool expandable-segments feature
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
# Speed up weight loading
export OMP_NUM_THREADS=10
export HCCL_CONNECT_TIMEOUT=7200
export HCCL_HOST_SOCKET_PORT_RANGE=60000-60050
export MINDIE_LOG_LEVEL=INFO
cd /usr/local/Ascend/mindie/latest/mindie-service/
./bin/mindieservice_daemon
source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/nnal/atb/set_env.sh
source /usr/local/Ascend/atb-models/set_env.sh
source /usr/local/Ascend/mindie/set_env.sh
# Node 2 uses its own container IP (placeholder xxx2); MASTER_IP stays xxx1.
export MIES_CONTAINER_IP=xxx2
# Location of the ranktable.json file
export RANKTABLEFILE=xxx/hccl_2s_16p.json
# Master node IP and port
export MASTER_IP=xxx1
export MASTER_PORT=8088
# Enable the memory-pool expandable-segments feature
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
# Speed up weight loading
export OMP_NUM_THREADS=10
export HCCL_CONNECT_TIMEOUT=7200
export HCCL_HOST_SOCKET_PORT_RANGE=60000-60050
export MINDIE_LOG_LEVEL=INFO
cd /usr/local/Ascend/mindie/latest/mindie-service/
./bin/mindieservice_daemon
注意:MASTER_PORT不要和config.json内的port冲突。
# Send one non-streaming chat-completions request to the service on port 8080
# (xxx is a placeholder for the server address).
curl --request POST http://xxx:8080/v1/chat/completions \
  --header "Content-Type: application/json" \
  --data '{
"model": "m_model",
"messages": [
{
"role": "user",
"content": "你是谁?"
}
],
"max_tokens": 100,
"ignore_eos": false,
"stream": false
}'
curl -X POST http://xxx:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "m_model",
"prompt": "<|begin▁of▁sentence|><|User|>hello<|Assistant|></think>Hello! I am DeepSeek.<|end▁of▁sentence|><|User|>自我介绍<|Assistant|>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{''}}{%- endif %}{%- endif %}{%- if message['prefix'] is defined and message['prefix'] %}{%- set ns.is_prefix = true -%}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- if message['role'] != 'system' %}{% set ns.is_only_sys = false %}{%- endif %}{%- endfor -%}{% if add_generation_prompt and not ns.is_tool%}{% if ns.is_last_user or ns.is_only_sys or not 
ns.is_prefix %}{{'<|Assistant|>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'