Whisper 是一种最先进的自动语音识别(ASR)和语音翻译模型,由 OpenAI 的 Alec Radford 等人在论文 Robust Speech Recognition via Large-Scale Weak Supervision 中提出。该模型在超过 500 万小时的标注数据上进行了训练,展示了在零样本设置下对多种数据集和领域的强大泛化能力。
本文主要介绍如何通过 vLLM-Ascend 服务化部署 whisper-large-v3 / whisper-large-v3-turbo 模型。
| 配套 | 版本 |
|---|---|
| vLLM-Ascend | v0.14.0rc1 |
| CANN | 8.5.0 |
| Python | 3.11.14 |
| torch | 2.9.0+cpu |
| torch_npu | 2.9.0 |
| 驱动版本 | 25.2.0 |
| 算力设备 | Atlas 800T A2 910B |
# 1、拉取镜像
docker pull quay.io/ascend/vllm-ascend:v0.14.0rc1
# 2、容器启动
export IMAGE=quay.io/ascend/vllm-ascend:v0.14.0rc1
docker run -itd \
--name Whisper-large-v3 \
--shm-size=1g \
--net=host \
--device /dev/davinci0 \
--device /dev/davinci1 \
--device /dev/davinci2 \
--device /dev/davinci3 \
--device /dev/davinci4 \
--device /dev/davinci5 \
--device /dev/davinci6 \
--device /dev/davinci7 \
--device /dev/davinci_manager \
--device /dev/devmm_svm \
--device /dev/hisi_hdc \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/Ascend/driver/tools/hccn_tool:/usr/local/Ascend/driver/tools/hccn_tool \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \
-v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
-v /etc/ascend_install.info:/etc/ascend_install.info \
$IMAGE bash
# 3、进入容器
docker exec -it {container_id} /bin/bash

vLLM 启动服务时可自动下载模型权重至 /root/.cache/ 目录下;如果离线环境不方便下载,也可手动下载准备:
# whisper-large-v3模型下载
modelscope download --model openai-mirror/whisper-large-v3 --local_dir ./whisper-large-v3
# whisper-large-v3-turbo模型下载
modelscope download --model openai-mirror/whisper-large-v3-turbo --local_dir ./whisper-large-v3-turbo

# 指定模型名称,自动下载模型
# openai-mirror/whisper-large-v3
# openai-mirror/whisper-large-v3-turbo
export VLLM_USE_MODELSCOPE=true
vllm serve "openai-mirror/whisper-large-v3" \
--max-num-batched-tokens 16384 \
--trust-remote-code \
--no-enable-prefix-caching
# 指定模型下载路径,启动服务
vllm serve /workspace/whisper-large-v3 \
--served-model-name "openai-mirror/whisper-large-v3" \
--max-num-batched-tokens 16384 \
--trust-remote-code \
--no-enable-prefix-caching

文本转录接口调用:
curl -X POST "http://0.0.0.0:8000/v1/audio/transcriptions" \
-F "file=@test.wav" \
-F "model=openai-mirror/whisper-large-v3" \
-F "language=zh" \
-F "response_format=verbose_json"

执行结果:
{
"duration":"3.47",
"language":"zh",
"text":"成都双流机场到中河有多远?",
"segments":[
{
"id":0,
"avg_logprob":null,
"compression_ratio":null,
"end":3.48,
"no_speech_prob":null,
"seek":0,
"start":0.0,
"temperature":0.0,
"text":"成都双流机场到中河有多远?",
"tokens":[11336,7182,2129,234,27854,37960,50255,4511,5975,3308,111,2412,6392,3316,250,30]
}
],
"words":null
}