Surya是Datalab公司(Vik Paruchuri)开发的开源、高性能、多语言文档智能处理OCR工具包,具备以下功能:
支持90多种语言的OCR,性能优于云服务
支持任何语言的行级文本检测
布局分析(检测表格、图像、标题等)
阅读顺序检测
表格识别(检测行/列)
LaTeX OCR
根据实操环境,选择A2或A3镜像
# A2镜像
docker pull swr.cn-south-1.myhuaweicloud.com/ascendhub/cann:8.5.0-910b-ubuntu22.04-py3.11
# A3镜像
docker pull swr.cn-south-1.myhuaweicloud.com/ascendhub/cann:8.5.0-a3-ubuntu22.04-py3.11

根据vllm-ascend包启动一个容器:
export IMAGE=swr.cn-south-1.myhuaweicloud.com/ascendhub/cann:8.5.0-910b-ubuntu22.04-py3.11
export NAME=surya_test
docker run -it -d \
--name $NAME \
--net=host \
--shm-size=1g \
--privileged \
--device /dev/davinci_manager \
--device /dev/devmm_svm \
--device /dev/hisi_hdc \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/Ascend/driver/tools/hccn_tool:/usr/local/Ascend/driver/tools/hccn_tool \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \
-v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
-v /etc/ascend_install.info:/etc/ascend_install.info \
-v /root/.cache:/root/.cache \
-v /root/.nvm/versions/node:/root/.nvm/versions/node \
-it $IMAGE bash

进入容器:
docker exec -it surya_test bash

通过 hf-mirror 镜像站,使用 huggingface-hub 下载 surya_det3 和 surya_rec2 权重:
cd /home
pip install huggingface_hub==0.36.2
export HF_ENDPOINT=https://hf-mirror.com
# surya_det3 模型权重下载
huggingface-cli download --resume-download vikp/surya_det3 --local-dir surya_det3
# surya_rec2 模型权重下载
huggingface-cli download --resume-download vikp/surya_rec2 --local-dir surya_rec2

安装依赖:
# Common Python dependencies; each package is listed exactly once.
pip install numpy attrs decorator psutil scipy absl-py cloudpickle ml-dtypes tornado
# Pinned framework versions used by this guide.
pip install transformers==4.43.3
pip install torch==2.9.0
pip install torch_npu==2.9.0

如果pip下载速度太慢,可以在 pip install 命令后追加 -i https://repo.huaweicloud.com/repository/pypi/simple 以使用华为源。
获取surya官方源码(假设是在/home目录下操作)
cd /home
git clone https://github.com/datalab-to/surya.git

切换到 v0.13.0 的 tag:
cd /home/surya
git checkout v0.13.0

下载本仓库源码,并应用patch:
cd /home
git clone https://atomgit.com/Ascend-SACT/Surya.git
cd /home/surya
git apply /home/Surya/0_13_0_patch.patch
pip install -e .

测试前,导入环境变量:
source /usr/local/Ascend/ascend-toolkit/set_env.sh && export ASCEND_RT_VISIBLE_DEVICES=0

方式一:surya_ocr 工具
# 使用本地权重执行(可配置,默认执行surya_ocr会下载权重,可能受网络影响)
export DETECTOR_MODEL_CHECKPOINT=/home/surya_det3
export RECOGNITION_MODEL_CHECKPOINT=/home/surya_rec2
# 参考surya官网启动方式 (https://github.com/datalab-to/surya)
surya_ocr IMAGE

方式二:使用 Python 方式(使用本地权重)

测试样例如下(test_surya.py):
#!/usr/bin/env python3
"""
Test script for surya-rec2 and surya-det3 NPU migration verification.

Draws a synthetic image containing known text, runs the Surya detection and
recognition predictors on it, and exits with status 0 when the expected text
is recognized, or 1 on any failure (no NPU, load error, wrong text).
"""
import os
import sys
import traceback

from PIL import Image, ImageDraw

# Kept from the original script; set before torch is imported.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

import torch
import torch_npu

# Local weight directories downloaded earlier in this guide. When a directory
# is missing, the predictor falls back to its default checkpoint.
DETECTION_CHECKPOINT = "/home/surya_det3"
RECOGNITION_CHECKPOINT = "/home/surya_rec2"

# Text drawn into the test image that must show up in the OCR output.
EXPECTED_TEXT = "Hello World"


def report_environment():
    """Print interpreter/framework versions and NPU status.

    Returns:
        True when an NPU is available, False otherwise.
    """
    print("\n[Environment Check]")
    print(f"Python version: {sys.version}")
    print(f"PyTorch version: {torch.__version__}")
    print(f"torch_npu version: {torch_npu.__version__}")

    print("\n[NPU Device Check]")
    # Query the runtime instead of printing a hard-coded "Yes".
    if not torch.npu.is_available():
        print("NPU available: No")
        return False
    print("NPU available: Yes")
    print(f"NPU device count: {torch.npu.device_count()}")
    print(f"Current NPU device: {torch.npu.current_device()}")
    print(f"NPU device name: {torch.npu.get_device_name(0)}")
    return True


def build_test_image():
    """Create a white 1024x1024 image with a headline and a three-line paragraph."""
    print("\n[Creating test image]")
    image = Image.new("RGB", (1024, 1024), "white")
    draw = ImageDraw.Draw(image)
    # NOTE(review): the font_size keyword needs a recent Pillow release.
    draw.text((10, 10), EXPECTED_TEXT, fill="black", font_size=72)
    draw.text(
        (10, 200),
        "This is a sentence of text.\nNow it is a paragraph.\nA three-line one.",
        fill="black",
        font_size=24,
    )
    print(f"Test image created: {image.size}")
    return image


def resolve_checkpoint(path, label):
    """Return *path* if it exists on disk, otherwise None (use the default model)."""
    if os.path.exists(path):
        print(f"Using local {label} model: {path}")
        return path
    print(f"Local {label} model not found at {path}, using default model")
    return None


def load_predictor(predictor_cls, checkpoint, label):
    """Instantiate *predictor_cls*, from *checkpoint* when one was resolved.

    Args:
        predictor_cls: Surya predictor class to instantiate.
        checkpoint: Local checkpoint path, or None for the default weights.
        label: Human-readable model kind used in log output.

    Returns:
        The constructed predictor instance.
    """
    print(f"Loading {label} predictor...")
    if checkpoint is None:
        predictor = predictor_cls()
    else:
        predictor = predictor_cls(checkpoint=checkpoint)
    title = label.capitalize()
    print(f"{title} model device: {predictor.model.device}")
    if checkpoint is not None:
        print(f"{title} model loaded from local checkpoint")
    return predictor


def run_ocr_check(test_image):
    """Load both predictors, run OCR on *test_image*, and verify the output.

    Returns:
        0 when EXPECTED_TEXT is found in a recognized line, 1 otherwise.

    Raises:
        RuntimeError: if the predictor does not return exactly one result.
    """
    # Imported here so import failures are reported by the caller's handler.
    from surya.detection import DetectionPredictor
    from surya.recognition import RecognitionPredictor

    detection_checkpoint = resolve_checkpoint(DETECTION_CHECKPOINT, "detection")
    recognition_checkpoint = resolve_checkpoint(RECOGNITION_CHECKPOINT, "recognition")

    detection_predictor = load_predictor(
        DetectionPredictor, detection_checkpoint, "detection"
    )
    recognition_predictor = load_predictor(
        RecognitionPredictor, recognition_checkpoint, "recognition"
    )

    print("\n[Running OCR inference]")
    print("Processing test image...")
    recognition_results = recognition_predictor(
        [test_image], [None], detection_predictor
    )

    print("\n[Results]")
    # Explicit check instead of assert, which is stripped under `python -O`.
    if len(recognition_results) != 1:
        raise RuntimeError(f"Expected 1 result, got {len(recognition_results)}")
    print(f"Number of results: {len(recognition_results)}")

    result = recognition_results[0]
    print(f"Image bbox: {result.image_bbox}")
    print(f"Languages: {result.languages}")

    text_lines = result.text_lines
    print(f"Number of text lines detected: {len(text_lines)}")
    for index, line in enumerate(text_lines, start=1):
        print(f"  Line {index}: '{line.text}' (confidence: {line.confidence:.4f})")

    print("\n[Verification]")
    # Accept the expected text on any line; line order is not relied upon.
    if any(EXPECTED_TEXT in line.text for line in text_lines):
        print(f"✓ SUCCESS: Expected text '{EXPECTED_TEXT}' found in detection")
        print("✓ Both surya-rec2 and surya-det3 NPU migration verification PASSED")
        return 0

    detected_text = text_lines[0].text if text_lines else ""
    print(f"✗ FAILED: Expected text '{EXPECTED_TEXT}' not found")
    print(f"  Detected: '{detected_text}'")
    print("✗ NPU migration verification FAILED")
    return 1


def main():
    """Run the full verification and return the process exit code."""
    print("=" * 80)
    print("Surya-Rec2 & Det3 NPU Migration Verification Test")
    print("=" * 80)

    if not report_environment():
        print("✗ NPU migration verification FAILED")
        return 1

    test_image = build_test_image()

    print("\n[Loading Surya models]")
    try:
        return run_ocr_check(test_image)
    except Exception as e:
        # Top-level boundary: report any failure and signal it via exit code.
        print(f"\n✗ ERROR: {type(e).__name__}: {e}")
        traceback.print_exc()
        print("✗ NPU migration verification FAILED")
        return 1


if __name__ == "__main__":
    sys.exit(main())

执行
python test_surya.py