TabPFN(Tabular Prior-data Fitted Network)是 Prior Labs 推出的表格数据基础模型,核心定位是小样本、零训练、秒级推理,替代传统 AutoML 与 GBDT,兼顾速度与精度。
本质:基于 Transformer 的先验拟合网络(PFN),通过海量合成表格数据预训练,学会 “从少量标注快速预测” 的通用能力。
适用场景:小样本(≤10k 样本)、快速原型、低代码表格预测;v6+ 微调版可扩展至 10k–50k 样本,适配小众领域
本文介绍在昇腾910B3上基于torch_npu对TabPFN进行推理的全流程。
模型:TabPFN v2.0.9(推理),v6.4.0(微调)
AI加速卡:910B3
CPU架构:ARM
CANN:8.5.0
torch:2.1.0(torch-npu:2.1.0.post17)
docker run -it -u root -d --net=host \
--privileged \
--ipc=host \
--device=/dev/davinci_manager \
--device=/dev/devmm_svm \
--device=/dev/hisi_hdc \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/sbin:/usr/local/sbin \
-v /usr/local/Ascend/driver/tools/hccn_tool:/usr/local/Ascend/driver/tools/hccn_tool \
--name tabpfn \
quay.io/ascend/cann:8.5.0 \
/bin/bash

说明:后续操作均在容器内执行。
apt update
apt install libxcb1 libx11-xcb1
apt install -y libgl1-mesa-glx
apt install -y libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1
apt-get install -y zlib1g-dev build-essential
pip install torch==2.1.0
pip install torch-npu==2.1.0.post17
pip install tabpfn==2.0.9

export HF_ENDPOINT=https://hf-mirror.com

参考如下脚本,实现推理脚本(保存为 tabpfn_npu_infer.py):
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import torch
import torch_npu
import time
import logging
# Detect whether an Ascend NPU is usable and report the selected device.
# IS_NPU stays False unless torch_npu imports cleanly AND reports an
# available device.
# NOTE(review): an unconditional `import torch_npu` in the import section
# above this block will raise before this fallback is reached when
# torch_npu is not installed -- confirm whether that import is intended.
IS_NPU = False
try:
    import torch_npu

    if torch_npu.npu.is_available():
        # Imported only for its import-time side effect (the name itself is
        # never used); presumably it redirects CUDA-style torch calls to the
        # NPU -- see the torch_npu migration documentation.
        from torch_npu.contrib import transfer_to_npu  # noqa: F401

        IS_NPU = True
except ImportError:
    # torch_npu (or its contrib shim) is missing: keep the CPU fallback.
    pass

if IS_NPU:
    print("Using NPU device")
else:
    print("Using CPU device")
class CheckError(ValueError):
    """Raised when training or prediction data exceeds an engine size limit."""


class TabPFNInferEngine:
    """Fit-and-predict wrapper around a TabPFN estimator with input-size guards.

    The estimator is a ``TabPFNClassifier`` when ``task_type`` is
    ``"classification"`` and a ``TabPFNRegressor`` for any other value.
    """

    # Upper bounds enforced by ``inference_pretrain`` before any model call.
    SAMPLE_MAX_SIZE = 20000          # max rows in the training sample
    FEATURES_MAX_SIZE = 500          # max columns in sample / prediction frame
    SAMPLE_MAX_DATA_SIZE = 1000000   # max rows * columns in the training sample
    PREDICT_MAX_SIZE = 1000          # max rows in the prediction frame
    CPU_CORE_NUM = 8                 # value passed to torch.set_num_threads

    def __init__(self, task_type="classification", device=None):
        """Create the underlying TabPFN estimator and the engine logger.

        Args:
            task_type: ``"classification"`` selects the classifier; any other
                value selects the regressor.
            device: Device string forwarded to the TabPFN estimator. When
                ``None`` it resolves to ``"cuda"`` if an accelerator is
                reported and to ``"cpu"`` otherwise, so the script no longer
                requests ``"cuda"`` on a machine that has none.
        """
        from tabpfn import TabPFNClassifier, TabPFNRegressor

        if device is None:
            # IS_NPU is the module-level flag set by the NPU detection code.
            # When it is True the script has imported transfer_to_npu, and
            # "cuda" is kept as the device string exactly as before
            # (presumably remapped to the NPU by that shim -- TODO confirm).
            accelerator = IS_NPU or torch.cuda.is_available()
            device = "cuda" if accelerator else "cpu"

        if task_type == "classification":
            self.predictor = TabPFNClassifier(device=device)
        else:
            self.predictor = TabPFNRegressor(device=device)
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger("TabPFN")

    def _check_limits(self, sample, data_df):
        """Raise ``CheckError`` if either frame exceeds the configured limits."""
        if len(sample) > self.SAMPLE_MAX_SIZE:
            raise CheckError("Sample size exceeds limit")
        if len(sample.columns) > self.FEATURES_MAX_SIZE:
            raise CheckError("Feature count exceeds limit")
        if len(sample.columns) * len(sample) > self.SAMPLE_MAX_DATA_SIZE:
            raise CheckError("Total data size exceeds limit")
        if len(data_df) > self.PREDICT_MAX_SIZE:
            raise CheckError("Prediction size exceeds limit")
        if len(data_df.columns) > self.FEATURES_MAX_SIZE:
            raise CheckError("Feature count exceeds limit")

    def inference_pretrain(self, target, sample, data_df):
        """Fit on ``sample`` and return predictions for ``data_df`` as a list.

        Args:
            target: Name of the label column inside ``sample``.
            sample: Training DataFrame containing the features and ``target``.
            data_df: DataFrame of feature rows to predict.

        Returns:
            The estimator's predictions converted with ``.tolist()``.

        Raises:
            CheckError: If ``sample`` or ``data_df`` exceeds a size limit.
            Exception: Any error raised by ``fit``/``predict`` is logged with
                its traceback and re-raised unchanged.
        """
        self._check_limits(sample, data_df)
        torch.set_num_threads(self.CPU_CORE_NUM)

        X_train = sample.drop(columns=[target])
        # Select the label as a 1-D Series; ``sample[[target]]`` would hand
        # the estimator a single-column 2-D frame instead of a label vector.
        y_train = sample[target]

        try:
            start = time.time()
            self.predictor.fit(X_train, y_train)
            end = time.time()
            self.logger.info(f"model fit cost: {end - start:.2f}s")

            start = time.time()
            y_pred = self.predictor.predict(data_df)
            end = time.time()
            self.logger.info(f"model predict cost: {end - start:.2f}s")
        except Exception as exception:
            # Log at ERROR level with the traceback, then re-raise with a
            # bare ``raise`` so the original traceback is preserved.
            self.logger.exception("predict error: %s", exception)
            raise
        return y_pred.tolist()
# Demo entry point: fit on 100 random rows with a binary label and predict
# 10 random rows using the classification engine.
if __name__ == "__main__":
    print("Start TabPFN NPU inference test")
    np.random.seed(42)  # fixed seed so the random demo data is reproducible

    # Training sample: three random features plus a 0/1 label column.
    sample = pd.DataFrame({
        "f1": np.random.randn(100),
        "f2": np.random.randn(100),
        "f3": np.random.randn(100),
        "label": np.random.randint(0, 2, size=100),
    })
    # Rows to predict: same feature columns, no label.
    data_df = pd.DataFrame({
        "f1": np.random.randn(10),
        "f2": np.random.randn(10),
        "f3": np.random.randn(10),
    })

    engine = TabPFNInferEngine(task_type="classification")
    result = engine.inference_pretrain(
        target="label", sample=sample, data_df=data_df
    )
    print("Inference completed!")
    print("Predictions:", result)
sed -i "s/torch.cuda.get_device_capability(device)/1,0/g" /usr/local/python3.11.14/lib/python3.11/site-packages/tabpfn/model/multi_head_attention.py

python tabpfn_npu_infer.py

低版本的TabPFN不支持微调,微调是 v6+ 新增的高级可选训练模式,本体依然是“零训练、开箱即用”的固定权重模型。本次针对v6.4.0版本对微调进行适配验证。
升级到 TabPFN v6.4.0:
pip install tabpfn==6.4.0

微调及验证脚本如下(保存为 finetuning.py):
# -*- coding: utf-8 -*-
import gc
import logging
import os
import warnings
import numpy as np
import sklearn.datasets
import torch
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from tabpfn import TabPFNClassifier
from tabpfn.finetuning.finetuned_classifier import (
FinetunedTabPFNClassifier,
)
# Best-effort NPU setup: when torch_npu is importable and reports an
# available device, import transfer_to_npu for its import-time side effect
# (the name is never used afterwards). Any failure is deliberately ignored
# so the script can still start without torch_npu.
try:
    import torch_npu

    if torch_npu.npu.is_available():
        from torch_npu.contrib import transfer_to_npu  # noqa: F401
except Exception:
    pass
# Silence all Python warnings for the rest of the script.
warnings.filterwarnings("ignore")
# ==========================
# Training parameters
# ==========================
# Passed as ``epochs`` to FinetunedTabPFNClassifier.
NUM_EPOCHS = 5
# Passed as ``learning_rate`` to FinetunedTabPFNClassifier.
LEARNING_RATE = 2e-5
# Passed as ``n_estimators_finetune``.
NUM_ESTIMATORS_FINETUNE = 1
# Passed as ``n_estimators_validation``.
NUM_ESTIMATORS_VALIDATION = 1
# Passed as ``n_estimators_final_inference``.
NUM_ESTIMATORS_FINAL_INFERENCE = 1
# Shared seed for data generation, the train/test split and the classifier.
RANDOM_STATE = 0
def calculate_roc_auc(y_true, y_pred_proba):
    """Return the ROC AUC for binary or multiclass probability predictions.

    Args:
        y_true: True labels.
        y_pred_proba: 2-D array of predicted probabilities; it is indexed as
            ``[:, 1]`` in the binary case.

    Returns:
        The value returned by ``roc_auc_score``. When ``y_true`` holds exactly
        two distinct labels, column 1 of ``y_pred_proba`` is used as the
        score; otherwise the full matrix is scored with ``multi_class="ovr"``.
    """
    if len(np.unique(y_true)) == 2:
        return roc_auc_score(y_true, y_pred_proba[:, 1])
    return roc_auc_score(y_true, y_pred_proba, multi_class="ovr")
def main() -> None:
    """Fine-tune TabPFN on a synthetic dataset and print held-out metrics.

    Generates a 5000-sample, 20-feature classification problem, holds out
    10% as a test set, fits ``FinetunedTabPFNClassifier`` on the remainder,
    then prints the test ROC AUC and log loss.
    """
    # ==========================
    # Generate the data locally
    # ==========================
    X, y = sklearn.datasets.make_classification(
        n_samples=5000, n_features=20, random_state=RANDOM_STATE
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, random_state=RANDOM_STATE
    )

    # ==========================
    # Fine-tuning code for TabPFN 6.4.0
    # ==========================
    finetuned_clf = FinetunedTabPFNClassifier(
        device="cuda",
        epochs=NUM_EPOCHS,
        learning_rate=LEARNING_RATE,
        n_estimators_finetune=NUM_ESTIMATORS_FINETUNE,
        n_estimators_validation=NUM_ESTIMATORS_VALIDATION,
        n_estimators_final_inference=NUM_ESTIMATORS_FINAL_INFERENCE,
        random_state=RANDOM_STATE,
    )
    finetuned_clf.fit(X_train, y_train)

    # ==========================
    # Evaluate and report
    # ==========================
    y_pred_proba = finetuned_clf.predict_proba(X_test)
    roc_auc = calculate_roc_auc(y_test, y_pred_proba)
    loss = log_loss(y_test, y_pred_proba)
    print(f"\n✅ Finetuned TabPFN Test ROC: {roc_auc:.4f}")
    print(f"✅ Finetuned TabPFN Test Log Loss: {loss:.4f}")


if __name__ == "__main__":
    main()
python finetuning.py

结果如下图所示:

报错信息如下:

措施:按照第三章第二节配置即可