这是一个 Inception-v3 图像分类模型,由 MXNet Gluon 的作者在 ImageNet-1k 数据集上训练而成。
from PIL import Image
import timm
from timm.models.efficientnet import _cfg
from openmind import is_torch_npu_available
import torch_npu
import torch
import argparse
# Pick the device used for the model and its inputs: the first NPU ("npu:0")
# when openmind reports one is available, otherwise the CPU.
# NOTE(review): torch_npu is imported unconditionally at the top of the file,
# so the CPU branch is only reachable where that import succeeds.
device = "npu:0" if is_torch_npu_available() else "cpu"
def parse_args(argv=None):
    """Parse the command-line options of this example script.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with a ``model_name_or_path`` string attribute,
        which defaults to "inception_v3.gluon_in1k".
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        default="inception_v3.gluon_in1k",
    )
    return parser.parse_args(argv)
args = parse_args()
model_path = args.model_name_or_path
# load tokenizer
img = Image.open('./beignets-task-guide.png')
config = _cfg(url='', file='model.safetensors')
model = timm.create_model("inception_v3.gluon_in1k", pretrained=True, pretrained_cfg=config).to(device)
model = model.eval()
# get model specific transforms (normalization, resize)
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)
output = model(transforms(img).unsqueeze(0).npu()) # unsqueeze single image into batch of 1
for o in output:
# print shape of each feature map in output
# e.g.:
# torch.Size([1, 64, 147, 147])
# torch.Size([1, 192, 71, 71])
# torch.Size([1, 288, 35, 35])
# torch.Size([1, 768, 17, 17])
# torch.Size([1, 2048, 8, 8])
print(o.shape)在 timm 模型结果 中探索此模型的数据集和运行时指标。
@article{DBLP:journals/corr/SzegedyVISW15,
author = {Christian Szegedy and
Vincent Vanhoucke and
Sergey Ioffe and
Jonathon Shlens and
Zbigniew Wojna},
title = {Rethinking the Inception Architecture for Computer Vision},
journal = {CoRR},
volume = {abs/1512.00567},
year = {2015},
url = {http://arxiv.org/abs/1512.00567},
archivePrefix = {arXiv},
eprint = {1512.00567},
timestamp = {Mon, 13 Aug 2018 16:49:07 +0200},
biburl = {https://dblp.org/rec/journals/corr/SzegedyVISW15.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}