mobilenetv2_100.ra_in1k 模型卡片

MobileNet-v2 图像分类模型。基于下方所述的方案模板，在 timm 中使用 ImageNet-1k 数据集进行训练。

方案详情：

- RandAugment `RA` 方案。受 EfficientNet 的 RandAugment 方案启发并在其基础上改进，在 ResNet Strikes Back 论文中作为 `B` 方案发布。
- RMSProp（TF 1.0 行为）优化器，EMA 权重平均
- 带预热的 Step（阶梯式指数衰减）学习率调度

模型详情

模型类型： 图像分类 / 特征主干网络
模型统计：
- 参数（M）：3.5
- GMACs：0.3
- 激活值（M）：6.7
- 图像尺寸：224 x 224
相关论文：
- MobileNetV2: Inverted Residuals and Linear Bottlenecks: https://arxiv.org/abs/1801.04381
- ResNet strikes back: An improved training procedure in timm: https://arxiv.org/abs/2110.00476
数据集： ImageNet-1k
原始来源： https://github.com/huggingface/pytorch-image-models

模型使用

图像分类

import os
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
import torch
import torch_npu
import argparse
from openmind import pipeline, is_torch_npu_available
from PIL import Image
import requests
import timm

def parse_args():
    """Parse the command-line options of this example script.

    Returns:
        argparse.Namespace: namespace with a single attribute,
        ``model_name_or_path`` (a string, or ``None`` when the flag is
        not supplied).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--model_name_or_path", type=str, default=None, help="Path to model")
    return cli.parse_args()



def main():
    """Classify one sample image and print the indices of the top-5 classes.

    Prints the current working directory and the ``--model_name_or_path``
    value, downloads a sample image, runs it through the pretrained
    ``mobilenetv2_100.ra_in1k`` model on the NPU when one is available
    (CPU otherwise), and prints the top-5 class indices.

    Raises:
        requests.RequestException: if the sample image cannot be downloaded
            (connection error, timeout, or non-2xx HTTP status).
    """
    print("Current path:", os.getcwd())

    args = parse_args()
    model_path = args.model_name_or_path

    device = "npu:0" if is_torch_npu_available() else "cpu"

    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status() surfaces HTTP errors here instead of as
    # an obscure image-decoding failure later.
    response = requests.get(url, stream=True, timeout=30)
    response.raise_for_status()
    # Force three channels so the transforms always receive an RGB image.
    img = Image.open(response.raw).convert("RGB")

    # NOTE: the checkpoint is pinned by name; the CLI value is only echoed
    # and is not used to select the model.
    print("model_path:", model_path)
    model = timm.create_model('mobilenetv2_100.ra_in1k', pretrained=True).to(device)
    model = model.eval()

    # get model specific transforms (normalization, resize)
    data_config = timm.data.resolve_model_data_config(model)
    transforms = timm.data.create_transform(**data_config, is_training=False)

    # unsqueeze the single image into a batch of 1
    batch = transforms(img).unsqueeze(0).to(device)
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(batch)

    # Only the indices are reported, so the probabilities are not kept.
    top5_class_indices = torch.topk(output.softmax(dim=1), k=5).indices
    print(top5_class_indices)


if __name__ == "__main__":
    main()

特征图提取

import torch
import torch_npu
import argparse
from openmind import pipeline, is_torch_npu_available
from PIL import Image
import requests
import timm

def parse_args():
    """Read the command-line flags accepted by the feature-map example.

    Returns:
        argparse.Namespace: carries ``model_name_or_path``, which is a
        string or ``None`` if the flag was omitted.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--model_name_or_path", help="Path to model", default=None, type=str
    )
    parsed = arg_parser.parse_args()
    return parsed



def main():
    """Extract intermediate feature maps for one sample image and print their shapes.

    The model named by ``--model_name_or_path`` is used; when the flag is
    omitted the script falls back to ``mobilenetv2_100.ra_in1k`` instead of
    passing ``None`` to ``timm.create_model``. The model runs on the NPU
    when one is available, otherwise on the CPU.

    Raises:
        requests.RequestException: if the sample image cannot be downloaded
            (connection error, timeout, or non-2xx HTTP status).
    """
    args = parse_args()
    model_name = args.model_name_or_path or "mobilenetv2_100.ra_in1k"

    device = "npu:0" if is_torch_npu_available() else "cpu"

    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # Bound the download time and fail loudly on HTTP errors.
    response = requests.get(url, stream=True, timeout=30)
    response.raise_for_status()
    # Force three channels so the transforms always receive an RGB image.
    img = Image.open(response.raw).convert("RGB")

    # features_only=True makes the model return its intermediate feature
    # maps; without it the call returns classification logits and the loop
    # below would iterate over the batch dimension of that logits tensor.
    model = timm.create_model(model_name, pretrained=True, features_only=True).to(device)
    model = model.eval()

    # get model specific transforms (normalization, resize)
    data_config = timm.data.resolve_model_data_config(model)
    transforms = timm.data.create_transform(**data_config, is_training=False)

    # unsqueeze the single image into a batch of 1
    batch = transforms(img).unsqueeze(0).to(device)
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(batch)

    for o in output:
        # print shape of each feature map in output
        # NOTE(review): the upstream timm card for mobilenetv2_100 lists
        # five NCHW maps for a 224x224 input (16/24/32/96/320 channels at
        # strides 2/4/8/16/32) - confirm against an actual run.
        print(o.shape)


if __name__ == "__main__":
    main()

图像嵌入

import torch
import torch_npu
import argparse
from openmind import pipeline, is_torch_npu_available
from PIL import Image
import requests
import timm

def parse_args():
    """Build the argument parser for the embedding example and parse ``sys.argv``.

    Returns:
        argparse.Namespace: exposes ``model_name_or_path`` (``str`` or
        ``None`` when not provided on the command line).
    """
    option = {"type": str, "help": "Path to model", "default": None}
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name_or_path", **option)
    return parser.parse_args()



def main():
    """Compute the pooled image embedding of one sample image and print its shape.

    The model named by ``--model_name_or_path`` is used; when the flag is
    omitted the script falls back to ``mobilenetv2_100.ra_in1k`` instead of
    passing ``None`` to ``timm.create_model``. The model runs on the NPU
    when one is available, otherwise on the CPU.

    Raises:
        requests.RequestException: if the sample image cannot be downloaded
            (connection error, timeout, or non-2xx HTTP status).
    """
    args = parse_args()
    model_name = args.model_name_or_path or "mobilenetv2_100.ra_in1k"

    device = "npu:0" if is_torch_npu_available() else "cpu"

    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # Bound the download time and fail loudly on HTTP errors.
    response = requests.get(url, stream=True, timeout=30)
    response.raise_for_status()
    # Force three channels so the transforms always receive an RGB image.
    img = Image.open(response.raw).convert("RGB")

    model = timm.create_model(model_name, pretrained=True).to(device)
    model = model.eval()

    # get model specific transforms (normalization, resize)
    data_config = timm.data.resolve_model_data_config(model)
    transforms = timm.data.create_transform(**data_config, is_training=False)

    batch = transforms(img).unsqueeze(0).to(device)
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        # unpooled backbone features
        # (presumably (batch_size, num_features, H, W) for this conv net -
        # TODO confirm)
        features = model.forward_features(batch)
        # pre_logits=True stops before the classifier layer, giving a
        # (batch_size, num_features) tensor
        output = model.forward_head(features, pre_logits=True)

    # Report the result; the embedding was previously computed and dropped.
    print(output.shape)


# Entry point so that running the snippet actually executes the example.
if __name__ == "__main__":
    main()

模型对比

可在 timm 的 [model results](https://github.com/huggingface/pytorch-image-models/tree/main/results) 中查看此模型的数据集指标和运行时指标。

引用

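@inproceedings{sandler2018mobilenetv2,
  title={Mobilenetv2: Inverted residuals and linear bottlenecks},
  author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={4510--4520},
  year={2018}
}

@inproceedings{howard2019searching,
  title={Searching for mobilenetv3},
  author={Howard, Andrew and Sandler, Mark and Chu, Grace and Chen, Liang-Chieh and Chen, Bo and Tan, Mingxing and Wang, Weijun and Zhu, Yukun and Pang, Ruoming and Vasudevan, Vijay and others},
  booktitle={Proceedings of the IEEE/CVF international conference on computer vision},
  pages={1314--1324},
  year={2019}
}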

@misc{rw2019timm,
  author = {Ross Wightman},
  title = {PyTorch Image Models},
  year = {2019},
  publisher = {GitHub},
  journal = {GitHub repository},
  doi = {10.5281/zenodo.4414861},
  howpublished = {\url{https://github.com/huggingface/pytorch-image-models}}
}

@inproceedings{wightman2021resnet,
  title={ResNet strikes back: An improved training procedure in timm},
  author={Wightman, Ross and Touvron, Hugo and Jegou, Herve},
  booktitle={NeurIPS 2021 Workshop on ImageNet: Past, Present, and Future}
}

mobilenetv2_100.ra_in1k 模型卡片

MobileNet-v2 图像分类模型。基于下方所述的方案模板，在 timm 中使用 ImageNet-1k 数据集进行训练。

方案详情：

- RandAugment `RA` 方案。受 EfficientNet 的 RandAugment 方案启发并在其基础上改进，在 ResNet Strikes Back 论文中作为 `B` 方案发布。
- RMSProp（TF 1.0 行为）优化器，EMA 权重平均
- 带预热的 Step（阶梯式指数衰减）学习率调度

模型详情

模型类型： 图像分类 / 特征主干网络
模型统计：
- 参数（M）：3.5
- GMACs：0.3
- 激活值（M）：6.7
- 图像尺寸：224 x 224
相关论文：
- MobileNetV2: Inverted Residuals and Linear Bottlenecks: https://arxiv.org/abs/1801.04381
- ResNet strikes back: An improved training procedure in timm: https://arxiv.org/abs/2110.00476
数据集： ImageNet-1k
原始来源： https://github.com/huggingface/pytorch-image-models

模型使用

图像分类

import os
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
import torch
import torch_npu
import argparse
from openmind import pipeline, is_torch_npu_available
from PIL import Image
import requests
import timm

def parse_args():
    """Parse the command-line options of this example script.

    Returns:
        argparse.Namespace: namespace with a single attribute,
        ``model_name_or_path`` (a string, or ``None`` when the flag is
        not supplied).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--model_name_or_path", type=str, default=None, help="Path to model")
    return cli.parse_args()



def main():
    """Classify one sample image and print the indices of the top-5 classes.

    Prints the current working directory and the ``--model_name_or_path``
    value, downloads a sample image, runs it through the pretrained
    ``mobilenetv2_100.ra_in1k`` model on the NPU when one is available
    (CPU otherwise), and prints the top-5 class indices.

    Raises:
        requests.RequestException: if the sample image cannot be downloaded
            (connection error, timeout, or non-2xx HTTP status).
    """
    print("Current path:", os.getcwd())

    args = parse_args()
    model_path = args.model_name_or_path

    device = "npu:0" if is_torch_npu_available() else "cpu"

    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status() surfaces HTTP errors here instead of as
    # an obscure image-decoding failure later.
    response = requests.get(url, stream=True, timeout=30)
    response.raise_for_status()
    # Force three channels so the transforms always receive an RGB image.
    img = Image.open(response.raw).convert("RGB")

    # NOTE: the checkpoint is pinned by name; the CLI value is only echoed
    # and is not used to select the model.
    print("model_path:", model_path)
    model = timm.create_model('mobilenetv2_100.ra_in1k', pretrained=True).to(device)
    model = model.eval()

    # get model specific transforms (normalization, resize)
    data_config = timm.data.resolve_model_data_config(model)
    transforms = timm.data.create_transform(**data_config, is_training=False)

    # unsqueeze the single image into a batch of 1
    batch = transforms(img).unsqueeze(0).to(device)
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(batch)

    # Only the indices are reported, so the probabilities are not kept.
    top5_class_indices = torch.topk(output.softmax(dim=1), k=5).indices
    print(top5_class_indices)


if __name__ == "__main__":
    main()

特征图提取

import torch
import torch_npu
import argparse
from openmind import pipeline, is_torch_npu_available
from PIL import Image
import requests
import timm

def parse_args():
    """Read the command-line flags accepted by the feature-map example.

    Returns:
        argparse.Namespace: carries ``model_name_or_path``, which is a
        string or ``None`` if the flag was omitted.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--model_name_or_path", help="Path to model", default=None, type=str
    )
    parsed = arg_parser.parse_args()
    return parsed



def main():
    """Extract intermediate feature maps for one sample image and print their shapes.

    The model named by ``--model_name_or_path`` is used; when the flag is
    omitted the script falls back to ``mobilenetv2_100.ra_in1k`` instead of
    passing ``None`` to ``timm.create_model``. The model runs on the NPU
    when one is available, otherwise on the CPU.

    Raises:
        requests.RequestException: if the sample image cannot be downloaded
            (connection error, timeout, or non-2xx HTTP status).
    """
    args = parse_args()
    model_name = args.model_name_or_path or "mobilenetv2_100.ra_in1k"

    device = "npu:0" if is_torch_npu_available() else "cpu"

    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # Bound the download time and fail loudly on HTTP errors.
    response = requests.get(url, stream=True, timeout=30)
    response.raise_for_status()
    # Force three channels so the transforms always receive an RGB image.
    img = Image.open(response.raw).convert("RGB")

    # features_only=True makes the model return its intermediate feature
    # maps; without it the call returns classification logits and the loop
    # below would iterate over the batch dimension of that logits tensor.
    model = timm.create_model(model_name, pretrained=True, features_only=True).to(device)
    model = model.eval()

    # get model specific transforms (normalization, resize)
    data_config = timm.data.resolve_model_data_config(model)
    transforms = timm.data.create_transform(**data_config, is_training=False)

    # unsqueeze the single image into a batch of 1
    batch = transforms(img).unsqueeze(0).to(device)
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(batch)

    for o in output:
        # print shape of each feature map in output
        # NOTE(review): the upstream timm card for mobilenetv2_100 lists
        # five NCHW maps for a 224x224 input (16/24/32/96/320 channels at
        # strides 2/4/8/16/32) - confirm against an actual run.
        print(o.shape)


if __name__ == "__main__":
    main()

图像嵌入

import torch
import torch_npu
import argparse
from openmind import pipeline, is_torch_npu_available
from PIL import Image
import requests
import timm

def parse_args():
    """Build the argument parser for the embedding example and parse ``sys.argv``.

    Returns:
        argparse.Namespace: exposes ``model_name_or_path`` (``str`` or
        ``None`` when not provided on the command line).
    """
    option = {"type": str, "help": "Path to model", "default": None}
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name_or_path", **option)
    return parser.parse_args()



def main():
    """Compute the pooled image embedding of one sample image and print its shape.

    The model named by ``--model_name_or_path`` is used; when the flag is
    omitted the script falls back to ``mobilenetv2_100.ra_in1k`` instead of
    passing ``None`` to ``timm.create_model``. The model runs on the NPU
    when one is available, otherwise on the CPU.

    Raises:
        requests.RequestException: if the sample image cannot be downloaded
            (connection error, timeout, or non-2xx HTTP status).
    """
    args = parse_args()
    model_name = args.model_name_or_path or "mobilenetv2_100.ra_in1k"

    device = "npu:0" if is_torch_npu_available() else "cpu"

    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # Bound the download time and fail loudly on HTTP errors.
    response = requests.get(url, stream=True, timeout=30)
    response.raise_for_status()
    # Force three channels so the transforms always receive an RGB image.
    img = Image.open(response.raw).convert("RGB")

    model = timm.create_model(model_name, pretrained=True).to(device)
    model = model.eval()

    # get model specific transforms (normalization, resize)
    data_config = timm.data.resolve_model_data_config(model)
    transforms = timm.data.create_transform(**data_config, is_training=False)

    batch = transforms(img).unsqueeze(0).to(device)
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        # unpooled backbone features
        # (presumably (batch_size, num_features, H, W) for this conv net -
        # TODO confirm)
        features = model.forward_features(batch)
        # pre_logits=True stops before the classifier layer, giving a
        # (batch_size, num_features) tensor
        output = model.forward_head(features, pre_logits=True)

    # Report the result; the embedding was previously computed and dropped.
    print(output.shape)


# Entry point so that running the snippet actually executes the example.
if __name__ == "__main__":
    main()

模型对比

可在 timm 的 [model results](https://github.com/huggingface/pytorch-image-models/tree/main/results) 中查看此模型的数据集指标和运行时指标。

引用

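@inproceedings{sandler2018mobilenetv2,
  title={Mobilenetv2: Inverted residuals and linear bottlenecks},
  author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={4510--4520},
  year={2018}
}

@inproceedings{howard2019searching,
  title={Searching for mobilenetv3},
  author={Howard, Andrew and Sandler, Mark and Chu, Grace and Chen, Liang-Chieh and Chen, Bo and Tan, Mingxing and Wang, Weijun and Zhu, Yukun and Pang, Ruoming and Vasudevan, Vijay and others},
  booktitle={Proceedings of the IEEE/CVF international conference on computer vision},
  pages={1314--1324},
  year={2019}
}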

@misc{rw2019timm,
  author = {Ross Wightman},
  title = {PyTorch Image Models},
  year = {2019},
  publisher = {GitHub},
  journal = {GitHub repository},
  doi = {10.5281/zenodo.4414861},
  howpublished = {\url{https://github.com/huggingface/pytorch-image-models}}
}

@inproceedings{wightman2021resnet,
  title={ResNet strikes back: An improved training procedure in timm},
  author={Wightman, Ross and Touvron, Hugo and Jegou, Herve},
  booktitle={NeurIPS 2021 Workshop on ImageNet: Past, Present, and Future}
}