llama-160m-openmind：可用于文本生成任务，主要作为 SpecInfer 论文中的基础小型推测模型而开发。该模型类似 LLaMA，含 160M 参数，在 Wikipedia 及部分 C4 数据集上训练，支持在 NPU 和 CPU 上运行。【此简介由AI生成】

模型说明

这是一个类LLaMA模型，仅包含160M参数，训练数据来源于Wikipedia以及部分C4-en和C4-realnewslike数据集。

目前尚未进行任何评估，因此使用时请谨慎。

该模型主要是作为SpecInfer论文中的基础小型推测模型（Small Speculative Model）开发的。

使用方法

from openmind import AutoTokenizer, AutoModelForCausalLM, is_torch_npu_available
from openmind_hub import snapshot_download
import torch.nn.functional as F
from torch import Tensor
import openmind
import torch
import argparse

def parse_args():
    """Parse the command-line options for the text-generation demo.

    Returns:
        argparse.Namespace: carries ``model_name_or_path``, which falls back
        to ``"jeffding/llama-160m-openmind"`` when the flag is not given.
    """
    cli = argparse.ArgumentParser()
    model_option = {
        "type": str,
        "help": "Path to model",
        "default": "jeffding/llama-160m-openmind",
    }
    cli.add_argument("--model_name_or_path", **model_option)
    return cli.parse_args()

def main():
    """Generate a completion for a fixed question prompt and print it.

    The model location comes from the ``--model_name_or_path`` option.
    Inference runs on ``npu:0`` when ``is_torch_npu_available()`` reports an
    NPU, and on the CPU otherwise.
    """
    checkpoint = parse_args().model_name_or_path

    # Prefer the first NPU; fall back to the CPU.
    device = "npu:0" if is_torch_npu_available() else "cpu"

    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    model = model.to(device)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model.eval()

    prompt = 'Q: What is the largest bird?\nA:'
    encoded = tokenizer(prompt, return_tensors="pt")
    prompt_ids = encoded.input_ids.to(device)
    generated = model.generate(prompt_ids, max_length=20)

    # Only the first returned sequence is decoded and printed.
    first_sequence = generated[0].tolist()
    print(tokenizer.decode(first_sequence, skip_special_tokens=True))
    # Sample output recorded with the original example:
    # Q: What is the largest bird?\nA: The largest bird is a black-headed gull.
    
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

引用说明

如需引用本模型，请使用以下 BibTeX 条目：

@misc{miao2023specinfer,
      title={SpecInfer: Accelerating Generative LLM Serving with Speculative Inference and Token Tree Verification}, 
      author={Xupeng Miao and Gabriele Oliaro and Zhihao Zhang and Xinhao Cheng and Zeyu Wang and Rae Ying Yee Wong and Zhuoming Chen and Daiyaan Arfeen and Reyna Abhyankar and Zhihao Jia},
      year={2023},
      eprint={2305.09781},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}

使用方法

from openmind import AutoTokenizer, AutoModelForCausalLM, is_torch_npu_available
from openmind_hub import snapshot_download
import torch.nn.functional as F
from torch import Tensor
import openmind
import torch
import argparse

def parse_args():
    """Parse the command-line options for the text-generation demo.

    Returns:
        argparse.Namespace: carries ``model_name_or_path``, which falls back
        to ``"jeffding/llama-160m-openmind"`` when the flag is not given.
    """
    cli = argparse.ArgumentParser()
    model_option = {
        "type": str,
        "help": "Path to model",
        "default": "jeffding/llama-160m-openmind",
    }
    cli.add_argument("--model_name_or_path", **model_option)
    return cli.parse_args()

def main():
    """Generate a completion for a fixed question prompt and print it.

    The model location comes from the ``--model_name_or_path`` option.
    Inference runs on ``npu:0`` when ``is_torch_npu_available()`` reports an
    NPU, and on the CPU otherwise.
    """
    checkpoint = parse_args().model_name_or_path

    # Prefer the first NPU; fall back to the CPU.
    device = "npu:0" if is_torch_npu_available() else "cpu"

    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    model = model.to(device)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model.eval()

    prompt = 'Q: What is the largest bird?\nA:'
    encoded = tokenizer(prompt, return_tensors="pt")
    prompt_ids = encoded.input_ids.to(device)
    generated = model.generate(prompt_ids, max_length=20)

    # Only the first returned sequence is decoded and printed.
    first_sequence = generated[0].tolist()
    print(tokenizer.decode(first_sequence, skip_special_tokens=True))
    # Sample output recorded with the original example:
    # Q: What is the largest bird?\nA: The largest bird is a black-headed gull.
    
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

引用说明

如需引用本模型，请使用以下 BibTeX 条目：

@misc{miao2023specinfer,
      title={SpecInfer: Accelerating Generative LLM Serving with Speculative Inference and Token Tree Verification}, 
      author={Xupeng Miao and Gabriele Oliaro and Zhihao Zhang and Xinhao Cheng and Zeyu Wang and Rae Ying Yee Wong and Zhuoming Chen and Daiyaan Arfeen and Reyna Abhyankar and Zhihao Jia},
      year={2023},
      eprint={2305.09781},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}