这是一个类LLaMA模型,仅包含160M参数,训练数据来源于Wikipedia以及部分C4-en和C4-realnewslike数据集。
目前尚未进行任何评估,因此使用时请谨慎。
该模型主要是作为SpecInfer论文中的基础小型推测模型(Small Speculative Model)开发的。
from openmind import AutoTokenizer, AutoModelForCausalLM, is_torch_npu_available
from openmind_hub import snapshot_download
import torch.nn.functional as F
from torch import Tensor
import openmind
import torch
import argparse
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_name_or_path",
type=str,
help="Path to model",
default="jeffding/llama-160m-openmind",
)
args = parser.parse_args()
return args
def main():
args = parse_args()
model_path = args.model_name_or_path
if is_torch_npu_available():
device = "npu:0"
else:
device = "cpu"
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model.eval()
prompt = 'Q: What is the largest bird?\nA:'
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
tokens = model.generate(input_ids, max_length=20)
print( tokenizer.decode(tokens[0].tolist(), skip_special_tokens=True) )
# Q: What is the largest bird?\nA: The largest bird is a black-headed gull.
if __name__ == "__main__":
main()
如需引用本模型,请使用
@misc{miao2023specinfer,
title={SpecInfer: Accelerating Generative LLM Serving with Speculative Inference and Token Tree Verification},
author={Xupeng Miao and Gabriele Oliaro and Zhihao Zhang and Xinhao Cheng and Zeyu Wang and Rae Ying Yee Wong and Zhuoming Chen and Daiyaan Arfeen and Reyna Abhyankar and Zhihao Jia},
year={2023},
eprint={2305.09781},
archivePrefix={arXiv},
primaryClass={cs.CL}
}