1、适配昇腾处理器：Ascend310、Ascend910 系列
2、开发环境：Ascend-cann-toolkit_xxx、Ascend-cann-kernels-xxx（可选）、Python 3.8
3、下载代码：git clone https://modelers.cn/ShanXi/Llama-3-Instruct-8B-SPPO-Iter3.git
4、安装依赖：pip install -r examples/requirements.txt
5、推理测试（在代码仓库根目录下执行，脚本默认从当前目录 ./ 加载模型）：nohup python examples/inference.py
6、推理脚本：
import argparse

import torch
from openmind import pipeline, is_torch_npu_available
from transformers import AutoTokenizer, AutoModelForCausalLM
from openmind_hub import snapshot_download


def parse_args():
    """Parse the command-line options of the inference script.

    Returns:
        argparse.Namespace: namespace with a single attribute,
        ``model_name_or_path`` (str), which defaults to ``"./"``
        (the current working directory) when the flag is omitted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        help="模型路径",
        default="./",
    )
    return parser.parse_args()


# Pick the target device: the first NPU when openmind reports one is
# available, otherwise fall back to the CPU.
if is_torch_npu_available():
    device = "npu:0"
else:
    device = "cpu"

args = parse_args()

# A non-empty --model_name_or_path is used as-is. Only an empty value
# (e.g. --model_name_or_path "") reaches the hub download, because the
# argument's default is "./".
if args.model_name_or_path:
    model_path = args.model_name_or_path
else:
    model_path = snapshot_download(
        "ShanXi/Llama-3-Instruct-8B-SPPO-Iter3",
        revision="main",
        # Fixed keyword spelling (was ``resume_donwload``).
        resume_download=True,
        # Skip weight formats other than the ones loaded by PyTorch.
        ignore_patterns=["*.h5", "*.ot", "*.msgpack"],
    )

# Load the causal LM in half precision directly onto the selected device
# and switch it to evaluation mode.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=device,
    torch_dtype=torch.float16,
)
model = model.eval()

tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
# Reuse EOS as the padding token so that the ``pad_token_id`` handed to
# ``generate`` below is defined.
tokenizer.pad_token = tokenizer.eos_token

# NOTE(review): the prompt markers are written by hand and special tokens
# are not added automatically; confirm this matches the chat format the
# model expects (``tokenizer.apply_chat_template`` may be more appropriate).
input_ids = tokenizer(
    ["<s>Human: 推荐一些精彩的电影\n</s><s>Assistant: "],
    return_tensors="pt",
    add_special_tokens=False,
).input_ids
# Move the prompt to the same device the model was loaded on. The original
# hard-coded "npu", which broke the CPU fallback selected above.
input_ids = input_ids.to(device)

# Sampling configuration forwarded verbatim to ``model.generate``.
generate_input = {
    "input_ids": input_ids,
    "max_new_tokens": 512,
    "do_sample": True,
    "top_k": 50,
    "top_p": 0.95,
    "temperature": 0.3,
    "repetition_penalty": 1.3,
    "eos_token_id": tokenizer.eos_token_id,
    "bos_token_id": tokenizer.bos_token_id,
    "pad_token_id": tokenizer.pad_token_id,
}
generate_ids = model.generate(**generate_input)

# Decode the first (and only) returned sequence and print it.
text = tokenizer.decode(generate_ids[0])
print(text)