from openmind import AutoTokenizer, AutoModelForCausalLM, is_torch_npu_available
from openmind_hub import snapshot_download
import torch.nn.functional as F
from torch import Tensor
import openmind
import torch
import argparse
import sys
import time
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the inference script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which
            is the original behavior of this function.

    Returns:
        The parsed namespace. Its ``model_name_or_path`` attribute holds
        the model location and defaults to ``"zhouhui/Mozaic-7B"``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        help="Path to model",
        default="zhouhui/Mozaic-7B",
    )
    return parser.parse_args(argv)
def main() -> None:
    """Load the model, answer one fixed prompt and print the elapsed time.

    The model and tokenizer are loaded from the ``--model_name_or_path``
    option. Inference runs on ``npu:0`` when ``is_torch_npu_available()``
    returns true and on the CPU otherwise. The decoded reply and a timing
    line are printed to stdout.
    """
    args = parse_args()
    model_path = args.model_name_or_path
    device = "npu:0" if is_torch_npu_available() else "cpu"

    # The timer starts before loading, so the reported duration covers model
    # and tokenizer loading as well as generation. perf_counter() is
    # monotonic, so the interval is unaffected by system clock adjustments.
    start_time = time.perf_counter()
    model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model.eval()

    prompt = "Hello, who are you?"
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded.input_ids.to(device)
    # Hand the tokenizer's attention mask to generate() explicitly instead of
    # discarding it and leaving generate() to guess one.
    attention_mask = encoded.attention_mask.to(device)
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=100,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(response)

    end_time = time.perf_counter()
    print(f"硬件环境:{device},推理执行时间:{end_time - start_time}秒")
if __name__ == "__main__":
    main()

我们很好奇,如果采用以下组合会产生什么结果:
我使用的基础模型是 Weyaxi/OpenHermes-2.5-neural-chat-v3-3-Slerp。
数据集:argilla/distilabel-intel-orca-dpo-pairs
该数据集约有 3000 个样本,但质量很高(根据 chosen_score 判断)。
对原始数据集应用了以下筛选条件:
dataset = dataset.filter(
lambda r:
r["status"] != "tie" and
r["chosen_score"] >= 8 and
not r["in_gsm8k_train"]
)

我决定采用 OpenHermes2.5 所使用的 ChatML。顺便说一下,我已将该聊天模板集成到模型的分词器中。
<|im_start|>system
{system}<|im_end|>
<|im_start|>user
{user}<|im_end|>
<|im_start|>assistant
{assistant}<|im_end|>

详细结果可查看此处
| 指标 | 数值 |
|---|---|
| 平均值 | 71.71 |
| AI2 推理挑战(25-shot) | 68.94 |
| HellaSwag(10-shot) | 86.45 |
| MMLU(5-shot) | 63.97 |
| TruthfulQA(0-shot) | 64.01 |
| Winogrande(5-shot) | 79.95 |
| GSM8k(5-shot) | 66.94 |