合并大师
from openmind import AutoTokenizer, AutoModelForCausalLM, is_torch_npu_available
from openmind_hub import snapshot_download
import torch.nn.functional as F
from torch import Tensor
import openmind
import torch
import argparse
import sys
import time
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the inference example.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), ``argparse`` reads ``sys.argv[1:]``, which is
            what the original zero-argument call did.

    Returns:
        The parsed namespace; ``model_name_or_path`` holds the model
        location and defaults to ``"zhouhui/pretrainmodel/Exodia-7B"``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        help="Path to model",
        default="zhouhui/pretrainmodel/Exodia-7B",
    )
    return parser.parse_args(argv)
def main():
    """Load the model, generate a reply to a fixed prompt, and print timing.

    The model location comes from ``--model_name_or_path``. The script runs
    on ``npu:0`` when ``is_torch_npu_available()`` is true and on the CPU
    otherwise. It prints the decoded generation followed by the device name
    and the elapsed wall-clock time.
    """
    args = parse_args()
    model_path = args.model_name_or_path

    # Use the first NPU when one is available; otherwise fall back to CPU.
    device = "npu:0" if is_torch_npu_available() else "cpu"

    # NOTE: the timer starts before the model is loaded, so the reported
    # duration covers loading, tokenization, generation and decoding.
    start_time = time.time()

    model = AutoModelForCausalLM.from_pretrained(
        model_path, trust_remote_code=True
    ).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model.eval()

    prompt = "Hello, who are you?"
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    # The budget is for newly generated tokens only. It was previously passed
    # as ``max_length``, which also counts the prompt tokens and therefore
    # shortened the reply by the length of the prompt.
    max_new_tokens = 100
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.3,
        top_k=0,
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(response)

    end_time = time.time()
    print(f"硬件环境:{device},推理执行时间:{end_time - start_time}秒")
if __name__ == "__main__":
    main()

大型语言模型(LLM)是人工智能领域的一项突破性进展。通过融合来自各个独立模型的见解与数据,LLM旨在充分利用每个模型的优势,同时弥补它们各自的不足。这种融合使LLM在理解语境、生成准确内容以及适应多样化任务方面展现出无与伦比的能力。这种集成方法确保用户能够从更高的准确性、更广泛的知识覆盖以及对结构化和非结构化数据更细致的理解中受益。本质上,LLM代表了人工智能进化的下一步,催生了一个性能远超各组成部分简单相加的模型。