HuggingFace镜像/Llama3-Chinese-8B-Instruct
模型介绍文件和版本分析
下载使用量0

1、适配昇腾处理器:Ascend310、Ascend910系列
2、开发环境:Ascend-cann-toolkit_xxx、Ascend-cann-kernels-xxx(可选)、python3.8
3、下载代码:git clone https://modelers.cn/ShanXi/Llama3-Chinese-8B-Instruct.git
4、安装依赖:pip install -r examples/requirements.txt
5、推理测试:python examples/inference.py
6、推理脚本:

import argparse
    import torch
    from openmind import pipeline, is_torch_npu_available
    from transformers import AutoTokenizer, AutoModelForCausalLM
    from openmind_hub import snapshot_download
    def parse_args():
        """Parse the command-line options of the inference example.

        Returns:
            argparse.Namespace: holds ``model_name_or_path`` (a string that
            falls back to ``"./"`` when the flag is not given).
        """
        cli = argparse.ArgumentParser()
        cli.add_argument(
            "--model_name_or_path",
            type=str,
            default="./",
            help="模型路径",
        )
        return cli.parse_args()


    # Pick the execution device once: the first NPU when torch_npu reports one,
    # otherwise the CPU. This value is passed to the pipeline below (the
    # original computed it but then hard-coded device="npu").
    device = "npu:0" if is_torch_npu_available() else "cpu"

    args = parse_args()
    if args.model_name_or_path:
        model_path = args.model_name_or_path
    else:
        # Only reached when --model_name_or_path is given as an empty string,
        # because the option has a non-empty default.
        model_path = snapshot_download(
            "ShanXi/Llama3-Chinese-8B-Instruct",
            revision="main",
            resume_download=True,  # was misspelled "resume_donwload"
            ignore_patterns=["*.h5", "*.ot", "*.msgpack"],
        )

    # Bind the pipeline object to its own name so the imported `pipeline`
    # factory is not overwritten by its return value.
    generator = pipeline(
        "text-generation",
        model=model_path,
        model_kwargs={"torch_dtype": torch.float16},
        device=device,
    )

    # Conversation: an empty system message followed by one user question.
    messages = [
        {"role": "system", "content": ""},
        {"role": "user", "content": "介绍一下机器学习"},
    ]

    tokenizer = generator.tokenizer

    # Render the conversation to a plain-text prompt (not token ids) and append
    # the generation prompt.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Generation stops on either the tokenizer's EOS id or the "<|eot_id|>" id.
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    outputs = generator(
        prompt,
        max_new_tokens=512,
        eos_token_id=terminators,
        do_sample=True,
        top_p=0.9,
    )

    # Drop the first len(prompt) characters so only the newly generated text is
    # printed (assumes "generated_text" starts with the prompt).
    content = outputs[0]["generated_text"][len(prompt):]
    print(content)