HuggingFace镜像/Llama-medx_v3
模型介绍文件和版本分析
下载使用量0

1、适配昇腾处理器:Ascend310、Ascend910系列 ;2、开发环境:Ascend-cann-toolkit_xxx、Ascend-cann-kernels-xxx(可选)、python3.8 ;3、下载代码:git clone https://modelers.cn/ShanXi/Llama-medx_v3.git ;4、推理测试:python examples/inference.py ;5、推理脚本:

    import argparse
        import torch
        from openmind import pipeline, is_torch_npu_available
        from transformers import AutoTokenizer, AutoModelForCausalLM
        from openmind_hub import snapshot_download
        def parse_args():
            """Parse the command-line options of the inference script.

            Returns:
                argparse.Namespace: holds ``model_name_or_path`` (str), the
                model location given on the command line, or ``"./"`` when
                the option is omitted.
            """
            cli = argparse.ArgumentParser()
            cli.add_argument(
                "--model_name_or_path",
                type=str,
                default="./",
                help="模型路径",
            )
            return cli.parse_args()


        # Target the first Ascend NPU when one is available, otherwise the CPU.
        device = "npu:0" if is_torch_npu_available() else "cpu"


        args = parse_args()
        # Prefer the path given on the command line; only when it is empty is
        # the repository snapshot fetched from the hub.
        if args.model_name_or_path:
            model_path = args.model_name_or_path
        else:
            model_path = snapshot_download(
                "ShanXi/Llama-medx_v3",
                revision="main",
                # Fixed spelling: the keyword was written "resume_donwload",
                # which is not the intended resume option.
                resume_download=True,
                # File patterns excluded from the download.
                ignore_patterns=["*.h5", "*.ot", "*.msgpack"],
            )

        # Load the causal LM in half precision onto the selected device and
        # switch it to evaluation mode.
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            device_map=device,
            torch_dtype=torch.float16,
        )
        model = model.eval()

        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
        # Reuse the end-of-sequence token as the padding token.
        tokenizer.pad_token = tokenizer.eos_token

        # The prompt already spells out its <s>/</s> markers, so the tokenizer
        # is told not to add special tokens of its own.
        input_ids = tokenizer(
            ["<s>Human: Introducing Beijing\n</s><s>Assistant: "],
            return_tensors="pt",
            add_special_tokens=False,
        ).input_ids
        # Move the prompt to the same device the model was loaded on. The
        # previous hard-coded "npu" broke the CPU fallback chosen above.
        input_ids = input_ids.to(device)

        # Sampling configuration passed straight to ``model.generate``.
        generate_input = {
            "input_ids": input_ids,
            "max_new_tokens": 512,
            "do_sample": True,
            "top_k": 50,
            "top_p": 0.95,
            "temperature": 0.3,
            "repetition_penalty": 1.3,
            "eos_token_id": tokenizer.eos_token_id,
            "bos_token_id": tokenizer.bos_token_id,
            "pad_token_id": tokenizer.pad_token_id,
        }
        generate_ids = model.generate(**generate_input)
        # Decode the first (only) sequence of the batch and print it.
        text = tokenizer.decode(generate_ids[0])
        print(text)