HuggingFace镜像/Qwen2.5-14B-Instruct-4bit
模型介绍 | 文件和版本 | 分析
下载使用量：0

openMind

import argparse

import torch
from openmind import is_torch_npu_available
from transformers import AutoTokenizer, AutoModelForCausalLM


def parse_args():
    """Parse the command-line options of this example script.

    Returns:
        argparse.Namespace: namespace with a single attribute,
        ``model_name_or_path`` (``str``), which is ``None`` when the
        ``--model_name_or_path`` flag is not given.
    """
    cli = argparse.ArgumentParser()
    # The flag is optional; the caller decides what to load when it is unset.
    cli.add_argument("--model_name_or_path", default=None, type=str, help="Path to model")
    return cli.parse_args()


def main():
    """Load a causal LM and print one sampled continuation of a short prompt.

    The tokenizer and model are loaded from ``--model_name_or_path`` when that
    flag is supplied; otherwise the id ``Rose/Qwen2.5-14B-Instruct-4bit`` is
    used. The model is moved to ``npu:0`` if ``is_torch_npu_available()``
    reports an NPU, else it stays on the CPU. The prompt ``"Gra"`` is
    tokenized, up to 48 new tokens are sampled at temperature 0.7, and the
    decoded text is printed to stdout.
    """
    args = parse_args()
    # Honor the CLI flag: the same resolved path must reach BOTH loaders so the
    # tokenizer and the weights always come from the same checkpoint. An
    # empty/absent flag falls back to the default model id.
    model_path = args.model_name_or_path or "Rose/Qwen2.5-14B-Instruct-4bit"

    device = "npu:0" if is_torch_npu_available() else "cpu"

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path).to(device)

    # Move the encoded prompt to the model's device before generation.
    input_ids = tokenizer("Gra", return_tensors='pt').to(model.device)["input_ids"]
    # do_sample=True makes the output non-deterministic from run to run.
    output = model.generate(input_ids, max_new_tokens=48, do_sample=True, temperature=0.7)
    print(tokenizer.decode(output[0]))


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

mlx-community/Qwen2.5-14B-Instruct-4bit

模型 mlx-community/Qwen2.5-14B-Instruct-4bit 是使用 mlx-lm 版本 0.18.1 从 Qwen/Qwen2.5-14B-Instruct 转换为 MLX 格式的。

与 mlx 配合使用

pip install mlx-lm
from mlx_lm import load, generate

# load() hands back the model together with its matching tokenizer.
model, tokenizer = load("mlx-community/Qwen2.5-14B-Instruct-4bit")
# Generate a reply to the prompt "hello".
# NOTE(review): verbose=True presumably prints the generation as it runs; confirm in the mlx-lm docs.
response = generate(model, tokenizer, prompt="hello", verbose=True)