MistralHermes-CodePro-7B-v1

image/png

在人工智能的数字殿堂中，“MistralHermes-CodePro-7B-v1”犹如算法的缔造者，语法的掌控者，以无与伦比的技艺编织着代码的经纬。该模型的命名，彰显了其双重传承——Mistral的深厚底蕴与Hermes的迅捷传递。它以资深大师般的精准驾驭着二进制的精妙乐章，将数据的跃动编排得优雅流畅，模糊了硅基与脑智之间的界限。

模型说明

MistralHermes-CodePro-7B-v1 是著名模型 teknium/OpenHermes-2.5-Mistral-7B 的精调版本。此版本使用一个包含 20 多万个代码样本、涵盖多种编程语言的数据集进行了精心微调。它专为编码助手功能打造，因此针对编码相关任务进行了优化，而非面向广泛的通用场景。

在 openmind 中的使用

import os
import time
import argparse
import torch
import numpy as np
from openmind import pipeline, is_torch_npu_available
from openmind import AutoTokenizer, AutoModelForCausalLM
from openmind_hub import snapshot_download

def parse_args():
    """Parse the command-line options of the inference script.

    Returns:
        argparse.Namespace: Holds ``model_name_or_path``, set through
        ``--model_name_or_path`` or ``-m``; it is ``None`` when the option
        is not given.
    """
    cli = argparse.ArgumentParser()
    model_option = {"type": str, "help": "Path to model", "default": None}
    cli.add_argument("--model_name_or_path", "-m", **model_option)
    return cli.parse_args()

def model_npu_inference(model_path: str) -> None:
    """Benchmark text generation for the model at ``model_path``.

    Builds a ``text-generation`` pipeline (``device_map="auto"`` when
    ``is_torch_npu_available()`` is true, ``"cpu"`` otherwise), renders a
    fixed two-message chat through the tokenizer's chat template, runs the
    pipeline 10 times with ``max_new_tokens=50``, and prints the first
    generation followed by the mean and standard deviation of the measured
    wall-clock times.

    Args:
        model_path: Value handed to ``pipeline(model=...)``; its basename is
            used as the model name in the printed report.

    Exceptions raised while building or running the pipeline are caught and
    printed instead of being propagated.
    """
    # Pick the device placement and remember the answer, so the timing loop
    # knows whether there is an NPU to synchronize with.
    npu_available = is_torch_npu_available()
    if npu_available:
        print("NPU available, use device_map='auto'.")
        device_map = "auto"
    else:
        print("NPU not available, use device_map='cpu'.")
        device_map = "cpu"

    try:
        # Create the text-generation pipeline on the selected device.
        task_pipeline = pipeline(
            task="text-generation",
            model=model_path,
            device_map=device_map,
            framework="pt",
            truncation=True,
        )

        abs_model_path = os.path.abspath(model_path)
        model_name = os.path.basename(abs_model_path)

        chat = [
            {
                "role": "system",
                "content": "You are a friendly chatbot who always responds in the style of a pirate",
            },
            {
                "role": "user",
                "content": "How many helicopters can a human eat in one sitting?",
            },
        ]

        chat_input = task_pipeline.tokenizer.apply_chat_template(chat, tokenize=False)

        # Inference samples fed to the pipeline.
        prompts = [
            chat_input,
        ]

        # Benchmark bookkeeping.
        inference_times = []
        num_runs = 10

        print(f"\n=== NPU {model_name} 性能测试 ===")

        for run_idx in range(num_runs):
            # Cycle through the samples in order (round-robin, not random).
            input_text = prompts[run_idx % len(prompts)]

            # perf_counter is monotonic, so the measured interval cannot be
            # distorted by system clock adjustments the way time.time() can.
            start_time = time.perf_counter()
            results = task_pipeline(input_text, max_new_tokens=50)
            if npu_available:
                # Wait for outstanding NPU work before stopping the timer.
                # Skipped on the CPU fallback, where there is no NPU to
                # synchronize and the call would fail the whole benchmark.
                torch.npu.synchronize()

            inference_times.append(time.perf_counter() - start_time)

            # Show the prompt and the generated text for the first run only.
            if run_idx == 0:
                print(f"输入文本: {input_text}")
                print("生成结果：")
                print(f"  {results[0]['generated_text']}")

        # Summary statistics over all runs.
        avg_time = np.mean(inference_times)
        std_time = np.std(inference_times)

        print("\n性能分析:")
        print(f"NPU平均推理时间: {avg_time:.4f} 秒")
        print(f"NPU推理时间标准差: {std_time:.4f} 秒")
        print("推理时间列表:", inference_times)

    except Exception as e:
        # Best-effort script: report the failure instead of crashing.
        print(f"NPU 推理发生错误: {e}")

def main():
    """Script entry point.

    Reads ``--model_name_or_path`` from the command line and passes it to
    the inference benchmark.
    """
    cli_args = parse_args()
    model_npu_inference(cli_args.model_name_or_path)


# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

提示词格式

MistralHermes-CodePro 采用与 OpenHermes 2.5 相同的提示词格式。

您应使用 LM Studio 与模型进行对话。

量化模型：

GGUF：beowolx/MistralHermes-CodePro-7B-v1-GGUF

MistralHermes-CodePro-7B-v1

模型说明

在 openmind 中的使用

import os
import time
import argparse
import torch
import numpy as np
from openmind import pipeline, is_torch_npu_available
from openmind import AutoTokenizer, AutoModelForCausalLM
from openmind_hub import snapshot_download

def parse_args():
    """Parse the command-line options of the inference script.

    Returns:
        argparse.Namespace: Holds ``model_name_or_path``, set through
        ``--model_name_or_path`` or ``-m``; it is ``None`` when the option
        is not given.
    """
    cli = argparse.ArgumentParser()
    model_option = {"type": str, "help": "Path to model", "default": None}
    cli.add_argument("--model_name_or_path", "-m", **model_option)
    return cli.parse_args()

def model_npu_inference(model_path: str) -> None:
    """Benchmark text generation for the model at ``model_path``.

    Builds a ``text-generation`` pipeline (``device_map="auto"`` when
    ``is_torch_npu_available()`` is true, ``"cpu"`` otherwise), renders a
    fixed two-message chat through the tokenizer's chat template, runs the
    pipeline 10 times with ``max_new_tokens=50``, and prints the first
    generation followed by the mean and standard deviation of the measured
    wall-clock times.

    Args:
        model_path: Value handed to ``pipeline(model=...)``; its basename is
            used as the model name in the printed report.

    Exceptions raised while building or running the pipeline are caught and
    printed instead of being propagated.
    """
    # Pick the device placement and remember the answer, so the timing loop
    # knows whether there is an NPU to synchronize with.
    npu_available = is_torch_npu_available()
    if npu_available:
        print("NPU available, use device_map='auto'.")
        device_map = "auto"
    else:
        print("NPU not available, use device_map='cpu'.")
        device_map = "cpu"

    try:
        # Create the text-generation pipeline on the selected device.
        task_pipeline = pipeline(
            task="text-generation",
            model=model_path,
            device_map=device_map,
            framework="pt",
            truncation=True,
        )

        abs_model_path = os.path.abspath(model_path)
        model_name = os.path.basename(abs_model_path)

        chat = [
            {
                "role": "system",
                "content": "You are a friendly chatbot who always responds in the style of a pirate",
            },
            {
                "role": "user",
                "content": "How many helicopters can a human eat in one sitting?",
            },
        ]

        chat_input = task_pipeline.tokenizer.apply_chat_template(chat, tokenize=False)

        # Inference samples fed to the pipeline.
        prompts = [
            chat_input,
        ]

        # Benchmark bookkeeping.
        inference_times = []
        num_runs = 10

        print(f"\n=== NPU {model_name} 性能测试 ===")

        for run_idx in range(num_runs):
            # Cycle through the samples in order (round-robin, not random).
            input_text = prompts[run_idx % len(prompts)]

            # perf_counter is monotonic, so the measured interval cannot be
            # distorted by system clock adjustments the way time.time() can.
            start_time = time.perf_counter()
            results = task_pipeline(input_text, max_new_tokens=50)
            if npu_available:
                # Wait for outstanding NPU work before stopping the timer.
                # Skipped on the CPU fallback, where there is no NPU to
                # synchronize and the call would fail the whole benchmark.
                torch.npu.synchronize()

            inference_times.append(time.perf_counter() - start_time)

            # Show the prompt and the generated text for the first run only.
            if run_idx == 0:
                print(f"输入文本: {input_text}")
                print("生成结果：")
                print(f"  {results[0]['generated_text']}")

        # Summary statistics over all runs.
        avg_time = np.mean(inference_times)
        std_time = np.std(inference_times)

        print("\n性能分析:")
        print(f"NPU平均推理时间: {avg_time:.4f} 秒")
        print(f"NPU推理时间标准差: {std_time:.4f} 秒")
        print("推理时间列表:", inference_times)

    except Exception as e:
        # Best-effort script: report the failure instead of crashing.
        print(f"NPU 推理发生错误: {e}")

def main():
    """Script entry point.

    Reads ``--model_name_or_path`` from the command line and passes it to
    the inference benchmark.
    """
    cli_args = parse_args()
    model_npu_inference(cli_args.model_name_or_path)


# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()