Mistral 7B Grok

This model is a fine-tuned version of mistralai/Mistral-7B-v0.1 that has been aligned via Constitutional AI to mimic the style of xAI's Grok assistant.

It achieves the following results on the evaluation set:

Loss: 0.9348

Use in openmind

import os
import time
import argparse
import torch
import numpy as np
from openmind import pipeline, is_torch_npu_available
from openmind import AutoTokenizer, AutoModelForCausalLM
from openmind_hub import snapshot_download

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name_or_path",
        "-m",
        type=str,
        help="Path to model",
        default=None,
    )
    args = parser.parse_args()
    return args

def model_npu_inference(model_path: str):
    # 确保使用 NPU 设备
    if is_torch_npu_available():
        print("NPU available, use device_map='auto'.")
        device_map = "auto"
    else:
        print("NPU not available, use device_map='cpu'.")
        device_map = "cpu"

        # 创建 Text Generation pipeline，指定 NPU 设备
    try:
        task_pipeline = pipeline(
            task="text-generation",
            model=model_path,
            device_map=device_map,
            framework="pt",
            truncation=True
        )

        abs_model_path = os.path.abspath(model_path)
        model_name = os.path.basename(abs_model_path)

        chat = [
            {
                "role": "system",
                "content": "You are a friendly chatbot who always responds in the style of a pirate",
            },
            {
                "role": "user",
                "content": "How many helicopters can a human eat in one sitting?",
            },
        ]

        chat_input = task_pipeline.tokenizer.apply_chat_template(chat, tokenize=False)

        # 定义推理样例
        prompt = [
            chat_input,
        ]

        # 推理性能测试
        inference_times = []
        num_runs = 10

        print(f"\n=== NPU {model_name} 性能测试 ===")

        for _ in range(num_runs):
            # 随机选择输入文本
            input_text = prompt[_ % len(prompt)]

            # 性能计时
            start_time = time.time()
            results = task_pipeline(input_text, max_new_tokens=50)
            torch.npu.synchronize()

            inference_time = time.time() - start_time
            inference_times.append(inference_time)

            # 打印第一次推理的详细结果
            if _ == 0:
                print(f"输入文本: {input_text}")
                print("生成结果：")
                print(f"  {results[0]['generated_text']}")

                # 计算性能统计
        avg_time = np.mean(inference_times)
        std_time = np.std(inference_times)

        print("\n性能分析:")
        print(f"NPU平均推理时间: {avg_time:.4f} 秒")
        print(f"NPU推理时间标准差: {std_time:.4f} 秒")
        print("推理时间列表:", inference_times)

    except Exception as e:
        print(f"NPU 推理发生错误！")
        raise e

def main():
    # 解析命令行参数
    # 参数：--model_name_or_path
    args = parse_args()
    model_path = args.model_name_or_path
    model_npu_inference(model_path)


if __name__ == "__main__":
    main()

Model description

More information needed

Intended uses & limitations

More information needed

Training and evaluation data

More information needed

Training procedure

Training hyperparameters

The following hyperparameters were used during training:

learning_rate: 2e-05
train_batch_size: 8
eval_batch_size: 8
seed: 42
distributed_type: multi-GPU
num_devices: 8
gradient_accumulation_steps: 4
total_train_batch_size: 256
total_eval_batch_size: 64
optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
lr_scheduler_type: cosine
lr_scheduler_warmup_ratio: 0.1
num_epochs: 1

Training results

Training Loss	Epoch	Step	Validation Loss
0.9326	1.0	545	0.9348

Framework versions

Transformers 4.36.2
Pytorch 2.1.2+cu121
Datasets 2.16.1
Tokenizers 0.15.0

Use in openmind

import os
import time
import argparse
import torch
import numpy as np
from openmind import pipeline, is_torch_npu_available
from openmind import AutoTokenizer, AutoModelForCausalLM
from openmind_hub import snapshot_download

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name_or_path",
        "-m",
        type=str,
        help="Path to model",
        default=None,
    )
    args = parser.parse_args()
    return args

def model_npu_inference(model_path: str):
    # 确保使用 NPU 设备
    if is_torch_npu_available():
        print("NPU available, use device_map='auto'.")
        device_map = "auto"
    else:
        print("NPU not available, use device_map='cpu'.")
        device_map = "cpu"

        # 创建 Text Generation pipeline，指定 NPU 设备
    try:
        task_pipeline = pipeline(
            task="text-generation",
            model=model_path,
            device_map=device_map,
            framework="pt",
            truncation=True
        )

        abs_model_path = os.path.abspath(model_path)
        model_name = os.path.basename(abs_model_path)

        chat = [
            {
                "role": "system",
                "content": "You are a friendly chatbot who always responds in the style of a pirate",
            },
            {
                "role": "user",
                "content": "How many helicopters can a human eat in one sitting?",
            },
        ]

        chat_input = task_pipeline.tokenizer.apply_chat_template(chat, tokenize=False)

        # 定义推理样例
        prompt = [
            chat_input,
        ]

        # 推理性能测试
        inference_times = []
        num_runs = 10

        print(f"\n=== NPU {model_name} 性能测试 ===")

        for _ in range(num_runs):
            # 随机选择输入文本
            input_text = prompt[_ % len(prompt)]

            # 性能计时
            start_time = time.time()
            results = task_pipeline(input_text, max_new_tokens=50)
            torch.npu.synchronize()

            inference_time = time.time() - start_time
            inference_times.append(inference_time)

            # 打印第一次推理的详细结果
            if _ == 0:
                print(f"输入文本: {input_text}")
                print("生成结果：")
                print(f"  {results[0]['generated_text']}")

                # 计算性能统计
        avg_time = np.mean(inference_times)
        std_time = np.std(inference_times)

        print("\n性能分析:")
        print(f"NPU平均推理时间: {avg_time:.4f} 秒")
        print(f"NPU推理时间标准差: {std_time:.4f} 秒")
        print("推理时间列表:", inference_times)

    except Exception as e:
        print(f"NPU 推理发生错误！")
        raise e

def main():
    # 解析命令行参数
    # 参数：--model_name_or_path
    args = parse_args()
    model_path = args.model_name_or_path
    model_npu_inference(model_path)


if __name__ == "__main__":
    main()

Training procedure

Training hyperparameters

The following hyperparameters were used during training:

learning_rate: 2e-05

train_batch_size: 8

eval_batch_size: 8

seed: 42

distributed_type: multi-GPU

num_devices: 8

gradient_accumulation_steps: 4

total_train_batch_size: 256

total_eval_batch_size: 64

optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08

lr_scheduler_type: cosine

lr_scheduler_warmup_ratio: 0.1

num_epochs: 1

Training results

Training Loss	Epoch	Step	Validation Loss
0.9326	1.0	545	0.9348

Framework versions

Transformers 4.36.2

Pytorch 2.1.2+cu121

Datasets 2.16.1

Tokenizers 0.15.0