openMind

import argparse
import torch
from openmind import is_torch_npu_available, AutoTokenizer, AutoModelForCausalLM,AutoModel
import time
def mean_pooling(model_output, attention_mask):
    """Average token embeddings along dim 1, weighted by the attention mask.

    Args:
        model_output: Indexable model result; element 0 is taken as the
            per-token embeddings (assumed (batch, seq_len, hidden) -- TODO
            confirm against the model in use).
        attention_mask: Mask with one fewer trailing dimension than the
            embeddings; it is broadcast over the last dimension.

    Returns:
        The mask-weighted sum over dim 1 divided by the mask total, where the
        divisor is floored at 1e-9 so an all-zero mask does not divide by zero.
    """
    hidden_states = model_output[0]
    # Stretch the mask to the embedding shape so it can weight every feature.
    weights = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    weighted_sum = (hidden_states * weights).sum(1)
    weight_total = weights.sum(1).clamp(min=1e-9)
    return weighted_sum / weight_total
def parse_args():
    """Parse the command line and return the resulting namespace.

    The only option is ``--model_name_or_path`` (a string), which defaults to
    'xuyuan-trial-sentiment-bert-chinese'.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--model_name_or_path",
        default='xuyuan-trial-sentiment-bert-chinese',
        type=str,
        help="Path to model",
    )
    return cli.parse_args()


def main():
    """Embed two sample sentences with the selected model and print the result.

    Loads the tokenizer and model from the path given on the command line,
    runs a forward pass without gradients, mean-pools the token embeddings
    and prints the pooled tensor followed by the total elapsed wall time.
    """
    started_at = time.time()  # wall-clock start; the timing covers model loading too

    cli_args = parse_args()
    # Fall back to an empty path when the option is given as an empty value.
    model_path = cli_args.model_name_or_path if cli_args.model_name_or_path else ""

    # Use the first NPU when one is available, otherwise the CPU.
    device = "npu:0" if is_torch_npu_available() else 'cpu'

    sentences = ['This is an example sentence', 'Each sentence is converted']

    # Load the tokenizer and the model weights, then move the model to the device.
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModel.from_pretrained(model_path)
    model = model.to(device)

    # Tokenize the sentences as one padded batch on the same device.
    batch = tokenizer(sentences, return_tensors='pt', padding=True)
    batch = batch.to(device)

    # Forward pass without gradient tracking.
    with torch.no_grad():
        outputs = model(**batch)

    # Pool token embeddings into one vector per sentence (no L2 normalization applied).
    pooled = mean_pooling(outputs, batch['attention_mask'])
    sentence_embeddings = pooled.to(device)

    print("Sentence embeddings:")
    print(sentence_embeddings)

    elapsed = time.time() - started_at
    print(f"{device}:Program finished in {elapsed:.2f} seconds.")


# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

xuyuan-trial-sentiment-bert-chinese

该模型是 hfl/chinese-bert-wwm-ext 在一个未知数据集上的微调版本。它在评估集上取得了以下结果：

损失：0.0247
F1 宏平均：0.9899

模型描述

需要更多信息

预期用途与局限性

需要更多信息

训练和评估数据

需要更多信息

训练过程

训练超参数

训练过程中使用了以下超参数：

学习率：2e-05
训练批次大小：24
评估批次大小：24
随机种子：42
优化器：Adam，betas=(0.9,0.999)，epsilon=1e-08
学习率调度器类型：线性
训练轮次：10

训练结果

框架版本

Transformers 4.28.0
PyTorch 2.0.0+cu118
Datasets 2.12.0
Tokenizers 0.13.3

openMind

import argparse
import torch
from openmind import is_torch_npu_available, AutoTokenizer, AutoModelForCausalLM,AutoModel
import time
def mean_pooling(model_output, attention_mask):
    """Average token embeddings along dim 1, weighted by the attention mask.

    Args:
        model_output: Indexable model result; element 0 is taken as the
            per-token embeddings (assumed (batch, seq_len, hidden) -- TODO
            confirm against the model in use).
        attention_mask: Mask with one fewer trailing dimension than the
            embeddings; it is broadcast over the last dimension.

    Returns:
        The mask-weighted sum over dim 1 divided by the mask total, where the
        divisor is floored at 1e-9 so an all-zero mask does not divide by zero.
    """
    hidden_states = model_output[0]
    # Stretch the mask to the embedding shape so it can weight every feature.
    weights = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    weighted_sum = (hidden_states * weights).sum(1)
    weight_total = weights.sum(1).clamp(min=1e-9)
    return weighted_sum / weight_total
def parse_args():
    """Parse the command line and return the resulting namespace.

    The only option is ``--model_name_or_path`` (a string), which defaults to
    'xuyuan-trial-sentiment-bert-chinese'.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--model_name_or_path",
        default='xuyuan-trial-sentiment-bert-chinese',
        type=str,
        help="Path to model",
    )
    return cli.parse_args()


def main():
    """Embed two sample sentences with the selected model and print the result.

    Loads the tokenizer and model from the path given on the command line,
    runs a forward pass without gradients, mean-pools the token embeddings
    and prints the pooled tensor followed by the total elapsed wall time.
    """
    started_at = time.time()  # wall-clock start; the timing covers model loading too

    cli_args = parse_args()
    # Fall back to an empty path when the option is given as an empty value.
    model_path = cli_args.model_name_or_path if cli_args.model_name_or_path else ""

    # Use the first NPU when one is available, otherwise the CPU.
    device = "npu:0" if is_torch_npu_available() else 'cpu'

    sentences = ['This is an example sentence', 'Each sentence is converted']

    # Load the tokenizer and the model weights, then move the model to the device.
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModel.from_pretrained(model_path)
    model = model.to(device)

    # Tokenize the sentences as one padded batch on the same device.
    batch = tokenizer(sentences, return_tensors='pt', padding=True)
    batch = batch.to(device)

    # Forward pass without gradient tracking.
    with torch.no_grad():
        outputs = model(**batch)

    # Pool token embeddings into one vector per sentence (no L2 normalization applied).
    pooled = mean_pooling(outputs, batch['attention_mask'])
    sentence_embeddings = pooled.to(device)

    print("Sentence embeddings:")
    print(sentence_embeddings)

    elapsed = time.time() - started_at
    print(f"{device}:Program finished in {elapsed:.2f} seconds.")


# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

训练过程

训练超参数

训练过程中使用了以下超参数：

学习率：2e-05

训练批次大小：24

评估批次大小：24

随机种子：42

优化器：Adam，betas=(0.9,0.999)，epsilon=1e-08

学习率调度器类型：线性

训练轮次：10

训练结果

框架版本

Transformers 4.28.0

PyTorch 2.0.0+cu118

Datasets 2.12.0

Tokenizers 0.13.3