import argparse
import torch
from openmind import is_torch_npu_available, AutoTokenizer, AutoModelForCausalLM,AutoModel
import time
def mean_pooling(model_output, attention_mask):
    """Average token embeddings per sequence, ignoring masked-out positions.

    Args:
        model_output: Indexable model result whose first element holds the
            per-token embeddings.
        attention_mask: Mask tensor with one entry per token; positions with
            value 0 do not contribute to the average.

    Returns:
        Tensor obtained by summing the masked token embeddings over
        dimension 1 and dividing by the per-sequence mask total.
    """
    hidden_states = model_output[0]
    # Broadcast the mask across the embedding dimension so it can weight
    # every feature of every token.
    weights = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    weighted_sum = torch.sum(hidden_states * weights, 1)
    # Clamp the divisor so a fully masked sequence does not divide by zero.
    token_counts = torch.clamp(weights.sum(1), min=1e-9)
    return weighted_sum / token_counts
def parse_args(argv=None):
    """Parse the script's command-line options.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is what
            the original zero-argument call did.

    Returns:
        argparse.Namespace with a ``model_name_or_path`` string attribute
        (defaults to ``'InsTagger'``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        help="Path to model",
        default='InsTagger',
    )
    return parser.parse_args(argv)
def main():
    """Embed two example sentences with a pretrained model and print them.

    Loads the tokenizer and model named by ``--model_name_or_path``, runs one
    forward pass without gradients, mean-pools the token embeddings into one
    vector per sentence, prints the result, and reports the wall-clock time
    of the whole run (model loading included).
    """
    start_time = time.time()
    args = parse_args()
    model_path = args.model_name_or_path or ""

    # Use the first NPU when one is available, otherwise fall back to the CPU.
    # An unconditional reassignment to 'cpu' used to follow this selection,
    # which made the NPU branch dead code; it has been removed.
    device = "npu:0" if is_torch_npu_available() else "cpu"

    sentences = ['This is an example sentence', 'Each sentence is converted']

    # Load the tokenizer and model.
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Batch padding (padding=True below) requires a pad token.
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model = AutoModel.from_pretrained(model_path)

    # If '[PAD]' was new to the vocabulary, its id lies past the end of the
    # embedding matrix and the lookup would fail; grow the matrix to fit.
    # NOTE(review): assumes the loaded model exposes the Hugging Face
    # PreTrainedModel API (get_input_embeddings / resize_token_embeddings).
    input_embeddings = model.get_input_embeddings()
    if input_embeddings is not None and len(tokenizer) > input_embeddings.num_embeddings:
        model.resize_token_embeddings(len(tokenizer))
    model = model.to(device)

    # Tokenize the sentences as one padded batch.
    encoded_input = tokenizer(sentences, return_tensors='pt', padding=True).to(device)

    # Compute token embeddings; gradients are not needed for inference.
    with torch.no_grad():
        model_output = model(**encoded_input)

    # Pool token embeddings into one vector per sentence. The result is
    # printed as-is; no normalization is applied.
    sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

    print("Sentence embeddings:")
    print(sentence_embeddings)

    elapsed_time = time.time() - start_time
    print(f"{device}:Program finished in {elapsed_time:.2f} seconds.")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

# InsTagger is a tool for automatically providing instruction tags, obtained by
# distilling tagging results from InsTag.
# InsTag aims to analyze the supervised fine-tuning (SFT) data used to align LLMs with human preferences. For local tagging deployment, we release InsTagger, fine-tuned on InsTag results, to tag the queries in SFT data. Guided by these tags, we sample a 6K subset of open-source SFT data to fine-tune LLaMA and LLaMA-2; the resulting models, TagLM-13B-v1.0 and TagLM-13B-v2.0, outperform many open-source LLMs on MT-Bench.
# This model was developed directly with FastChat, so it can easily be used for inference or served with FastChat by selecting the vicuna template.