在 NER 任务中使用 BC5CDR-chemicals 和 BC4CHEMD 语料库对 BioBERT 模型进行微调。
import argparse
from openmind import AutoModel, AutoTokenizer
from openmind import is_torch_npu_available
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--model_name_or_path",type=str,help="Path to model",default=None,)
args = parser.parse_args()
return args
if __name__ == '__main__':
if is_torch_npu_available():
device = "npu:0"
else:
device = "cpu"
args = parse_args()
model_path = args.model_name_or_path
# Note: CodeSage requires adding eos token at the end of
# each tokenized sequence to ensure good performance
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, add_eos_token=True)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).to(device)
inputs = tokenizer.encode("def print_hello_world():\tprint('Hello World!')", return_tensors="pt").to(device)
embedding = model(inputs)[0]
print(f'Dimension of the embedding: {embedding[0].size()}')
print(embedding)