from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import argparse
from openmind_hub import snapshot_download
def parse_args(argv=None):
    """Parse the command-line options of this text-classification script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so a plain
            ``parse_args()`` call behaves exactly as before.

    Returns:
        argparse.Namespace: namespace exposing ``model_name_or_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        help=(
            "Model identifier or local directory handed to from_pretrained "
            "for both the model and the tokenizer (default: %(default)s)."
        ),
        default="Jinan_AICC/EnvironmentalBERT-base",
    )
    return parser.parse_args(argv)
# Script body: resolve which checkpoint to load, build a text-classification
# pipeline from it and print the prediction for one sample sentence.
args = parse_args()

if args.model_name_or_path:
    # Use the identifier/path given on the command line (or its default) as is.
    modelname = args.model_name_or_path
else:
    # Only reached when an empty value is passed explicitly. Download the same
    # repository as the CLI default so the fallback stays consistent with the
    # sequence-classification model this script is built around.
    modelname = snapshot_download(
        "Jinan_AICC/EnvironmentalBERT-base",
        revision="main",
        ignore_patterns=["*.h5", "*.ot", "*.msgpack"],
    )

# Model and tokenizer are loaded from the same location.
tokenizer_name = modelname
model_name = modelname

model = AutoModelForSequenceClassification.from_pretrained(model_name)
# `model_max_length` is the documented name of the tokenizer length limit
# (the legacy `max_len` keyword was only a backward-compatibility alias).
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, model_max_length=512)
pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)

# Classify a single example sentence and print the raw pipeline output.
print(
    pipe(
        "Scope 1 emissions are reported here on a like-for-like basis against "
        "the 2013 baseline and exclude emissions from additional vehicles "
        "used during repairs.",
        padding=True,
        truncation=True,
    )
)