import os
from openmind import AutoModelForCausalLM, AutoTokenizer
from openmind_hub import snapshot_download
import argparse
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of this script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            behaviour of the original zero-argument call.

    Returns:
        The parsed namespace. ``model_name_or_path`` is the string given on
        the command line, or ``None`` when the option is omitted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        # The help text previously held only a model id, which told the user
        # nothing about what the option expects.
        help=(
            "Local path or hub id of the model to load, "
            "e.g. Jinan_AICC/free-llama3-dpo-v0.2"
        ),
        default=None,
    )
    return parser.parse_args(argv)
# Script body: load a causal LM and its tokenizer, run one chat-formatted
# prompt through it, and print the generated reply.
args = parse_args()
model_path = args.model_name_or_path
if not model_path:
    # No --model_name_or_path given: from_pretrained() cannot load from None,
    # so fetch the default checkpoint from the hub instead.
    # snapshot_download is expected to return the local snapshot directory.
    model_path = snapshot_download("Jinan_AICC/free-llama3-dpo-v0.2")

model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_path)

messages = [
    {"role": "system", "content": "You are a helpful assistant. Always answer with a short response."},
    {"role": "user", "content": "Tell me what is Pythagorean theorem like you are a pirate."},
]

# Render the conversation with the tokenizer's chat template, append the
# generation prompt, and move the resulting tensor to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)

# NOTE(review): temperature normally only takes effect when sampling is
# enabled; confirm the model's generation config turns do_sample on.
generated_ids = model.generate(input_ids, max_new_tokens=512, temperature=0.5)

# For decoder-only models generate() returns the prompt tokens followed by the
# continuation; drop the prompt so only the assistant's reply is decoded.
prompt_length = input_ids.shape[-1]
reply_ids = generated_ids[0][prompt_length:]
response = tokenizer.decode(reply_ids, skip_special_tokens=True)
# Example output: Aye, matey! The Pythagorean theorem is a nautical rule that
# helps us find the length of the third side of a triangle. ...
print(response)