Spaces:

chihhuiho
/

VirtualTA

Sleeping

App Files Files Community

VirtualTA / app.py

bill83221

update

a5a23fd 7 months ago

raw

history blame contribute delete

1.93 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
	from peft import prepare_model_for_kbit_training
	from peft import LoraConfig, get_peft_model
	from peft import PeftModel, PeftConfig
	from datasets import load_dataset
	import transformers

	# Base checkpoint on the Hugging Face Hub (a GPTQ build, going by its name).
	fixed_llm_name = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
	model = AutoModelForCausalLM.from_pretrained(
	    fixed_llm_name,
	    device_map="auto",  # let the loader decide how to place weights on CPU/GPU
	    trust_remote_code=False,  # never run custom model code shipped with the repo
	    revision="main",  # branch/revision of the model repo to load
	)

	# Tokenizer matching the base checkpoint (fast implementation requested).
	tokenizer = AutoTokenizer.from_pretrained(fixed_llm_name, use_fast=True)

	# Load the PEFT adapter from the "chihhuiho/VirtualTA" repo and wrap the base
	# model with it. `config` is not read again in the visible code; it is kept
	# because it is a module-level name.
	config = PeftConfig.from_pretrained("chihhuiho/VirtualTA")
	model = PeftModel.from_pretrained(model, "chihhuiho/VirtualTA")


	# Instruction text placed in front of every student post. The misspelled name
	# is kept unchanged so that anything already referencing it keeps working.
	intstructions_string = "Assume you are a virtual teaching assistant in the statistical and machine learning course. Your job is to communicate with students, answer technical questions and help the student to solve the problem. Please respond to the following post from a student."


	def prompt_template(comment: str) -> str:
	    """Build the full prompt for one student post.

	    Args:
	        comment: The student's post, inserted verbatim.

	    Returns:
	        The instruction text followed by ``comment``, separated by single
	        spaces and wrapped in ``[INST]`` / ``[/INST]`` markers.
	    """
	    # A named function instead of a lambda bound to a name: same output, but
	    # it gets a real __name__, a docstring and readable tracebacks.
	    return f"[INST] {intstructions_string} {comment} [/INST]"

	def predict(comment: str) -> str:
	    """Generate the assistant's reply to a single student post.

	    Args:
	        comment: The student's post as plain text.

	    Returns:
	        The generated reply with the prompt, special tokens and surrounding
	        whitespace removed. If the model starts another ``[INST]`` turn,
	        everything from that marker on is dropped.
	    """
	    prompt = prompt_template(comment)

	    # Inference only: evaluation mode deactivates dropout modules.
	    model.eval()

	    # Tokenize and move the whole encoding (ids and attention mask) to the
	    # device the model was actually placed on, instead of assuming "cuda".
	    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
	    prompt_length = inputs["input_ids"].shape[1]

	    # Pass the attention mask along with the ids: the pad token is set to the
	    # EOS token here, so the mask cannot be inferred from the ids.
	    output_ids = model.generate(
	        **inputs,
	        max_new_tokens=400,
	        pad_token_id=tokenizer.eos_token_id,
	    )

	    # Decode only the newly generated tokens. Splitting the decoded
	    # prompt+completion on "[/INST]" returns the wrong segment whenever the
	    # student's post itself contains that marker.
	    reply = tokenizer.decode(
	        output_ids[0][prompt_length:],
	        skip_special_tokens=True,  # drops "</s>" and other special tokens
	    )

	    # Keep only the first turn if the model starts a new "[INST]" block.
	    return reply.split("[INST]")[0].strip()


	# Web UI: a single text input routed through predict(), with a text output.
	iface = gr.Interface(
	    fn=predict,
	    inputs="text",
	    outputs="text",
	)
	iface.launch()