Multimodal-CoT

Runtime error

App Files Files Community

Multimodal-CoT / app.py

cooelf

update

a6dac9a over 1 year ago

raw

history blame

No virus

5.77 kB

	import string
	import gradio as gr
	import requests
	import torch
	from PIL import Image

	# Module-level model setup: everything below runs once at import time and is
	# shared by every call to inference_chat.
	# NOTE(review): timm, resolve_data_config, create_transform, T5Tokenizer and
	# T5ForMultimodalGeneration are not imported in the visible import block --
	# confirm they are brought into scope, otherwise this section raises NameError.

	# Identifier passed to from_pretrained() for both the tokenizer and the
	# generation model below.
	rationale_model_dir = "cooelf/MM-CoT-UnifiedQA-Base-Rationale-Joint"
	# Vision backbone; only its forward_features() output is used (see inference_chat).
	# num_classes=0 presumably drops the classification head -- confirm against timm docs.
	vit_model = timm.create_model("vit_base_patch16_384", pretrained=True, num_classes=0)
	vit_model.eval()
	# Build the image preprocessing transform from the backbone's own data config.
	config = resolve_data_config({}, model=vit_model)
	transform = create_transform(**config)
	tokenizer = T5Tokenizer.from_pretrained(rationale_model_dir)
	# patch_size presumably describes the (tokens, hidden size) shape of the image
	# features fed to the model -- TODO confirm against T5ForMultimodalGeneration.
	r_model = T5ForMultimodalGeneration.from_pretrained(rationale_model_dir, patch_size=(577, 768))

	def inference_chat(input_image, input_text):
	    """Generate a rationale for a question about an image.

	    Args:
	        input_image: A ``PIL.Image.Image`` (what ``gr.Image(type="pil")``
	            delivers) or anything ``PIL.Image.open`` accepts (path / file object).
	        input_text: The question text fed to the tokenizer.

	    Returns:
	        A 3-tuple ``(out[0][0], rationale, rationale)`` where ``out`` is the
	        result of ``vit_model.forward_features`` and ``rationale`` is the
	        decoded generation. The text is returned twice because the UI binds
	        this function to three output components.

	    Raises:
	        ValueError: If no image was supplied.
	    """
	    if input_image is None:
	        raise ValueError("An input image is required.")

	    # The UI hands over an already-decoded PIL image; Image.open() only works
	    # on paths / file objects, so open only when we did not get an image.
	    if isinstance(input_image, Image.Image):
	        img = input_image.convert("RGB")
	    else:
	        img = Image.open(input_image).convert("RGB")

	    with torch.no_grad():
	        # Add a leading batch dimension of 1 before running the backbone.
	        pixel_batch = transform(img).unsqueeze(0)
	        out = vit_model.forward_features(pixel_batch)
	        image_features = out.detach()

	        # padding="max_length" already pads to max_length; the deprecated
	        # pad_to_max_length flag was redundant and has been dropped.
	        source = tokenizer.batch_encode_plus(
	            [input_text],
	            max_length=512,
	            truncation=True,
	            padding="max_length",
	            return_tensors="pt",
	        )
	        rationale = r_model.generate(
	            input_ids=source["input_ids"],
	            attention_mask=source["attention_mask"],
	            image_ids=image_features,
	            max_length=512,
	            num_beams=1,
	            do_sample=False,
	        )

	    gpt3_out = tokenizer.batch_decode(rationale, skip_special_tokens=True)[0]
	    # NOTE(review): the first value is a raw feature tensor, yet the UI routes
	    # it to a text box -- confirm this is intended and not debugging residue.
	    return out[0][0], gpt3_out, gpt3_out


	# Markdown strings rendered by the gr.Markdown(...) calls in the UI below.
	# NOTE(review): the wording describes the VLE demo, while the checkpoint
	# loaded above is "cooelf/MM-CoT-UnifiedQA-Base-Rationale-Joint" -- confirm
	# whether the text should be updated for this app.

	# Page heading.
	title = """# VQA with VLE and LLM"""
	# Introductory paragraph shown directly under the heading.
	description = """VLE (Visual-Language Encoder) is an image-text multimodal understanding model built on the pre-trained text and image encoders. See https://github.com/iflytek/VLE for more details.
	We demonstrate visual question answering systems built with VLE and LLM."""
	# Explanation shown above the output text boxes.
	description1 = """VQA: The image and the question are fed to a VQA model (VLEForVQA) and the model predicts the answer.

	VQA+LLM: We feed the caption, question, and answers predicted by the VQA model to the LLM and ask the LLM to generate the final answer. The outptus from VQA+LLM may vary due to the decoding strategy of the LLM."""

	# Gradio UI: an image + question on the left, three text outputs on the right.
	# NOTE(review): the nesting of the Row/Column containers was reconstructed
	# from the component order -- confirm it matches the intended layout.
	with gr.Blocks(
	    css="""
	.message.svelte-w6rprc.svelte-w6rprc.svelte-w6rprc {font-size: 20px; margin-top: 20px}
	#component-21 > div.wrap.svelte-w6rprc {height: 600px;}
	"""
	) as iface:
	    state = gr.State([])
	    gr.Markdown(title)
	    gr.Markdown(description)

	    with gr.Row():
	        # Left column: inputs and action buttons.
	        with gr.Column(scale=1):
	            image_input = gr.Image(type="pil", label="VQA Image Input")
	            with gr.Row():
	                with gr.Column(scale=1):
	                    chat_input = gr.Textbox(lines=1, label="VQA Question Input")
	                    with gr.Row():
	                        clear_button = gr.Button(value="Clear", interactive=True, width=30)
	                        submit_button = gr.Button(
	                            value="Submit", interactive=True, variant="primary"
	                        )
	        # Right column: explanation text and the three output boxes.
	        with gr.Column():
	            gr.Markdown(description1)
	            caption_output = gr.Textbox(lines=0, label="VQA")
	            caption_output_v1 = gr.Textbox(lines=0, label="VQA + LLM (short answer)")
	            gpt3_output_v1 = gr.Textbox(lines=0, label="VQA+LLM (long answer)")

	    # Pressing Enter in the question box and clicking "Submit" run the same
	    # inference with the same wiring, so the component lists are shared.
	    qa_inputs = [image_input, chat_input]
	    qa_outputs = [caption_output, gpt3_output_v1, caption_output_v1]

	    chat_input.submit(inference_chat, qa_inputs, qa_outputs)
	    # Reset the question, the session state and all three outputs.
	    clear_button.click(
	        lambda: ("", [], "", "", ""),
	        [],
	        [chat_input, state, caption_output, gpt3_output_v1, caption_output_v1],
	        queue=False,
	    )
	    submit_button.click(inference_chat, qa_inputs, qa_outputs)

	    # One row per example; columns line up with the `inputs` list passed to
	    # gr.Examples. The original list literal was never closed, which made
	    # the whole file a SyntaxError.
	    example_rows = [
	        [
	            'api/61.png',
	            "Think about the magnetic force between the magnets in each pair. Which of the following statements is true?",
	            "The images below show two pairs of magnets. The magnets in different pairs do not affect each other. All the magnets shown are made of the same material, but some of them are different sizes and shapes.",
	            "(A) The magnitude of the magnetic force is the same in both pairs. (B) The magnitude of the magnetic force is smaller in Pair 1. (C) The magnitude of the magnetic force is smaller in Pair 2.",
	            "Magnet sizes affect the magnitude of the magnetic force. Imagine magnets that are the same shape and made of the same material. The smaller the magnets, the smaller the magnitude of the magnetic force between them.nMagnet A is the same size in both pairs. But Magnet B is smaller in Pair 2 than in Pair 1. So, the magnitude of the magnetic force is smaller in Pair 2 than in Pair 1.",
	        ],
	    ]
	    examples = gr.Examples(
	        examples=example_rows,
	        inputs=[image_input, chat_input, caption_output, caption_output_v1, gpt3_output_v1],
	    )

	# Enable request queuing (one worker, at most 10 waiting requests, queue API
	# closed) and start the app. Calling .queue() is what turns queuing on, so
	# the deprecated launch(enable_queue=True) flag is redundant and was dropped.
	iface.queue(concurrency_count=1, api_open=False, max_size=10)
	iface.launch()