Spaces:

IDEA-CCNL
/

Ziya-BLIP2-14B-Visual-v1-Demo

Runtime error

App Files Files Community

Ziya-BLIP2-14B-Visual-v1-Demo / launch.py

wuxiaojun

指定BNB_CUDA_VERSION in bitsandbytes

d9c12fa over 1 year ago

raw

history blame contribute delete

8.22 kB

	#!/usr/bin/env python
	# This code is modified from https://huggingface.co/spaces/lykeven/visualglm-6b
	import gradio as gr
	import re
	from PIL import Image
	import torch
	from io import BytesIO
	import hashlib
	import os
	from transformers import LlamaForCausalLM, LlamaTokenizer, BlipImageProcessor, BitsAndBytesConfig, AutoModelForCausalLM

	# Markdown title that links to the model card. Not referenced anywhere else in the visible code.
	DESCRIPTION = '''# <a href="https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1">Ziya-Blip2-14B</a>'''

	# Usage hints, English version (shown in the UI and returned by change_language).
	MAINTENANCE_NOTICE1 = 'Hint 1: If the app report "Something went wrong, connection error out", please turn off your proxy and retry.\nHint 2: If you upload a large size of image like 10MB, it may take some time to upload and process. Please be patient and wait.'
	# Usage hints, Chinese version (the same two hints as MAINTENANCE_NOTICE1).
	MAINTENANCE_NOTICE2 = '提示1: 如果应用报了“Something went wrong, connection error out”的错误，请关闭代理并重试。\n提示2: 如果你上传了很大的图片，比如10MB大小，那将需要一些时间来上传和处理，请耐心等待。'

	# Attribution text (contains HTML links); rendered with gr.Markdown in main().
	NOTES = 'This app is adapted from <a href="https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1">https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1</a>. It would be recommended to check out the repo if you want to see the detail of our model. And most of the codes attach to this demo are modify from <a href="https://huggingface.co/spaces/lykeven/visualglm-6b">lykeven/visualglm-6b</a>.'

	import json

	# Empty chat history returned by clear_fn / clear_fn2 to reset the Chatbot.
	# NOTE: this is one shared list object; callers must not mutate it in place.
	default_chatbox = []


	def is_chinese(text):
	    """Look for Chinese characters in ``text``.

	    Returns the ``re.Match`` for the first run of characters in the range
	    U+4E00..U+9FA5, or ``None`` when ``text`` contains none. Callers use the
	    result only for its truthiness.
	    """
	    return re.search(u'[\u4e00-\u9fa5]+', text)

	# Hub access token read from the environment; None when AUTH_TOKEN is unset.
	# It is passed as use_auth_token to both from_pretrained calls for model weights below.
	AUTH_TOKEN = os.getenv("AUTH_TOKEN")

	# Language model weights. Loaded in 4-bit via bitsandbytes, with device placement
	# delegated to device_map="auto" and float16 as the requested torch dtype.
	LM_MODEL_PATH = "wuxiaojun/Ziya-LLaMA-13B-v1"
	# LM_MODEL_PATH = "/cognitive_comp/wuxiaojun/pretrained/pytorch/huggingface/Ziya-LLaMA-13B-v1"
	lm_model = LlamaForCausalLM.from_pretrained(
	LM_MODEL_PATH,
	device_map="auto",
	torch_dtype=torch.float16,
	use_auth_token=AUTH_TOKEN,
	quantization_config=BitsAndBytesConfig(load_in_4bit=True))

	# The tokenizer comes from a different repo id than the weights above and is
	# loaded without the auth token.
	TOKENIZER_PATH = "IDEA-CCNL/Ziya-LLaMA-13B-v1"
	# TOKENIZER_PATH = "/cognitive_comp/wuxiaojun/pretrained/pytorch/huggingface/Ziya-LLaMA-13B-v1"
	# tokenizer = LlamaTokenizer.from_pretrained(LM_MODEL_PATH, use_auth_token=AUTH_TOKEN)
	tokenizer = LlamaTokenizer.from_pretrained(TOKENIZER_PATH)

	# Visual model.
	# Per-channel mean / std handed to BlipImageProcessor below for normalisation
	# (presumably the OpenAI CLIP statistics, going by the names -- confirm).
	OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
	OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711]
	# Hub repo id of the visual model; a local directory can be used instead (see the
	# commented-out path below).
	visual_model_path = "IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1"
	# visual_model_path = "/cognitive_comp/wuxiaojun/pretrained/pytorch/huggingface/Ziya-BLIP2-14B-Visual-v1"
	# trust_remote_code=True allows transformers to run the modelling code shipped in that repo.
	model = AutoModelForCausalLM.from_pretrained(
	visual_model_path,
	trust_remote_code=True, use_auth_token=AUTH_TOKEN,
	torch_dtype=torch.float16)
	model.cuda() # Moves the visual model to the GPU; comment this line out to run on CPU.
	# Attach the quantized LLaMA as the language model. This happens after model.cuda(),
	# so lm_model keeps whatever placement device_map="auto" gave it.
	model.language_model = lm_model
	# Images are resized to the square input size declared in the vision config.
	image_size = model.config.vision_config.image_size
	image_processor = BlipImageProcessor(
	size={"height": image_size, "width": image_size},
	image_mean=OPENAI_CLIP_MEAN,
	image_std=OPENAI_CLIP_STD,
	)

	def post(
	    input_text,
	    temperature,
	    top_p,
	    image_prompt,
	    result_previous,
	    hidden_image
	):
	    """Run one chat turn about the uploaded image (Gradio callback).

	    Args:
	        input_text: The user's prompt from the text box.
	        temperature: Sampling temperature forwarded to ``model.chat``.
	        top_p: Top-p value forwarded to ``model.chat``.
	        image_prompt: File path of the uploaded image, or ``None``.
	        result_previous: Chatbot history as ``(query, answer)`` pairs; may be
	            ``None`` or empty.
	        hidden_image: MD5 hex digest returned by the previous turn, used to
	            detect that the image has changed.

	    Returns:
	        A ``(textbox_value, chat_history, image_hash)`` tuple. When the image
	        is missing the text is kept and the old hash is returned; when the
	        text is empty the old hash is returned; otherwise the text box is
	        cleared and the hash of the current image is returned.
	    """
	    # Keep only rows that carry a user query. This drops the "Text empty"
	    # notice (whose query is "") and rows with a None query.
	    result_text = [
	        (ele[0], ele[1]) for ele in (result_previous or []) if ele[0]
	    ]
	    # History is handed to model.chat oldest-first. The original collected it
	    # while iterating backwards, i.e. newest-first.
	    # NOTE(review): oldest-first is assumed to be what model.chat expects -- confirm.
	    previous_querys = [query for query, _ in result_text]
	    previous_outputs = [reply for _, reply in result_text]

	    is_zh = is_chinese(input_text)

	    if image_prompt is None:
	        print("Image empty")
	        if is_zh:
	            notice = '图片为空！请上传图片并重试。'
	        else:
	            notice = 'Image empty! Please upload a image and retry.'
	        result_text.append((input_text, notice))
	        return input_text, result_text, hidden_image
	    if input_text == "":
	        print("Text empty")
	        result_text.append((input_text, 'Text empty! Please enter text and retry.'))
	        return "", result_text, hidden_image

	    generate_config = {
	        "max_new_tokens": 128,
	        "top_p": top_p,
	        "temperature": temperature,
	        "repetition_penalty": 1.18,
	    }

	    # Image.open is lazy and keeps the file handle open; the context manager
	    # guarantees it is released once the pixels have been consumed.
	    with Image.open(image_prompt) as img:
	        pixel_values = image_processor(
	            img,
	            return_tensors="pt").pixel_values.to(
	                model.device).to(model.dtype)
	        # Hash the PNG re-encoding of the image to identify it across turns.
	        output_buffer = BytesIO()
	        img.save(output_buffer, "PNG")
	    img_hash = hashlib.md5(output_buffer.getvalue()).hexdigest()

	    if img_hash != hidden_image:
	        # A different image starts a fresh conversation.
	        previous_querys = []
	        previous_outputs = []
	        result_text = []

	    answer = model.chat(
	        tokenizer=tokenizer,
	        pixel_values=pixel_values,
	        query=input_text,
	        previous_querys=previous_querys,
	        previous_outputs=previous_outputs,
	        **generate_config,
	    )

	    result_text.append((input_text, answer))
	    print(result_text)
	    return "", result_text, img_hash


	def clear_fn(value):
	    """Reset callback wired to the Clear button in main().

	    ``value`` is ignored. Returns an empty string, the shared
	    ``default_chatbox`` list and ``None`` (text box, chat history, image).
	    """
	    cleared_text, cleared_image = "", None
	    return cleared_text, default_chatbox, cleared_image

	def clear_fn2(value):
	    """Return the shared ``default_chatbox`` list; ``value`` is ignored.

	    Wired to the image upload / clear events in main() to empty the chat.
	    """
	    history_reset = default_chatbox
	    return history_reset

	def io_fn(a, b, c):
	    """Log the call and pass ``a`` and ``b`` through as a tuple; ``c`` is dropped."""
	    print("call io_fn")
	    passthrough = (a, b)
	    return passthrough


	def change_language(value):
	    """Toggle the hint language.

	    ``value`` is the current button label. Returns ``(next_label, notice)``:
	    the English notice with the Chinese label when the label was
	    "Change hint to English", otherwise the Chinese notice with the English
	    label.
	    """
	    switching_to_english = value == "Change hint to English"
	    if not switching_to_english:
	        return "Change hint to English", MAINTENANCE_NOTICE2
	    return "提示变为中文", MAINTENANCE_NOTICE1


	def main():
	    """Build the Gradio UI, wire its callbacks and launch the server.

	    Reads example prompts from ``./examples/example_inputs.jsonl`` (one JSON
	    object per line with ``text`` and ``image`` keys), lays out the input
	    column and the chat column, then serves on all interfaces.
	    """
	    gr.close_all()

	    # The examples may contain non-ASCII text, so decode explicitly as UTF-8
	    # instead of relying on the platform default. Blank lines are skipped
	    # because json.loads would raise on them.
	    with open("./examples/example_inputs.jsonl", encoding="utf-8") as f:
	        examples = [json.loads(line) for line in f if line.strip()]

	    with gr.Blocks(css='style.css') as demo:

	        with gr.Row():
	            # Column scales are integers in the same 4.5 : 5.5 ratio; Gradio
	            # expects integer values for ``scale``.
	            with gr.Column(scale=9):
	                with gr.Group():
	                    input_text = gr.Textbox(label='Input Text', placeholder='Please enter text prompt below and press ENTER.')
	                    with gr.Row():
	                        run_button = gr.Button('Generate')
	                        clear_button = gr.Button('Clear')

	                    image_prompt = gr.Image(type="filepath", label="Image Prompt", value=None)
	                with gr.Row():
	                    temperature = gr.Slider(maximum=1, value=0.7, minimum=0, label='Temperature')
	                    top_p = gr.Slider(maximum=1, value=0.1, minimum=0, label='Top P')
	                with gr.Group():
	                    with gr.Row():
	                        with gr.Column(scale=7):
	                            maintenance_notice = gr.Markdown(MAINTENANCE_NOTICE1)
	                        with gr.Column(scale=2):
	                            # Hidden and not wired to any callback here.
	                            change_button = gr.Button('Change hint to English', visible=False)
	            with gr.Column(scale=11):
	                result_text = gr.components.Chatbot(label='Multi-round conversation History', value=[]).style(height=550)
	                # Carries the MD5 of the last processed image between calls to post().
	                hidden_image_hash = gr.Textbox(visible=False)

	        gr.Examples(examples=[[example["text"], example["image"]] for example in examples],
	                    inputs=[input_text, image_prompt],
	                    label="Example Inputs (Click to insert an examplet into the input box)",
	                    examples_per_page=3)

	        gr.Markdown(NOTES)

	        print(gr.__version__)

	        # The Generate button and pressing ENTER in the text box run the same turn.
	        post_inputs = [input_text, temperature, top_p, image_prompt, result_text, hidden_image_hash]
	        post_outputs = [input_text, result_text, hidden_image_hash]
	        run_button.click(fn=post, inputs=post_inputs, outputs=post_outputs)
	        input_text.submit(fn=post, inputs=post_inputs, outputs=post_outputs)

	        clear_button.click(fn=clear_fn, inputs=clear_button, outputs=[input_text, result_text, image_prompt])
	        # Uploading or removing an image empties the chat history.
	        image_prompt.upload(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
	        image_prompt.clear(fn=clear_fn2, inputs=clear_button, outputs=[result_text])

	    demo.queue(concurrency_count=10)
	    demo.launch(server_name="0.0.0.0")


	if __name__ == "__main__":
	    # Start the demo only when this file is executed as a script.
	    main()