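# Gradio demo for cognitivecomputations/dolphin-vision-72b:
# a text prompt and an image go in, the model's description comes out.
# The model is loaded in 8-bit via bitsandbytes to reduce GPU memory usage.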
import gradio as gr
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from PIL import Image
import warnings
# Silence transformers warnings and progress bars
transformers.logging.set_verbosity_error()
transformers.logging.disable_progress_bar()
warnings.filterwarnings('ignore')
# Set device to GPU if available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model_name = 'cognitivecomputations/dolphin-vision-72b'
# Configure 8-bit quantization
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=6.0,
    llm_int8_has_fp16_weight=False
)
# Load the model with 8-bit quantization; device_map="auto" places weights on the GPU when available
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",  # This will automatically use the GPU if available
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)
def inference(prompt, image):
    messages = [
        {"role": "user", "content": f'<image>\n{prompt}'}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Tokenize the prompt around the <image> placeholder and splice in the
    # image token index (-200) expected by the model's custom code
    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to(device)
    # Preprocess the PIL image into the tensor format the model expects
    image_tensor = model.process_images([image], model.config).to(device)
    # Debug prints to confirm model and inputs are on the same device
    print(f"Device of model: {next(model.parameters()).device}")
    print(f"Device of input_ids: {input_ids.device}")
    print(f"Device of image_tensor: {image_tensor.device}")
    # Generate the response
    with torch.inference_mode():
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            max_new_tokens=1024,
            use_cache=True
        )[0]
    # Strip the prompt tokens and decode only the newly generated text
    return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Prompt", placeholder="Describe this image in detail")
            image_input = gr.Image(label="Image", type="pil")
            submit_button = gr.Button("Submit")
        with gr.Column():
            output_text = gr.Textbox(label="Output")
    submit_button.click(fn=inference, inputs=[prompt_input, image_input], outputs=output_text)
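# share=True creates a temporary public link in addition to the local URL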
demo.launch(share=True)