Ketengan-Diffusion-Lab committed
Commit 2fbbc3e
1 Parent(s): 9aeab55

Update app.py

Files changed (1)
  1. app.py +49 -38
app.py CHANGED
@@ -4,30 +4,34 @@ import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from PIL import Image
 import warnings
+from accelerate import Accelerator
 
 # disable some warnings
 transformers.logging.set_verbosity_error()
 transformers.logging.disable_progress_bar()
 warnings.filterwarnings('ignore')
 
-# Set device to GPU if available, else CPU
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print(f"Using device: {device}")
+# Initialize Accelerator
+accelerator = Accelerator()
 
 model_name = 'cognitivecomputations/dolphin-vision-72b'
 
-# create model and load it to the specified device
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16,
-    device_map="auto",
-    trust_remote_code=True
-)
-
-tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    trust_remote_code=True
-)
+# Load model and tokenizer within main_process_first context
+with accelerator.main_process_first():
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True
+    )
+
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_name,
+        trust_remote_code=True
+    )
+
+# Prepare model
+model = accelerator.prepare(model)
 
 def inference(prompt, image, temperature, beam_size):
     messages = [
@@ -40,18 +44,17 @@ def inference(prompt, image, temperature, beam_size):
     )
 
     text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
-    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to(device)
+    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
 
-    image_tensor = model.process_images([image], model.config).to(device)
+    image_tensor = model.process_images([image], model.config)
 
-    # Add debug prints
-    print(f"Device of model: {next(model.parameters()).device}")
-    print(f"Device of input_ids: {input_ids.device}")
-    print(f"Device of image_tensor: {image_tensor.device}")
+    # Move tensors to the appropriate device
+    input_ids = input_ids.to(accelerator.device)
+    image_tensor = image_tensor.to(accelerator.device)
 
     # generate
     with torch.cuda.amp.autocast():
-        output_ids = model.generate(
+        output_ids = accelerator.unwrap_model(model).generate(
            input_ids,
            images=image_tensor,
            max_new_tokens=1024,
@@ -60,23 +63,31 @@
            use_cache=True
        )[0]
 
+    # Gather output from all processes
+    output_ids = accelerator.gather(output_ids)
+
     return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
 
-with gr.Blocks() as demo:
-    with gr.Row():
-        with gr.Column():
-            prompt_input = gr.Textbox(label="Prompt", placeholder="Describe this image in detail")
-            image_input = gr.Image(label="Image", type="pil")
-            temperature_input = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
-            beam_size_input = gr.Slider(minimum=1, maximum=10, value=4, step=1, label="Beam Size")
-            submit_button = gr.Button("Submit")
-        with gr.Column():
-            output_text = gr.Textbox(label="Output")
-
-    submit_button.click(
-        fn=inference,
-        inputs=[prompt_input, image_input, temperature_input, beam_size_input],
-        outputs=output_text
-    )
-
-demo.launch(share=True)
+# Only create and launch Gradio interface on the main process
+if accelerator.is_main_process:
+    with gr.Blocks() as demo:
+        with gr.Row():
+            with gr.Column():
+                prompt_input = gr.Textbox(label="Prompt", placeholder="Describe this image in detail")
+                image_input = gr.Image(label="Image", type="pil")
+                temperature_input = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
+                beam_size_input = gr.Slider(minimum=1, maximum=10, value=4, step=1, label="Beam Size")
+                submit_button = gr.Button("Submit")
+            with gr.Column():
+                output_text = gr.Textbox(label="Output")
+
+        submit_button.click(
+            fn=inference,
+            inputs=[prompt_input, image_input, temperature_input, beam_size_input],
+            outputs=output_text
+        )
+
+    demo.launch(share=True)
 
+# Wait for all processes to finish
+accelerator.wait_for_everyone()
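For reference, the Accelerate pattern this commit adopts can be reduced to a short standalone sketch: initialize one Accelerator, load shared resources under main_process_first(), prepare the model, move inputs to accelerator.device, and keep process-specific work (such as launching a UI) on the main process. This is a minimal illustration, not the repository's code; the tiny nn.Linear model is a stand-in for the 72B checkpoint, and in a multi-GPU run the script would typically be started with `accelerate launch app.py` rather than plain `python`.

```python
import torch
import torch.nn as nn
from accelerate import Accelerator

accelerator = Accelerator()

# Anything that downloads or caches files should run on the main process first
# so the other processes reuse the cache instead of racing to write it.
with accelerator.main_process_first():
    model = nn.Linear(16, 4)  # placeholder for AutoModelForCausalLM.from_pretrained(...)

# prepare() moves the model to the process's device and, under multi-process
# launches, wraps it for distributed execution.
model = accelerator.prepare(model)

def run_once():
    # Inputs are created on CPU and moved to the device Accelerate chose.
    x = torch.randn(2, 16).to(accelerator.device)
    with torch.no_grad():
        # unwrap_model() strips the distributed wrapper before calling the module directly.
        y = accelerator.unwrap_model(model)(x)
    return y

out = run_once()

# Only the main process reports; all processes synchronize at the end.
if accelerator.is_main_process:
    print(out.shape)

accelerator.wait_for_everyone()
```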