Ketengan-Diffusion-Lab committed on
Commit
93f8b15
1 Parent(s): 2fbbc3e

Update app.py

Load the model and tokenizer directly with device_map="auto" (dropping the accelerator.main_process_first() context), remove the cross-process output gather, and create and launch the Gradio interface unconditionally instead of only on the main process.
Files changed (1)
  1. app.py +37 -39
app.py CHANGED
@@ -4,7 +4,8 @@ import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from PIL import Image
 import warnings
-from accelerate import Accelerator
+from accelerate import Accelerator, DistributedType
+import os
 
 # disable some warnings
 transformers.logging.set_verbosity_error()
@@ -16,19 +17,22 @@ accelerator = Accelerator()
 
 model_name = 'cognitivecomputations/dolphin-vision-72b'
 
-# Load model and tokenizer within main_process_first context
-with accelerator.main_process_first():
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,
-        device_map="auto",
-        trust_remote_code=True
-    )
+# Determine the number of GPUs available
+num_gpus = torch.cuda.device_count()
+print(f"Number of GPUs available: {num_gpus}")
 
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_name,
-        trust_remote_code=True
-    )
+# Load model and tokenizer
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,
+    device_map="auto",
+    trust_remote_code=True
+)
+
+tokenizer = AutoTokenizer.from_pretrained(
+    model_name,
+    trust_remote_code=True
+)
 
 # Prepare model
 model = accelerator.prepare(model)
@@ -63,31 +67,25 @@ def inference(prompt, image, temperature, beam_size):
         use_cache=True
     )[0]
 
-    # Gather output from all processes
-    output_ids = accelerator.gather(output_ids)
-
     return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
 
-# Only create and launch Gradio interface on the main process
-if accelerator.is_main_process:
-    with gr.Blocks() as demo:
-        with gr.Row():
-            with gr.Column():
-                prompt_input = gr.Textbox(label="Prompt", placeholder="Describe this image in detail")
-                image_input = gr.Image(label="Image", type="pil")
-                temperature_input = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
-                beam_size_input = gr.Slider(minimum=1, maximum=10, value=4, step=1, label="Beam Size")
-                submit_button = gr.Button("Submit")
-            with gr.Column():
-                output_text = gr.Textbox(label="Output")
-
-        submit_button.click(
-            fn=inference,
-            inputs=[prompt_input, image_input, temperature_input, beam_size_input],
-            outputs=output_text
-        )
-
-    demo.launch(share=True)
-
-# Wait for all processes to finish
-accelerator.wait_for_everyone()
+# Create Gradio interface
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column():
+            prompt_input = gr.Textbox(label="Prompt", placeholder="Describe this image in detail")
+            image_input = gr.Image(label="Image", type="pil")
+            temperature_input = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
+            beam_size_input = gr.Slider(minimum=1, maximum=10, value=4, step=1, label="Beam Size")
+            submit_button = gr.Button("Submit")
+        with gr.Column():
+            output_text = gr.Textbox(label="Output")
+
+    submit_button.click(
+        fn=inference,
+        inputs=[prompt_input, image_input, temperature_input, beam_size_input],
+        outputs=output_text
+    )
+
+# Launch the app
+demo.launch()
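
For context, the last hunk only shows the tail of inference(); a minimal sketch of how such a function could fit together is below. Everything above the model.generate call — the prompt template and the process_images helper — is an assumption (dolphin-vision's real preprocessing lives in its trust_remote_code model code); only the final generate/decode lines mirror the diff context.

# Hypothetical sketch of inference(); only the generate/decode tail
# matches the committed code, the preprocessing is an assumption.
def inference(prompt, image, temperature, beam_size):
    # Chat-style prompt template (assumed, not from the commit)
    text = f"USER: <image>\n{prompt} ASSISTANT:"
    input_ids = tokenizer(text, return_tensors="pt").input_ids.to(model.device)

    # Image preprocessing is model-specific; process_images is a
    # hypothetical stand-in for the model's own remote-code helper.
    image_tensor = process_images(image)

    output_ids = model.generate(
        input_ids,
        images=image_tensor,
        max_new_tokens=1024,  # assumed cap; not visible in the diff
        temperature=temperature,
        num_beams=beam_size,
        use_cache=True
    )[0]

    # Decode only the newly generated tokens (matches the diff context).
    return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()

With the model sharded once via device_map="auto", the script runs as a single process (plain python app.py), which is presumably why the main-process gating, the gather, and wait_for_everyone() could be dropped.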