Ketengan-Diffusion-Lab committed
Commit
2a5c763
1 Parent(s): 41b96d9

Update app.py

Files changed (1)
app.py  +11 -4
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 import torch
 import transformers
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from PIL import Image
 import warnings
 
@@ -16,10 +16,17 @@ print(f"Using device: {device}")
 
 model_name = 'cognitivecomputations/dolphin-vision-72b'
 
-# create model and load it to the specified device
+# Configure 8-bit quantization
+quantization_config = BitsAndBytesConfig(
+    load_in_8bit=True,
+    llm_int8_threshold=6.0,
+    llm_int8_has_fp16_weight=False
+)
+
+# create model and load it to the specified device with 8-bit quantization
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype=torch.float16,
+    quantization_config=quantization_config,
     device_map="auto",  # This will automatically use the GPU if available
     trust_remote_code=True
 )
@@ -50,7 +57,7 @@ def inference(prompt, image):
     print(f"Device of image_tensor: {image_tensor.device}")
 
     # generate
-    with torch.cuda.amp.autocast():
+    with torch.inference_mode():
         output_ids = model.generate(
             input_ids,
             images=image_tensor,
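
What the commit does: the fp16 load (torch_dtype=torch.float16) is replaced by 8-bit quantization through bitsandbytes, which stores weights at roughly one byte per parameter instead of two, so a 72B-parameter model drops from about 144 GB to about 72 GB of weight memory. Below is a minimal sketch of the resulting load path, reusing the config and model name from the diff; the footprint check via get_memory_footprint() (a standard transformers helper) is added here for illustration and is not part of the commit:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Same 8-bit config as in the diff; requires the bitsandbytes package.
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=6.0,          # outlier threshold for the mixed int8/fp16 decomposition
    llm_int8_has_fp16_weight=False,  # do not keep an fp16 master copy of the weights
)

model = AutoModelForCausalLM.from_pretrained(
    'cognitivecomputations/dolphin-vision-72b',
    quantization_config=quantization_config,
    device_map="auto",  # shards across available GPUs (and CPU if needed)
    trust_remote_code=True,
)

# Rough check of the savings: ~1 byte per parameter instead of ~2 in fp16.
print(f"{model.get_memory_footprint() / 1e9:.1f} GB")

Setting llm_int8_has_fp16_weight=False drops the fp16 master weights, which are only needed for training, so it is the appropriate choice for an inference-only Gradio app.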
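The generate call also moves from torch.cuda.amp.autocast() to torch.inference_mode(). Autocast rewrites compute dtypes, which is redundant once bitsandbytes manages the int8/fp16 mix itself, while inference_mode disables autograd tracking entirely, which is cheaper even than torch.no_grad(). A standalone sketch of the behavior on a toy tensor (the tensor names are illustrative, not from app.py):

import torch

x = torch.randn(2, 3, requires_grad=True)

# No autograd graph is recorded inside inference_mode, and dtypes are
# left untouched, so quantized weights are used exactly as loaded.
with torch.inference_mode():
    y = x * 2

print(y.requires_grad)   # False: no graph was built
print(y.is_inference())  # True: y is an inference tensor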