Ketengan-Diffusion-Lab committed
Commit 4f9f0e6 · verified · 1 Parent(s): f4d3338

Update app.py
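The change drops the global torch.set_default_device('cuda') and device_map='auto' in favor of one explicit device (cuda:0 when available, else CPU): the model is loaded onto it with .to(device), and the image tensor and input ids are moved to that same device before generation.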

Files changed (1)
  1. app.py +13 -10
app.py CHANGED
@@ -10,20 +10,23 @@ transformers.logging.set_verbosity_error()
 transformers.logging.disable_progress_bar()
 warnings.filterwarnings('ignore')
 
-# set device
-torch.set_default_device('cuda') # or 'cpu'
+# set device to a specific GPU (e.g., GPU 0)
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 model_name = 'cognitivecomputations/dolphin-vision-7b'
 
-# create model
+# create model and load it to the specified device
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.float16,
-    device_map='auto',
-    trust_remote_code=True)
+    # device_map='auto', # Remove auto device mapping
+    trust_remote_code=True
+).to(device) # Load the model to the specified device
+
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
-    trust_remote_code=True)
+    trust_remote_code=True
+)
 
 def inference(prompt, image):
     messages = [
@@ -39,12 +42,12 @@ def inference(prompt, image):
     input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
 
 
-    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=model.device)
+    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=device)
 
-    # Generate with autocast for mixed precision on GPU
-    with torch.cuda.amp.autocast():
+    # Generate with autocast for mixed precision on the specified GPU
+    with torch.cuda.amp.autocast():
         output_ids = model.generate(
-            input_ids.to(model.device), # Move input_ids to GPU
+            input_ids.to(device),
             images=image_tensor,
             max_new_tokens=2048,
             use_cache=True
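
For reference, a minimal self-contained sketch of the pattern app.py follows after this commit. The imports, the <image>/-200 placeholder plumbing inside inference(), and the decode step are assumptions reconstructed from the dolphin-vision-7b model card and the unchanged lines of the diff, not part of this commit; process_images() and the images= argument to generate() come from the model's remote code (hence trust_remote_code=True). The enabled= guard on autocast is a small addition so the CPU fallback skips CUDA autocast.

import warnings

import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

transformers.logging.set_verbosity_error()
transformers.logging.disable_progress_bar()
warnings.filterwarnings('ignore')

# Pin everything to one explicit device; fall back to CPU when no GPU is present.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_name = 'cognitivecomputations/dolphin-vision-7b'

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    trust_remote_code=True
).to(device)  # single explicit placement instead of device_map='auto'

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

def inference(prompt, image):
    # Assumed prompt plumbing: <image> marks where the image embedding is spliced in.
    messages = [{"role": "user", "content": f"<image>\n{prompt}"}]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Insert the image placeholder token id (-200) between the text chunks,
    # matching the input_ids line shown unchanged in the diff above.
    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)

    # process_images() is supplied by the model's remote code.
    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=device)

    # Mixed-precision generation; disabled automatically on the CPU fallback.
    with torch.cuda.amp.autocast(enabled=device.type == 'cuda'):
        output_ids = model.generate(
            input_ids.to(device),  # inputs must live on the same device as the model
            images=image_tensor,
            max_new_tokens=2048,
            use_cache=True
        )

    # Decode only the newly generated tokens.
    return tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)

Two trade-offs worth noting: .to(device) keeps all weights on one device, so a single .to(device) on the inputs suffices, whereas device_map='auto' may shard the model across devices, in which case inputs have to follow model.device instead. Also, recent PyTorch releases deprecate torch.cuda.amp.autocast() in favor of torch.amp.autocast('cuda').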