Dolphin-Inference-MGPU

Build error

Ketengan-Diffusion-Lab committed on Sep 14, 2024

Commit

5220358

•

1 Parent(s): 225c3f2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,17 +10,17 @@ transformers.logging.set_verbosity_error()
 transformers.logging.disable_progress_bar()
 warnings.filterwarnings('ignore')
-# Force CPU usage
-device = torch.device("cpu")
-torch.set_default_tensor_type(torch.FloatTensor)
 model_name = 'cognitivecomputations/dolphin-vision-7b'
-# create model and load it to CPU
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype=torch.float32,  # Use float32 for CPU
-    device_map={'': device},
     trust_remote_code=True
 )
@@ -40,9 +40,9 @@ def inference(prompt, image):
     )
     text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
-    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
-    image_tensor = model.process_images([image], model.config)
     # Add debug prints
     print(f"Device of model: {next(model.parameters()).device}")
@@ -50,12 +50,13 @@ def inference(prompt, image):
     print(f"Device of image_tensor: {image_tensor.device}")
     # generate
-    output_ids = model.generate(
-        input_ids,
-        images=image_tensor,
-        max_new_tokens=2048,
-        use_cache=True
-    )[0]
     return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()

 transformers.logging.disable_progress_bar()
 warnings.filterwarnings('ignore')
+# Set device to GPU if available, else CPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
 model_name = 'cognitivecomputations/dolphin-vision-7b'
+# create model and load it to the specified device
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
+    torch_dtype=torch.float16,
+    device_map="auto",  # This will automatically use the GPU if available
     trust_remote_code=True
 )
     )
     text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
+    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to(device)
+    image_tensor = model.process_images([image], model.config).to(device)
     # Add debug prints
     print(f"Device of model: {next(model.parameters()).device}")
     print(f"Device of image_tensor: {image_tensor.device}")
     # generate
+    with torch.cuda.amp.autocast():
+        output_ids = model.generate(
+            input_ids,
+            images=image_tensor,
+            max_new_tokens=1024,
+            use_cache=True
+        )[0]
     return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()