Ketengan-Diffusion-Lab committed
Commit 4f9f0e6 · verified · 1 Parent(s): f4d3338

Update app.py
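The change drops the global torch.set_default_device('cuda') and device_map='auto' in favor of one explicit device (cuda:0 when available, else CPU): the model is loaded onto it with .to(device), and the image tensor and input ids are moved to that same device before generation.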

Files changed (1)
  1. app.py +13 -10
app.py CHANGED
@@ -10,20 +10,23 @@ transformers.logging.set_verbosity_error()
 transformers.logging.disable_progress_bar()
 warnings.filterwarnings('ignore')
 
-# set device
-torch.set_default_device('cuda') # or 'cpu'
+# set device to a specific GPU (e.g., GPU 0)
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 model_name = 'cognitivecomputations/dolphin-vision-7b'
 
-# create model
+# create model and load it to the specified device
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.float16,
-    device_map='auto',
-    trust_remote_code=True)
+    # device_map='auto', # Remove auto device mapping
+    trust_remote_code=True
+).to(device) # Load the model to the specified device
+
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
-    trust_remote_code=True)
+    trust_remote_code=True
+)
 
 def inference(prompt, image):
     messages = [
@@ -39,12 +42,12 @@ def inference(prompt, image):
     input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
 
 
-    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=model.device)
+    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=device)
 
-    # Generate with autocast for mixed precision on GPU
-    with torch.cuda.amp.autocast():
+    # Generate with autocast for mixed precision on the specified GPU
+    with torch.cuda.amp.autocast():
         output_ids = model.generate(
-            input_ids.to(model.device), # Move input_ids to GPU
+            input_ids.to(device),
             images=image_tensor,
             max_new_tokens=2048,
             use_cache=True
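
For reference, a minimal self-contained sketch of the pattern app.py follows after this commit. The imports, the <image>/-200 placeholder plumbing inside inference(), and the decode step are assumptions reconstructed from the dolphin-vision-7b model card and the unchanged lines of the diff, not part of this commit; process_images() and the images= argument to generate() come from the model's remote code (hence trust_remote_code=True). The enabled= guard on autocast is a small addition so the CPU fallback skips CUDA autocast.

import warnings

import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

transformers.logging.set_verbosity_error()
transformers.logging.disable_progress_bar()
warnings.filterwarnings('ignore')

# Pin everything to one explicit device; fall back to CPU when no GPU is present.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_name = 'cognitivecomputations/dolphin-vision-7b'

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    trust_remote_code=True
).to(device)  # single explicit placement instead of device_map='auto'

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

def inference(prompt, image):
    # Assumed prompt plumbing: <image> marks where the image embedding is spliced in.
    messages = [{"role": "user", "content": f"<image>\n{prompt}"}]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Insert the image placeholder token id (-200) between the text chunks,
    # matching the input_ids line shown unchanged in the diff above.
    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)

    # process_images() is supplied by the model's remote code.
    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=device)

    # Mixed-precision generation; disabled automatically on the CPU fallback.
    with torch.cuda.amp.autocast(enabled=device.type == 'cuda'):
        output_ids = model.generate(
            input_ids.to(device),  # inputs must live on the same device as the model
            images=image_tensor,
            max_new_tokens=2048,
            use_cache=True
        )

    # Decode only the newly generated tokens.
    return tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)

Two trade-offs worth noting: .to(device) keeps all weights on one device, so a single .to(device) on the inputs suffices, whereas device_map='auto' may shard the model across devices, in which case inputs have to follow model.device instead. Also, recent PyTorch releases deprecate torch.cuda.amp.autocast() in favor of torch.amp.autocast('cuda').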