Spaces:
Runtime error
Runtime error
tuandunghcmut
committed on
Commit
•
c1fbf73
1
Parent(s):
92ccacb
Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ import numpy as np
|
|
10 |
import os
|
11 |
|
12 |
HF_TOKEN = os.environ['HF_TOKEN']
|
13 |
-
|
14 |
|
15 |
# models = {
|
16 |
# "Qwen/Qwen2-VL-2B-Instruct": AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
|
@@ -37,7 +37,8 @@ models = {
|
|
37 |
"Qwen/Qwen2-VL-72B-Instruct",
|
38 |
trust_remote_code=True,
|
39 |
token=HF_TOKEN,
|
40 |
-
torch_dtype=torch.bfloat16
|
|
|
41 |
).cuda().eval()
|
42 |
|
43 |
}
|
|
|
10 |
import os
|
11 |
|
12 |
HF_TOKEN = os.environ['HF_TOKEN']
|
13 |
+
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
14 |
|
15 |
# models = {
|
16 |
# "Qwen/Qwen2-VL-2B-Instruct": AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
|
|
|
37 |
"Qwen/Qwen2-VL-72B-Instruct",
|
38 |
trust_remote_code=True,
|
39 |
token=HF_TOKEN,
|
40 |
+
torch_dtype=torch.bfloat16,
|
41 |
+
attn_implementation="flash_attention_2"
|
42 |
).cuda().eval()
|
43 |
|
44 |
}
|