Spaces:
Runtime error
Runtime error
tuandunghcmut
committed on
Commit
•
c1fbf73
1
Parent(s):
92ccacb
Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ import numpy as np
|
|
10 |
import os
|
11 |
|
12 |
HF_TOKEN = os.environ['HF_TOKEN']
|
13 |
-
|
14 |
|
15 |
# models = {
|
16 |
# "Qwen/Qwen2-VL-2B-Instruct": AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
|
@@ -37,7 +37,8 @@ models = {
|
|
37 |
"Qwen/Qwen2-VL-72B-Instruct",
|
38 |
trust_remote_code=True,
|
39 |
token=HF_TOKEN,
|
40 |
-
torch_dtype=torch.bfloat16
|
|
|
41 |
).cuda().eval()
|
42 |
|
43 |
}
|
|
|
10 |
import os
|
11 |
|
12 |
HF_TOKEN = os.environ['HF_TOKEN']
|
13 |
+
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
14 |
|
15 |
# models = {
|
16 |
# "Qwen/Qwen2-VL-2B-Instruct": AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
|
|
|
37 |
"Qwen/Qwen2-VL-72B-Instruct",
|
38 |
trust_remote_code=True,
|
39 |
token=HF_TOKEN,
|
40 |
+
torch_dtype=torch.bfloat16,
|
41 |
+
attn_implementation="flash_attention_2"
|
42 |
).cuda().eval()
|
43 |
|
44 |
}
|