minhdang committed on
Commit
ec6946b
1 Parent(s): 6140d08

Update app.py

Files changed (1): app.py (+2 −2)
app.py CHANGED
@@ -12,7 +12,7 @@ key = os.environ.get("key")
 from huggingface_hub import login
 login(key)
 from bitnet import replace_linears_in_hf
-os.system("pip install flash-attn --no-build-isolation")
+# os.system("pip install flash-attn --no-build-isolation")
 nf4_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
@@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id,
     # load_in_8bit=True,
     quantization_config=nf4_config,
-    attn_implementation="flash_attention_2",
+    # attn_implementation="flash_attention_2",
     # torch_dtype = torch.bfloat16,
     device_map="auto"
 )
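
For context on the change: the commit comments out the runtime flash-attn install and the attn_implementation="flash_attention_2" argument, so the model now loads with Transformers' default attention implementation. Below is a minimal sketch (not part of this commit) of an alternative that keeps the same NF4 quantization setup and enables FlashAttention-2 only when the flash-attn package is importable; model_id is a hypothetical placeholder, and the "sdpa" fallback assumes a recent transformers release that accepts it.

import importlib.util

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "some-org/some-model"  # hypothetical placeholder; app.py defines its own model_id

# NF4 4-bit quantization, mirroring the values visible in the diff.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
)

# Use FlashAttention-2 only when flash-attn is actually installed;
# otherwise fall back to PyTorch SDPA (assumption: supported by the installed transformers).
attn_impl = "flash_attention_2" if importlib.util.find_spec("flash_attn") else "sdpa"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=nf4_config,
    attn_implementation=attn_impl,
    device_map="auto",
)

This avoids the hard dependency that the commit removes while still using FlashAttention-2 opportunistically when the environment provides it.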