thobuiq committed on
Commit
d1a07f8
1 Parent(s): b4e7be1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -4,9 +4,20 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
4
  from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
5
  from threading import Thread
6
 
 
 
 
 
 
 
7
  # Loading the tokenizer and model from Hugging Face's model hub.
8
  tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
9
- model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", load_in_4bit = True)
 
 
 
 
 
10
 
11
  # using CUDA for an optimal experience
12
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
4
  from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
5
  from threading import Thread
6
 
7
+
8
+ bnb_config = BitsAndBytesConfig(
9
+ load_in_4bit=True,
10
+ bnb_4bit_quant_type="nf4",
11
+ bnb_4bit_use_double_quant=True,
12
+ )
13
  # Loading the tokenizer and model from Hugging Face's model hub.
14
  tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
15
+ model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1",
16
+ load_in_4bit=True,
17
+ quantization_config=bnb_config,
18
+ torch_dtype=torch.bfloat16,
19
+ device_map="auto",
20
+ trust_remote_code=True)
21
 
22
  # using CUDA for an optimal experience
23
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')