eswardivi committed on
Commit
8ea3940
1 Parent(s): 2cdab2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -14,7 +14,7 @@ import time
14
  token = os.environ["HF_TOKEN"]
15
 
16
  quantization_config = BitsAndBytesConfig(
17
- load_in_8bit=True, bnb_4bit_compute_dtype=torch.float16
18
  )
19
 
20
  model = AutoModelForCausalLM.from_pretrained(
@@ -100,6 +100,6 @@ demo = gr.ChatInterface(
100
  ],
101
  stop_btn="Stop Generation",
102
  title="Chat With LLMs",
103
- description="Now Running [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) in 8bit"
104
  )
105
  demo.launch()
 
14
  token = os.environ["HF_TOKEN"]
15
 
16
  quantization_config = BitsAndBytesConfig(
17
+ load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
18
  )
19
 
20
  model = AutoModelForCausalLM.from_pretrained(
 
100
  ],
101
  stop_btn="Stop Generation",
102
  title="Chat With LLMs",
103
+ description="Now Running [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) in 4bit"
104
  )
105
  demo.launch()