eswardivi committed on
Commit
2cdab2a
1 Parent(s): e9cb74c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -7
app.py CHANGED
@@ -14,7 +14,7 @@ import time
14
  token = os.environ["HF_TOKEN"]
15
 
16
  quantization_config = BitsAndBytesConfig(
17
- load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
18
  )
19
 
20
  model = AutoModelForCausalLM.from_pretrained(
@@ -34,7 +34,7 @@ else:
34
 
35
 
36
  @spaces.GPU(duration=150)
37
- def chat(message, history, temperature, top_p, top_k, max_tokens):
38
  start_time = time.time()
39
  chat = []
40
  for item in history:
@@ -52,7 +52,6 @@ def chat(message, history, temperature, top_p, top_k, max_tokens):
52
  streamer=streamer,
53
  max_new_tokens=max_tokens,
54
  do_sample=True,
55
- top_p=top_p,
56
  top_k=top_k,
57
  temperature=temperature,
58
  )
@@ -86,9 +85,7 @@ demo = gr.ChatInterface(
86
  gr.Slider(
87
  minimum=0, maximum=1, step=0.1, value=0.9, label="Temperature", render=False
88
  ),
89
- gr.Slider(
90
- minimum=0, maximum=1, step=0.1, value=0.95, label="top_p", render=False
91
- ),
92
  gr.Slider(
93
  minimum=1, maximum=10000, step=5, value=1000, label="top_k", render=False
94
  ),
@@ -103,6 +100,6 @@ demo = gr.ChatInterface(
103
  ],
104
  stop_btn="Stop Generation",
105
  title="Chat With LLMs",
106
- description="Now Running [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) in 4bit"
107
  )
108
  demo.launch()
 
14
  token = os.environ["HF_TOKEN"]
15
 
16
  quantization_config = BitsAndBytesConfig(
17
+ load_in_8bit=True, bnb_4bit_compute_dtype=torch.float16
18
  )
19
 
20
  model = AutoModelForCausalLM.from_pretrained(
 
34
 
35
 
36
  @spaces.GPU(duration=150)
37
+ def chat(message, history, temperature,do_sample, top_k, max_tokens):
38
  start_time = time.time()
39
  chat = []
40
  for item in history:
 
52
  streamer=streamer,
53
  max_new_tokens=max_tokens,
54
  do_sample=True,
 
55
  top_k=top_k,
56
  temperature=temperature,
57
  )
 
85
  gr.Slider(
86
  minimum=0, maximum=1, step=0.1, value=0.9, label="Temperature", render=False
87
  ),
88
+ gr.Checkbox(label="Sampling",value=True),
 
 
89
  gr.Slider(
90
  minimum=1, maximum=10000, step=5, value=1000, label="top_k", render=False
91
  ),
 
100
  ],
101
  stop_btn="Stop Generation",
102
  title="Chat With LLMs",
103
+ description="Now Running [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) in 8bit"
104
  )
105
  demo.launch()