alfredplpl committed on
Commit
e32c4ee
1 Parent(s): a915b3f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -8
app.py CHANGED
@@ -12,7 +12,7 @@ from threading import Thread
12
  DESCRIPTION = '''
13
  <div>
14
  <h1 style="text-align: center;">非公式Llama-3.1-Swallow-8B-Instruct-v0.1</h1>
15
- <p>tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1の非公式デモだよ。 <a href="https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1"><b>tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1</b></a>.</p>
16
  </div>
17
  '''
18
 
@@ -25,7 +25,7 @@ Built with Meta Llama 3.1
25
 
26
  PLACEHOLDER = """
27
  <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
28
- <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Meta llama3</h1>
29
  <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">なんでもきいてね</p>
30
  </div>
31
  """
@@ -48,10 +48,6 @@ h1 {
48
  # Load the tokenizer and model
49
  tokenizer = AutoTokenizer.from_pretrained("tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1")
50
  model = AutoModelForCausalLM.from_pretrained("tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1", torch_dtype=torch.bfloat16, device_map="auto")
51
- terminators = [
52
- tokenizer.eos_token_id,
53
- tokenizer.convert_tokens_to_ids("<|eot_id|>")
54
- ]
55
 
56
  @spaces.GPU()
57
  def chat_llama3_8b(message: str,
@@ -87,7 +83,6 @@ def chat_llama3_8b(message: str,
87
  temperature=temperature,
88
  top_p=0.9,
89
  repetition_penalty=1.1,
90
- eos_token_id=terminators,
91
  )
92
  # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
93
  if temperature == 0:
@@ -119,7 +114,7 @@ with gr.Blocks(fill_height=True, css=css) as demo:
119
  gr.Slider(minimum=0,
120
  maximum=1,
121
  step=0.1,
122
- value=0.4,
123
  label="Temperature",
124
  render=False),
125
  gr.Slider(minimum=128,
 
12
  DESCRIPTION = '''
13
  <div>
14
  <h1 style="text-align: center;">非公式Llama-3.1-Swallow-8B-Instruct-v0.1</h1>
15
+ <p>tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1の非公式デモだよ。 <a href="https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1"><b>tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1</b></a>.</p>
16
  </div>
17
  '''
18
 
 
25
 
26
  PLACEHOLDER = """
27
  <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
28
+ <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Meta llama3.1</h1>
29
  <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">なんでもきいてね</p>
30
  </div>
31
  """
 
48
  # Load the tokenizer and model
49
  tokenizer = AutoTokenizer.from_pretrained("tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1")
50
  model = AutoModelForCausalLM.from_pretrained("tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1", torch_dtype=torch.bfloat16, device_map="auto")
 
 
 
 
51
 
52
  @spaces.GPU()
53
  def chat_llama3_8b(message: str,
 
83
  temperature=temperature,
84
  top_p=0.9,
85
  repetition_penalty=1.1,
 
86
  )
87
  # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
88
  if temperature == 0:
 
114
  gr.Slider(minimum=0,
115
  maximum=1,
116
  step=0.1,
117
+ value=0.6,
118
  label="Temperature",
119
  render=False),
120
  gr.Slider(minimum=128,