m-ric HF staff committed on
Commit
ed5e872
•
1 Parent(s): 62a5f44

Make tokenizer more robust

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -36,9 +36,13 @@ def count_string_tokens(string: str, model: str) -> int:
36
  try:
37
  encoding = tiktoken.encoding_for_model(model.split('/')[-1])
38
  except:
39
- try:
40
- encoding = tiktoken.encoding_for_model(model.split('/')[-2] + '/' + model.split('/')[-1])
41
- except KeyError:
 
 
 
 
42
  print(f"Model {model} not found. Using cl100k_base encoding.")
43
  encoding = tiktoken.get_encoding("cl100k_base")
44
  return len(encoding.encode(string))
@@ -179,7 +183,7 @@ with gr.Blocks(css="""
179
  max_price = gr.Slider(label="Max Price per Input Token", minimum=0, maximum=0.001, step=0.00001, value=0.001)
180
  litellm_provider = gr.Dropdown(label="Inference Provider", choices=["Any"] + TOKEN_COSTS['litellm_provider'].unique().tolist(), value="Any")
181
 
182
- model = gr.Dropdown(label="Models (at least 1)", choices=TOKEN_COSTS['model'].tolist(), value="anyscale/meta-llama/Meta-Llama-3-8B-Instruct", multiselect=True)
183
 
184
  gr.Markdown("## Resulting Costs 👇")
185
 
 
36
  try:
37
  encoding = tiktoken.encoding_for_model(model.split('/')[-1])
38
  except:
39
+ if len(model.split('/')) > 1:
40
+ try:
41
+ encoding = tiktoken.encoding_for_model(model.split('/')[-2] + '/' + model.split('/')[-1])
42
+ except KeyError:
43
+ print(f"Model {model} not found. Using cl100k_base encoding.")
44
+ encoding = tiktoken.get_encoding("cl100k_base")
45
+ else:
46
  print(f"Model {model} not found. Using cl100k_base encoding.")
47
  encoding = tiktoken.get_encoding("cl100k_base")
48
  return len(encoding.encode(string))
 
183
  max_price = gr.Slider(label="Max Price per Input Token", minimum=0, maximum=0.001, step=0.00001, value=0.001)
184
  litellm_provider = gr.Dropdown(label="Inference Provider", choices=["Any"] + TOKEN_COSTS['litellm_provider'].unique().tolist(), value="Any")
185
 
186
+ model = gr.Dropdown(label="Models (at least 1)", choices=TOKEN_COSTS['model'].tolist(), value=["anyscale/meta-llama/Meta-Llama-3-8B-Instruct", "gpt-4o", "claude-3-sonnet-20240229"], multiselect=True)
187
 
188
  gr.Markdown("## Resulting Costs 👇")
189