Spaces:

featherless-ai
/

try-this-model

Running

App Files Community

wxgeorge committed on Sep 6

Commit

30bad6e

•

1 Parent(s): 68492c3

:sparkles: support mattshumer's Reflection

Browse files

Files changed (2) hide show

app.py +67 -32
model-cache.json +1 -0

app.py CHANGED Viewed

@@ -5,6 +5,9 @@ import json
 import functools
 import random
 import datetime
 api_key = os.environ.get('FEATHERLESS_API_KEY')
 client = OpenAI(
@@ -18,24 +21,50 @@ def respond(message, history, model):
         history_openai_format.append({"role": "user", "content": human })
         history_openai_format.append({"role": "assistant", "content":assistant})
     history_openai_format.append({"role": "user", "content": message})
-    response = client.chat.completions.create(
-        model=model,
-        messages= history_openai_format,
-        temperature=1.0,
-        stream=True,
-        max_tokens=2000,
-        extra_headers={
-            'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/try-this-model',
-            'X-Title': "HF's missing inference widget"
-        }
-    )
-    partial_message = ""
-    for chunk in response:
-        if chunk.choices[0].delta.content is not None:
-              partial_message = partial_message + chunk.choices[0].delta.content
-              yield partial_message
 logo = open('./logo.svg').read()
@@ -69,26 +98,32 @@ def build_model_choices():
             continue
         all_choices += [ (f"{model_id} ({model_class})", model_id) for model_id in model_cache[model_class] ]
     return all_choices
 model_choices = build_model_choices()
 def initial_model(referer=None):
-    if referer == 'http://127.0.0.1:7860/':
-        return 'Sao10K/Venomia-1.1-m7'
-    if referer and referer.startswith("https://huggingface.co/"):
-        possible_model = referer[23:]
-        full_model_list = functools.reduce(lambda x,y: x+y, model_cache.values(), [])
-        model_is_supported = possible_model in full_model_list
-        if model_is_supported:
-            return possible_model
-    # let's use a random but different model each day.
-    key=os.environ.get('RANDOM_SEED', 'kcOtfNHA+e')
-    o = random.Random(f"{key}-{datetime.date.today().strftime('%Y-%m-%d')}")
-    return o.choice(model_choices)[1]
 title_text="HuggingFace's missing inference widget"
 css = """

 import functools
 import random
 import datetime
+from transformers import AutoTokenizer
+reflection_tokenizer = AutoTokenizer.from_pretrained("mattshumer/Reflection-Llama-3.1-70B")
 api_key = os.environ.get('FEATHERLESS_API_KEY')
 client = OpenAI(
         history_openai_format.append({"role": "user", "content": human })
         history_openai_format.append({"role": "assistant", "content":assistant})
     history_openai_format.append({"role": "user", "content": message})
+    if model == "mattshumer/Reflection-Llama-3.1-70B":
+        # chat/completions not working for this model;
+        # apply chat template locally
+        response = client.completions.create(
+            model=model,
+            prompt=reflection_tokenizer.apply_chat_template(history_openai_format, tokenize=False),
+            temperature=1.0,
+            stream=True,
+            max_tokens=2000,
+            extra_headers={
+                'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/try-this-model',
+                'X-Title': "HF's missing inference widget"
+            }
+        )
+        # debugger_ran = False
+        partial_message = ""
+        for chunk in response:
+            # if not debugger_ran:
+            #     import code
+            #     code.InteractiveConsole(locals=locals()).interact()
+            #     debugger_ran = True
+            if chunk.choices[0].text is not None:
+                partial_message = partial_message + chunk.choices[0].text
+                yield partial_message
+    else:
+        response = client.chat.completions.create(
+            model=model,
+            messages= history_openai_format,
+            temperature=1.0,
+            stream=True,
+            max_tokens=2000,
+            extra_headers={
+                'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/try-this-model',
+                'X-Title': "HF's missing inference widget"
+            }
+        )
+        partial_message = ""
+        for chunk in response:
+            if chunk.choices[0].delta.content is not None:
+                  partial_message = partial_message + chunk.choices[0].delta.content
+                  yield partial_message
 logo = open('./logo.svg').read()
             continue
         all_choices += [ (f"{model_id} ({model_class})", model_id) for model_id in model_cache[model_class] ]
+    # and add one more ...
+    model_class = "llama3-70b-8k"
+    model_id = "mattshumer/Reflection-Llama-3.1-70B"
+    all_choices += [(f"{model_id} ({model_class})", model_id)]
     return all_choices
 model_choices = build_model_choices()
 def initial_model(referer=None):
+    return "mattshumer/Reflection-Llama-3.1-70B"
+    # if referer == 'http://127.0.0.1:7860/':
+    #     return 'Sao10K/Venomia-1.1-m7'
+    # if referer and referer.startswith("https://huggingface.co/"):
+    #     possible_model = referer[23:]
+    #     full_model_list = functools.reduce(lambda x,y: x+y, model_cache.values(), [])
+    #     model_is_supported = possible_model in full_model_list
+    #     if model_is_supported:
+    #         return possible_model
+    # # let's use a random but different model each day.
+    # key=os.environ.get('RANDOM_SEED', 'kcOtfNHA+e')
+    # o = random.Random(f"{key}-{datetime.date.today().strftime('%Y-%m-%d')}")
+    # return o.choice(model_choices)[1]
 title_text="HuggingFace's missing inference widget"
 css = """

model-cache.json CHANGED Viewed

@@ -515,6 +515,7 @@
     "jondurbin/airoboros-70b-3.3",
     "jondurbin/airoboros-dpo-70b-3.3",
     "m42-health/Llama3-Med42-70B",
     "meta-llama/Meta-Llama-3-70B-Instruct",
     "meta-llama/Meta-Llama-3.1-70B-Instruct",
     "migtissera/Llama-3-70B-Synthia-v3.5",

     "jondurbin/airoboros-70b-3.3",
     "jondurbin/airoboros-dpo-70b-3.3",
     "m42-health/Llama3-Med42-70B",
+    "mattshumer/Reflection-Llama-3.1-70B",
     "meta-llama/Meta-Llama-3-70B-Instruct",
     "meta-llama/Meta-Llama-3.1-70B-Instruct",
     "migtissera/Llama-3-70B-Synthia-v3.5",