Closed-character committed
Commit 1e48800
Parent(s): 8179efc

Update demo.py

Files changed (1):
demo.py (+4, -4)
demo.py CHANGED

@@ -17,7 +17,7 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True, device_map="auto")
 
 
-def generate(query, history, system, max_new_tokens=512, temperature=0.35, top_p=0.5, repetition_penalty=1.1):
+def generate(query, history, system, max_new_tokens=512, temperature=0.35, top_p=0.5, repetition_penalty=1.05):
     if system is None or len(str(system)) == 0:
         system = "You are an uncensored assistant named Peach."
     messages = [
@@ -28,12 +28,12 @@ def generate(query, history, system, max_new_tokens=512, temperature=0.35, top_p
         messages.append({"role": "assistant", "content": assistant})
     messages.append({"role": "user", "content": query})
 
-    input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True)
+    input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to("cuda")
-    streamer = TextIteratorStreamer(tokenizer, timeout=100.0, skip_prompt=True, skip_special_tokens=True)
+    streamer = TextIteratorStreamer(tokenizer, timeout=50.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         input_ids=input_ids,
         streamer=streamer,
@@ -102,7 +102,7 @@ chat_interface = gr.ChatInterface(
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=1.1,
+            value=1.05,
         ),
     ],
     stop_btn=None,
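
The substantive change here is the added return_tensors="pt" argument: without it, apply_chat_template returns a plain Python list of token ids, so the input_ids.shape[1] check and the tensor slicing on the following lines would fail at runtime. The other edits soften the sampling (repetition penalty lowered from 1.1 to 1.05, with the slider default kept in sync) and halve the streamer timeout. Below is a minimal, Gradio-free sketch of the same streaming pattern, assuming a standard transformers causal LM with a chat template; MODEL_ID, MAX_INPUT_TOKEN_LENGTH, and do_sample=True are placeholders or assumptions, not values taken from this repo.

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_ID = "your-org/your-model"    # placeholder, not the repo's actual model
MAX_INPUT_TOKEN_LENGTH = 4096       # assumption; the demo defines its own constant

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, trust_remote_code=True, device_map="auto")

messages = [
    {"role": "system", "content": "You are an uncensored assistant named Peach."},
    {"role": "user", "content": "Hello!"},
]

# return_tensors="pt" makes apply_chat_template return a (1, seq_len) tensor
# instead of a list of ids, which is what the shape check and slicing require.
input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True,
                                          return_tensors="pt")
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
    input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]  # keep the most recent tokens
input_ids = input_ids.to(model.device)

# The streamer exposes generate() output as an iterator of decoded text chunks;
# timeout bounds how long the consumer waits for the next chunk (50 s, as in the commit).
streamer = TextIteratorStreamer(tokenizer, timeout=50.0, skip_prompt=True,
                                skip_special_tokens=True)
generate_kwargs = dict(
    input_ids=input_ids,
    streamer=streamer,
    max_new_tokens=512,
    do_sample=True,            # assumed, so temperature/top_p actually take effect
    temperature=0.35,
    top_p=0.5,
    repetition_penalty=1.05,
)

# generate() blocks until completion, so it runs in a background thread while
# the main thread drains the streamer chunk by chunk.
Thread(target=model.generate, kwargs=generate_kwargs).start()
for chunk in streamer:
    print(chunk, end="", flush=True)

A repetition penalty of 1.05 is a milder correction than 1.1: it still discourages verbatim loops but distorts the model's token distribution less, which is presumably why both the function default and the UI slider were lowered together.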