Commit 4ac0d6a committed by zetavg
1 parent: f79f0d5

add possible missing model configs and update ui

Files changed:
- llama_lora/models.py          +5  -0
- llama_lora/ui/inference_ui.py +42 -9
- llama_lora/ui/main_page.py    +9  -1
- llama_lora/ui/tokenizer_ui.py +82 -0
llama_lora/models.py CHANGED
@@ -89,6 +89,11 @@ def load_base_model():
         base_model, device_map={"": device}, low_cpu_mem_usage=True
     )
 
+    # unwind broken decapoda-research config
+    model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
+    model.config.bos_token_id = 1
+    model.config.eos_token_id = 2
+
 
 def unload_models():
     del Global.loaded_base_model
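For context, the hard-coded IDs above are the usual workaround for LLaMA checkpoints (notably the decapoda-research conversions) whose saved config disagrees with the tokenizer's special tokens, which can make generation fail to stop. A minimal standalone sketch of the same override; the checkpoint name and loading flags here are assumptions, not part of this commit:

```python
# Sketch only: checkpoint name and flags are assumptions, not from this commit.
from transformers import LlamaForCausalLM, LlamaTokenizer

base_model = "decapoda-research/llama-7b-hf"  # hypothetical example checkpoint
tokenizer = LlamaTokenizer.from_pretrained(base_model)
model = LlamaForCausalLM.from_pretrained(base_model, low_cpu_mem_usage=True)

# Same override as the diff above: align the model config with the
# sentencepiece vocab so pad/bos/eos are handled consistently.
model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
model.config.bos_token_id = 1  # <s>
model.config.eos_token_id = 2  # </s>
```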
llama_lora/ui/inference_ui.py CHANGED
@@ -16,6 +16,8 @@ from ..utils.callbacks import Iteratorize, Stream
 
 device = get_device()
 
+default_show_raw = True
+
 
 def do_inference(
     lora_model_name,
@@ -29,6 +31,7 @@ def do_inference(
     repetition_penalty=1.2,
     max_new_tokens=128,
     stream_output=False,
+    show_raw=False,
     progress=gr.Progress(track_tqdm=True),
 ):
     try:
@@ -47,7 +50,7 @@ def do_inference(
             message = f"Currently in UI dev mode, not running actual inference.\n\nLoRA model: {lora_model_name}\n\nYour prompt is:\n\n{prompt}"
             print(message)
             time.sleep(1)
-            yield message
+            yield message, '[0]'
             return
 
         if lora_model_name == "None":
@@ -102,7 +105,10 @@ def do_inference(
                 if output[-1] in [tokenizer.eos_token_id]:
                     break
 
-                yield prompter.get_response(decoded_output)
+                raw_output = None
+                if show_raw:
+                    raw_output = str(output)
+                yield prompter.get_response(decoded_output), raw_output
             return  # early return for stream_output
 
         # Without streaming
@@ -116,7 +122,10 @@ def do_inference(
         )
         s = generation_output.sequences[0]
         output = tokenizer.decode(s)
-        yield prompter.get_response(output)
+        raw_output = None
+        if show_raw:
+            raw_output = str(s)
+        yield prompter.get_response(output), raw_output
 
     except Exception as e:
         raise gr.Error(e)
@@ -249,11 +258,17 @@ def inference_ui():
                         elem_id="inference_max_new_tokens"
                     )
 
-                    stream_output = gr.Checkbox(
-                        label="Stream Output",
-                        elem_id="inference_stream_output",
-                        value=True
-                    )
+                    with gr.Row():
+                        stream_output = gr.Checkbox(
+                            label="Stream Output",
+                            elem_id="inference_stream_output",
+                            value=True
+                        )
+                        show_raw = gr.Checkbox(
+                            label="Show Raw",
+                            elem_id="inference_show_raw",
+                            value=default_show_raw
+                        )
 
             with gr.Column():
                 with gr.Row():
@@ -267,6 +282,23 @@ def inference_ui():
                 inference_output = gr.Textbox(
                     lines=12, label="Output", elem_id="inference_output")
                 inference_output.style(show_copy_button=True)
+                with gr.Accordion(
+                        "Raw Output",
+                        open=False,
+                        visible=default_show_raw,
+                        elem_id="inference_inference_raw_output_accordion"
+                ) as raw_output_group:
+                    inference_raw_output = gr.Code(
+                        label="Raw Output",
+                        show_label=False,
+                        language="json",
+                        interactive=False,
+                        elem_id="inference_raw_output")
+
+                show_raw.change(
+                    fn=lambda show_raw: gr.Accordion.update(visible=show_raw),
+                    inputs=[show_raw],
+                    outputs=[raw_output_group])
 
         reload_selections_button.click(
             reload_selections,
@@ -291,8 +323,9 @@
                 repetition_penalty,
                 max_new_tokens,
                 stream_output,
+                show_raw,
             ],
-            outputs=inference_output,
+            outputs=[inference_output, inference_raw_output],
             api_name="inference"
         )
         stop_btn.click(fn=None, inputs=None, outputs=None,
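The inference changes rely on two Gradio patterns: a generator that yields a tuple to fill two output components, and a checkbox that toggles an accordion's visibility. A self-contained toy sketch of both (component names are illustrative, and the Gradio 3.x update API is assumed since the diff calls gr.Accordion.update):

```python
import gradio as gr

def fake_inference(prompt, show_raw):
    # Yielding a tuple fills the two components listed in `outputs` below,
    # mirroring how do_inference now yields (response, raw_output).
    raw = str(list(prompt.encode())) if show_raw else None
    yield f"echo: {prompt}", raw

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    show_raw = gr.Checkbox(label="Show Raw", value=True)
    output = gr.Textbox(label="Output")
    with gr.Accordion("Raw Output", open=False, visible=True) as raw_group:
        raw_output = gr.Code(language="json", interactive=False)

    gr.Button("Generate").click(
        fn=fake_inference,
        inputs=[prompt, show_raw],
        outputs=[output, raw_output],
    )
    # Hide or show the accordion together with the checkbox, as in the diff.
    show_raw.change(
        fn=lambda value: gr.Accordion.update(visible=value),
        inputs=[show_raw],
        outputs=[raw_group],
    )

if __name__ == "__main__":
    demo.launch()
```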
llama_lora/ui/main_page.py CHANGED
@@ -5,6 +5,7 @@ from ..models import get_model_with_lora
 
 from .inference_ui import inference_ui
 from .finetune_ui import finetune_ui
+from .tokenizer_ui import tokenizer_ui
 
 from .js_scripts import popperjs_core_code, tippy_js_code
 
@@ -25,6 +26,8 @@ def main_page():
                 inference_ui()
             with gr.Tab("Fine-tuning"):
                 finetune_ui()
+            with gr.Tab("Tokenizer"):
+                tokenizer_ui()
         info = []
         if Global.version:
             info.append(f"LLaMA-LoRA `{Global.version}`")
@@ -100,6 +103,10 @@ def main_page_custom_css():
            font-weight: 100;
        }
 
+        .error-message, .error-message p {
+            color: var(--error-text-color) !important;
+        }
+
        .textbox_that_is_only_used_to_display_a_label {
            border: 0 !important;
            box-shadow: none !important;
@@ -143,7 +150,8 @@ def main_page_custom_css():
            box-shadow: none;
        }
 
-        #inference_output > .wrap {
+        #inference_output > .wrap,
+        #inference_raw_output > .wrap {
            /* allow users to select text while generation is still in progress */
            pointer-events: none;
        }
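The wiring above follows the pattern already used for the other tabs: each tab's body is built by a function called inside `with gr.Tab(...)`, and the page-level CSS is injected once for the whole Blocks app. A stripped-down sketch with toy stand-ins for the real tab builders (names and CSS values here are illustrative only):

```python
import gradio as gr

def tokenizer_ui_stub():
    # Stand-in for llama_lora.ui.tokenizer_ui.tokenizer_ui
    gr.Markdown("Tokenizer tools go here")

custom_css = """
.error-message, .error-message p {
    color: red !important;  /* the real CSS uses var(--error-text-color) */
}
"""

with gr.Blocks(css=custom_css) as demo:
    with gr.Tab("Inference"):
        gr.Markdown("Inference UI")
    with gr.Tab("Fine-tuning"):
        gr.Markdown("Fine-tuning UI")
    with gr.Tab("Tokenizer"):
        tokenizer_ui_stub()
```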
llama_lora/ui/tokenizer_ui.py ADDED
@@ -0,0 +1,82 @@
+import gradio as gr
+import time
+import json
+
+from ..globals import Global
+from ..models import get_tokenizer
+
+
+def handle_decode(encoded_tokens_json):
+    try:
+        encoded_tokens = json.loads(encoded_tokens_json)
+        if Global.ui_dev_mode:
+            return f"Not actually decoding tokens in UI dev mode.", gr.Markdown.update("", visible=False)
+        tokenizer = get_tokenizer()
+        decoded_tokens = tokenizer.decode(encoded_tokens)
+        return decoded_tokens, gr.Markdown.update("", visible=False)
+    except Exception as e:
+        return "", gr.Markdown.update("Error: " + str(e), visible=True)
+
+
+def handle_encode(decoded_tokens):
+    try:
+        if Global.ui_dev_mode:
+            return f"[\"Not actually encoding tokens in UI dev mode.\"]", gr.Markdown.update("", visible=False)
+        tokenizer = get_tokenizer()
+        result = tokenizer(decoded_tokens)
+        encoded_tokens_json = json.dumps(result['input_ids'], indent=2)
+        return encoded_tokens_json, gr.Markdown.update("", visible=False)
+    except Exception as e:
+        return "", gr.Markdown.update("Error: " + str(e), visible=True)
+
+
+def tokenizer_ui():
+    with gr.Blocks() as tokenizer_ui_blocks:
+        with gr.Row():
+            with gr.Column():
+                encoded_tokens = gr.Code(
+                    label="Encoded Tokens (JSON)",
+                    language="json",
+                    value=sample_encoded_tokens_value,
+                    elem_id="tokenizer_encoded_tokens_input_textbox")
+                decode_btn = gr.Button("Decode ➡️")
+                encoded_tokens_error_message = gr.Markdown(
+                    "", visible=False, elem_classes="error-message")
+            with gr.Column():
+                decoded_tokens = gr.Code(
+                    label="Decoded Tokens",
+                    value=sample_decoded_text_value,
+                    elem_id="tokenizer_decoded_text_input_textbox")
+                encode_btn = gr.Button("⬅️ Encode")
+                decoded_tokens_error_message = gr.Markdown(
+                    "", visible=False, elem_classes="error-message")
+        stop_btn = gr.Button("Stop")
+
+        decoding = decode_btn.click(
+            fn=handle_decode,
+            inputs=[encoded_tokens],
+            outputs=[decoded_tokens, encoded_tokens_error_message],
+        )
+        encoding = encode_btn.click(
+            fn=handle_encode,
+            inputs=[decoded_tokens],
+            outputs=[encoded_tokens, decoded_tokens_error_message],
+        )
+        stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[decoding, encoding])
+
+    tokenizer_ui_blocks.load(_js="""
+    function tokenizer_ui_blocks_js() {
+    }
+    """)
+
+
+sample_encoded_tokens_value = """
+[
+  15043,
+  3186,
+  29889
+]
+"""
+
+sample_decoded_text_value = """
+"""
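The new tab is a thin wrapper around the tokenizer itself, so the round trip it exposes can be checked directly outside the UI. A quick sketch (the checkpoint name is an assumption, and the exact IDs depend on the tokenizer and on whether a BOS token is prepended):

```python
import json
from transformers import LlamaTokenizer

# Hypothetical checkpoint; any LLaMA tokenizer should behave similarly.
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")

# Encode: text -> token IDs (what the "⬅️ Encode" button does).
ids = tokenizer("Hello world.")["input_ids"]
print(json.dumps(ids, indent=2))  # roughly the sample value above, possibly with a leading BOS id

# Decode: token IDs -> text (what the "Decode ➡️" button does).
print(tokenizer.decode([15043, 3186, 29889]))  # expected to be close to "Hello world."
```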