zetavg committed
Commit 66c7018
1 Parent(s): d754e91
llama_lora/ui/inference_ui.py CHANGED
@@ -140,6 +140,16 @@ def handle_prompt_template_change(prompt_template):
     return gr_updates
 
 
+def update_prompt_preview(prompt_template,
+                          variable_0, variable_1, variable_2, variable_3,
+                          variable_4, variable_5, variable_6, variable_7):
+    variables = [variable_0, variable_1, variable_2, variable_3,
+                 variable_4, variable_5, variable_6, variable_7]
+    prompter = Prompter(prompt_template)
+    prompt = prompter.generate_prompt(variables)
+    return gr.Textbox.update(value=prompt)
+
+
 def inference_ui():
     with gr.Blocks() as inference_ui_blocks:
         with gr.Row():
@@ -154,7 +164,7 @@ def inference_ui():
                 elem_id="inference_prompt_template",
             )
             reload_selections_button = gr.Button(
-                "Reload",
+                "",
                 elem_id="inference_reload_selections_button"
             )
             reload_selections_button.style(
@@ -162,8 +172,8 @@
                 size="sm")
         with gr.Row():
             with gr.Column():
-                with gr.Column():
-                    variable_0 = gr.Textbox(lines=2, label="Prompt")
+                with gr.Column(elem_id="inference_prompt_box"):
+                    variable_0 = gr.Textbox(lines=2, label="Prompt", placeholder="Tell me about alpecas and llamas.")
                     variable_1 = gr.Textbox(lines=2, label="", visible=False)
                     variable_2 = gr.Textbox(lines=2, label="", visible=False)
                     variable_3 = gr.Textbox(lines=2, label="", visible=False)
@@ -172,6 +182,10 @@
                     variable_6 = gr.Textbox(lines=2, label="", visible=False)
                     variable_7 = gr.Textbox(lines=2, label="", visible=False)
 
+                with gr.Accordion("Preview", open=False, elem_id="inference_preview_prompt_container"):
+                    preview_prompt = gr.Textbox(
+                        show_label=False, interactive=False, elem_id="inference_preview_prompt")
+
             with gr.Column():
                 with gr.Row():
                     generate_btn = gr.Button(
@@ -224,7 +238,7 @@
                 )
             with gr.Column():
                 inference_output = gr.Textbox(
-                    lines=12, label="Output Text", elem_id="inference_output")
+                    lines=12, label="Output", elem_id="inference_output")
 
         reload_selections_button.click(
             reload_selections,
@@ -255,6 +269,34 @@ def inference_ui():
         stop_btn.click(fn=None, inputs=None, outputs=None,
                        cancels=[generate_event])
 
+        prompt_template.change(fn=update_prompt_preview, inputs=[prompt_template,
+                               variable_0, variable_1, variable_2, variable_3,
+                               variable_4, variable_5, variable_6, variable_7,], outputs=preview_prompt)
+        variable_0.change(fn=update_prompt_preview, inputs=[prompt_template,
+                          variable_0, variable_1, variable_2, variable_3,
+                          variable_4, variable_5, variable_6, variable_7,], outputs=preview_prompt)
+        variable_1.change(fn=update_prompt_preview, inputs=[prompt_template,
+                          variable_0, variable_1, variable_2, variable_3,
+                          variable_4, variable_5, variable_6, variable_7,], outputs=preview_prompt)
+        variable_2.change(fn=update_prompt_preview, inputs=[prompt_template,
+                          variable_0, variable_1, variable_2, variable_3,
+                          variable_4, variable_5, variable_6, variable_7,], outputs=preview_prompt)
+        variable_3.change(fn=update_prompt_preview, inputs=[prompt_template,
+                          variable_0, variable_1, variable_2, variable_3,
+                          variable_4, variable_5, variable_6, variable_7,], outputs=preview_prompt)
+        variable_4.change(fn=update_prompt_preview, inputs=[prompt_template,
+                          variable_0, variable_1, variable_2, variable_3,
+                          variable_4, variable_5, variable_6, variable_7,], outputs=preview_prompt)
+        variable_5.change(fn=update_prompt_preview, inputs=[prompt_template,
+                          variable_0, variable_1, variable_2, variable_3,
+                          variable_4, variable_5, variable_6, variable_7,], outputs=preview_prompt)
+        variable_6.change(fn=update_prompt_preview, inputs=[prompt_template,
+                          variable_0, variable_1, variable_2, variable_3,
+                          variable_4, variable_5, variable_6, variable_7,], outputs=preview_prompt)
+        variable_7.change(fn=update_prompt_preview, inputs=[prompt_template,
+                          variable_0, variable_1, variable_2, variable_3,
+                          variable_4, variable_5, variable_6, variable_7,], outputs=preview_prompt)
+
         inference_ui_blocks.load(_js="""
         function inference_ui_blocks_js() {
           // Auto load options
@@ -269,51 +311,75 @@ def inference_ui():
           setTimeout(function () {
 
             tippy("#inference_prompt_template", {
-              placement: 'bottom-start',
-              delay: [500, 0],
-              content: 'Templates are loaded from the "templates" folder of your data directory. Be sure to select the template that matches your selected LoRA model to get the best results.',
+              placement: 'bottom-start',
+              delay: [500, 0],
+              animation: 'scale-subtle',
+              content: 'Templates are loaded from the "templates" folder of your data directory. Be sure to select the template that matches your selected LoRA model to get the best results.',
+            });
+
+            tippy("#inference_reload_selections_button", {
+              placement: 'bottom-end',
+              delay: [500, 0],
+              animation: 'scale-subtle',
+              content: 'Press to reload LoRA Model and Prompt Template selections.',
+            });
+
+            document.querySelector('#inference_preview_prompt_container .label-wrap').addEventListener('click', function () {
+              tippy("#inference_preview_prompt", {
+                placement: 'right',
+                delay: [500, 0],
+                animation: 'scale-subtle',
+                content: 'This is the input that will actually be sent to the language model.',
+              });
             });
 
             tippy("#inference_temperature", {
-              placement: 'right',
-              delay: [500, 0],
-              content: 'Controls randomness: Lowering results in less random completions. Higher values (e.g., 1.0) make the model generate more diverse and random outputs. As the temperature approaches zero, the model will become deterministic and repetitive.',
+              placement: 'right',
+              delay: [500, 0],
+              animation: 'scale-subtle',
+              content: 'Controls randomness: Lowering results in less random completions. Higher values (e.g., 1.0) make the model generate more diverse and random outputs. As the temperature approaches zero, the model will become deterministic and repetitive.',
             });
 
             tippy("#inference_top_p", {
-              placement: 'right',
-              delay: [500, 0],
-              content: 'Controls diversity via nucleus sampling: only the tokens whose cumulative probability exceeds "top_p" are considered. 0.5 means half of all likelihood-weighted options are considered.',
+              placement: 'right',
+              delay: [500, 0],
+              animation: 'scale-subtle',
+              content: 'Controls diversity via nucleus sampling: only the tokens whose cumulative probability exceeds "top_p" are considered. 0.5 means half of all likelihood-weighted options are considered.',
            });
 
             tippy("#inference_top_k", {
-              placement: 'right',
-              delay: [500, 0],
-              content: 'Controls diversity of the generated text by only considering the "top_k" tokens with the highest probabilities. This method can lead to more focused and coherent outputs by reducing the impact of low probability tokens.',
+              placement: 'right',
+              delay: [500, 0],
+              animation: 'scale-subtle',
+              content: 'Controls diversity of the generated text by only considering the "top_k" tokens with the highest probabilities. This method can lead to more focused and coherent outputs by reducing the impact of low probability tokens.',
             });
 
             tippy("#inference_beams", {
-              placement: 'right',
-              delay: [500, 0],
-              content: 'Number of candidate sequences explored in parallel during text generation using beam search. A higher value increases the chances of finding high-quality, coherent output, but may slow down the generation process.',
+              placement: 'right',
+              delay: [500, 0],
+              animation: 'scale-subtle',
+              content: 'Number of candidate sequences explored in parallel during text generation using beam search. A higher value increases the chances of finding high-quality, coherent output, but may slow down the generation process.',
             });
 
             tippy("#inference_repetition_penalty", {
-              placement: 'right',
-              delay: [500, 0],
-              content: 'Applies a penalty to the probability of tokens that have already been generated, discouraging the model from repeating the same words or phrases. The penalty is applied by dividing the token probability by a factor based on the number of times the token has appeared in the generated text.',
+              placement: 'right',
+              delay: [500, 0],
+              animation: 'scale-subtle',
+              content: 'Applies a penalty to the probability of tokens that have already been generated, discouraging the model from repeating the same words or phrases. The penalty is applied by dividing the token probability by a factor based on the number of times the token has appeared in the generated text.',
             });
 
             tippy("#inference_max_new_tokens", {
-              placement: 'right',
-              delay: [500, 0],
-              content: 'Limits the maximum number of tokens generated in a single iteration.',
+              placement: 'right',
+              delay: [500, 0],
+              animation: 'scale-subtle',
+              content: 'Limits the maximum number of tokens generated in a single iteration.',
             });
 
             tippy("#inference_stream_output", {
-              placement: 'right',
-              delay: [500, 0],
-              content: 'When enabled, generated text will be displayed in real-time as it is being produced by the model, allowing you to observe the text generation process as it unfolds.',
+              placement: 'right',
+              delay: [500, 0],
+              animation: 'scale-subtle',
+              content: 'When enabled, generated text will be displayed in real-time as it is being produced by the model, allowing you to observe the text generation process as it unfolds.',
             });
 
           }, 100);
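Note: the nine near-identical .change() registrations added above could also be written as a loop over the preview's input components. The sketch below is not part of the commit; it only assumes the names defined in the diff (prompt_template, variable_0 through variable_7, preview_prompt, update_prompt_preview) and standard Gradio change-event wiring. "preview_inputs" is a hypothetical helper name.

# Sketch only (not in the commit): register the same preview handler on the
# prompt template dropdown and every variable textbox in one loop.
preview_inputs = [prompt_template,
                  variable_0, variable_1, variable_2, variable_3,
                  variable_4, variable_5, variable_6, variable_7]
for component in preview_inputs:
    # Any change re-renders the prompt preview from the current values.
    component.change(fn=update_prompt_preview,
                     inputs=preview_inputs,
                     outputs=preview_prompt)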
llama_lora/ui/main_page.py CHANGED
@@ -89,7 +89,27 @@ def main_page_custom_css():
         font-weight: 100;
     }
 
+    #inference_prompt_box > *:first-child {
+        border-bottom-left-radius: 0;
+        border-bottom-right-radius: 0;
+    }
+    #inference_prompt_box > *:last-child {
+        margin-top: -16px;
+        border-top: 0;
+        border-top-left-radius: 0;
+        border-top-right-radius: 0;
+    }
 
+    #inference_preview_prompt_container .label-wrap {
+        user-select: none;
+    }
+
+    #inference_preview_prompt {
+        padding: 0;
+    }
+    #inference_preview_prompt textarea {
+        border: 0;
+    }
 
     @media screen and (min-width: 640px) {
         #inference_lora_model {
@@ -104,14 +124,16 @@ def main_page_custom_css():
             border-bottom-left-radius: 0;
             border-left: 0;
 
-            margin-right: -148px;
-            padding-right: 148px;
+            margin-right: -90px;
+            padding-right: 80px;
         }
 
         #inference_reload_selections_button {
             margin: 16px;
-            width: 100px;
-            min-width: 100px;
+            margin-bottom: auto;
+            height: 42px !important;
+            min-width: 42px !important;
+            width: 42px !important;
             z-index: 1;
         }
     }
@@ -131,5 +153,7 @@ def main_page_custom_css():
             margin-top: -16px;
         }
     }
+
+    .tippy-box[data-animation=scale-subtle][data-placement^=top]{transform-origin:bottom}.tippy-box[data-animation=scale-subtle][data-placement^=bottom]{transform-origin:top}.tippy-box[data-animation=scale-subtle][data-placement^=left]{transform-origin:right}.tippy-box[data-animation=scale-subtle][data-placement^=right]{transform-origin:left}.tippy-box[data-animation=scale-subtle][data-state=hidden]{transform:scale(.8);opacity:0}
     """
     return css