zetavg committed on
Commit 80c2789
1 Parent(s): e9c5abc
llama_lora/ui/finetune_ui.py CHANGED
@@ -360,7 +360,7 @@ Train data (first 10):
 
     Global.should_stop_training = False
 
-    return "Done. " + Global.train_fn(
+    return Global.train_fn(
         get_base_model(), # base_model
         get_tokenizer(), # tokenizer
         os.path.join(Global.data_dir, "lora_models",
@@ -535,13 +535,13 @@ def finetune_ui():
         )
 
         gradient_accumulation_steps = gr.Slider(
-            minimum=1, maximum=10, step=1, value=32,
+            minimum=1, maximum=10, step=1, value=1,
            label="Gradient Accumulation Steps",
            info="The number of steps to accumulate gradients before updating model parameters. This can be used to simulate a larger effective batch size without increasing memory usage."
        )
 
        epochs = gr.Slider(
-            minimum=1, maximum=100, step=1, value=1,
+            minimum=1, maximum=100, step=1, value=3,
            label="Epochs",
            info="The number of times to iterate over the entire training dataset. A larger number of epochs may improve model performance but also increase the risk of overfitting.")
 
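The Gradient Accumulation Steps info text above compresses the idea into one sentence. As a rough sketch of what gradient accumulation does in a generic PyTorch-style loop — the app's actual training runs behind `Global.train_fn`, which may be organized differently — parameters are only updated once every `gradient_accumulation_steps` micro-batches:

```python
# Hypothetical sketch of gradient accumulation; not the repo's real training code.
def train_one_epoch(model, optimizer, data_loader, gradient_accumulation_steps=1):
    model.train()
    optimizer.zero_grad()
    for step, batch in enumerate(data_loader):
        loss = model(**batch).loss  # assumes a Hugging Face-style model output
        # Scale the loss so the accumulated gradient matches a single large batch
        # of micro_batch_size * gradient_accumulation_steps examples.
        (loss / gradient_accumulation_steps).backward()
        if (step + 1) % gradient_accumulation_steps == 0:
            optimizer.step()       # one parameter update per accumulation window
            optimizer.zero_grad()
```

With the new default of 1, every micro-batch triggers an update; raising the slider trades update frequency for a larger effective batch size without extra memory.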
 
llama_lora/ui/inference_ui.py CHANGED
@@ -326,122 +326,144 @@ def inference_ui():
 function inference_ui_blocks_js() {
   // Auto load options
   setTimeout(function () {
-    document.getElementById("inference_reload_selections_button").click();
+    document.getElementById('inference_reload_selections_button').click();
 
     // Workaround default value not shown.
-    document.querySelector('#inference_lora_model input').value = "tloen/alpaca-lora-7b";
+    document.querySelector('#inference_lora_model input').value =
+      'tloen/alpaca-lora-7b';
   }, 100);
 
   // Add tooltips
   setTimeout(function () {
-
-    tippy("#inference_lora_model", {
+    tippy('#inference_lora_model', {
      placement: 'bottom-start',
      delay: [500, 0],
      animation: 'scale-subtle',
-      content: 'Select a LoRA model form your data directory, or type in a model name on HF (e.g.: <code>tloen/alpaca-lora-7b</code>).',
+      content:
+        'Select a LoRA model form your data directory, or type in a model name on HF (e.g.: <code>tloen/alpaca-lora-7b</code>).',
      allowHTML: true,
    });
 
-    tippy("#inference_prompt_template", {
+    tippy('#inference_prompt_template', {
      placement: 'bottom-start',
      delay: [500, 0],
      animation: 'scale-subtle',
-      content: 'Templates are loaded from the "templates" folder of your data directory. Be sure to select the template that matches your selected LoRA model to get the best results.',
+      content:
+        'Templates are loaded from the "templates" folder of your data directory. Be sure to select the template that matches your selected LoRA model to get the best results.',
    });
 
-    tippy("#inference_reload_selections_button", {
+    tippy('#inference_reload_selections_button', {
      placement: 'bottom-end',
      delay: [500, 0],
      animation: 'scale-subtle',
      content: 'Press to reload LoRA Model and Prompt Template selections.',
    });
 
-    document.querySelector('#inference_preview_prompt_container .label-wrap').addEventListener('click', function () {
-      tippy("#inference_preview_prompt", {
+    document
+      .querySelector('#inference_preview_prompt_container .label-wrap')
+      .addEventListener('click', function () {
+        tippy('#inference_preview_prompt', {
+          placement: 'right',
+          delay: [500, 0],
+          animation: 'scale-subtle',
+          content: 'This is the prompt that will be sent to the language model.',
+        });
+      });
+
+    function setTooltipForOptions() {
+      tippy('#inference_temperature', {
        placement: 'right',
        delay: [500, 0],
        animation: 'scale-subtle',
-        content: 'This is the prompt that will be sent to the language model.',
+        content:
+          'Controls randomness: Lowering results in less random completions. Higher values (e.g., 1.0) make the model generate more diverse and random outputs. As the temperature approaches zero, the model will become deterministic and repetitive.',
      });
-    });
-
-    tippy("#inference_temperature", {
-      placement: 'right',
-      delay: [500, 0],
-      animation: 'scale-subtle',
-      content: 'Controls randomness: Lowering results in less random completions. Higher values (e.g., 1.0) make the model generate more diverse and random outputs. As the temperature approaches zero, the model will become deterministic and repetitive.',
-    });
 
-    tippy("#inference_top_p", {
-      placement: 'right',
-      delay: [500, 0],
-      animation: 'scale-subtle',
-      content: 'Controls diversity via nucleus sampling: only the tokens whose cumulative probability exceeds "top_p" are considered. 0.5 means half of all likelihood-weighted options are considered.',
-    });
-
-    tippy("#inference_top_k", {
-      placement: 'right',
-      delay: [500, 0],
-      animation: 'scale-subtle',
-      content: 'Controls diversity of the generated text by only considering the "top_k" tokens with the highest probabilities. This method can lead to more focused and coherent outputs by reducing the impact of low probability tokens.',
-    });
+      tippy('#inference_top_p', {
+        placement: 'right',
+        delay: [500, 0],
+        animation: 'scale-subtle',
+        content:
+          'Controls diversity via nucleus sampling: only the tokens whose cumulative probability exceeds "top_p" are considered. 0.5 means half of all likelihood-weighted options are considered.',
+      });
 
-    tippy("#inference_beams", {
-      placement: 'right',
-      delay: [500, 0],
-      animation: 'scale-subtle',
-      content: 'Number of candidate sequences explored in parallel during text generation using beam search. A higher value increases the chances of finding high-quality, coherent output, but may slow down the generation process.',
-    });
+      tippy('#inference_top_k', {
+        placement: 'right',
+        delay: [500, 0],
+        animation: 'scale-subtle',
+        content:
+          'Controls diversity of the generated text by only considering the "top_k" tokens with the highest probabilities. This method can lead to more focused and coherent outputs by reducing the impact of low probability tokens.',
+      });
 
-    tippy("#inference_repetition_penalty", {
-      placement: 'right',
-      delay: [500, 0],
-      animation: 'scale-subtle',
-      content: 'Applies a penalty to the probability of tokens that have already been generated, discouraging the model from repeating the same words or phrases. The penalty is applied by dividing the token probability by a factor based on the number of times the token has appeared in the generated text.',
-    });
+      tippy('#inference_beams', {
+        placement: 'right',
+        delay: [500, 0],
+        animation: 'scale-subtle',
+        content:
+          'Number of candidate sequences explored in parallel during text generation using beam search. A higher value increases the chances of finding high-quality, coherent output, but may slow down the generation process.',
+      });
 
-    tippy("#inference_max_new_tokens", {
-      placement: 'right',
-      delay: [500, 0],
-      animation: 'scale-subtle',
-      content: 'Limits the maximum number of tokens generated in a single iteration.',
-    });
+      tippy('#inference_repetition_penalty', {
+        placement: 'right',
+        delay: [500, 0],
+        animation: 'scale-subtle',
+        content:
+          'Applies a penalty to the probability of tokens that have already been generated, discouraging the model from repeating the same words or phrases. The penalty is applied by dividing the token probability by a factor based on the number of times the token has appeared in the generated text.',
+      });
 
-    tippy("#inference_stream_output", {
-      placement: 'right',
-      delay: [500, 0],
-      animation: 'scale-subtle',
-      content: 'When enabled, generated text will be displayed in real-time as it is being produced by the model, allowing you to observe the text generation process as it unfolds.',
-    });
+      tippy('#inference_max_new_tokens', {
+        placement: 'right',
+        delay: [500, 0],
+        animation: 'scale-subtle',
+        content:
+          'Limits the maximum number of tokens generated in a single iteration.',
+      });
 
+      tippy('#inference_stream_output', {
+        placement: 'right',
+        delay: [500, 0],
+        animation: 'scale-subtle',
+        content:
+          'When enabled, generated text will be displayed in real-time as it is being produced by the model, allowing you to observe the text generation process as it unfolds.',
+      });
+    }
+    setTooltipForOptions();
+
+    const inference_options_accordion_toggle = document.querySelector(
+      '#inference_options_accordion .label-wrap'
+    );
+    if (inference_options_accordion_toggle) {
+      inference_options_accordion_toggle.addEventListener('click', function () {
+        setTooltipForOptions();
+      });
+    }
   }, 100);
 
   // Show/hide generate and stop button base on the state.
   setTimeout(function () {
    // Make the '#inference_output > .wrap' element appear
-    document.getElementById("inference_stop_btn").click();
+    document.getElementById('inference_stop_btn').click();
 
    setTimeout(function () {
      const output_wrap_element = document.querySelector(
-        "#inference_output > .wrap"
+        '#inference_output > .wrap'
      );
      function handle_output_wrap_element_class_change() {
-        if (Array.from(output_wrap_element.classList).includes("hide")) {
-          document.getElementById("inference_generate_btn").style.display =
-            "block";
-          document.getElementById("inference_stop_btn").style.display = "none";
+        if (Array.from(output_wrap_element.classList).includes('hide')) {
+          document.getElementById('inference_generate_btn').style.display =
+            'block';
+          document.getElementById('inference_stop_btn').style.display = 'none';
        } else {
-          document.getElementById("inference_generate_btn").style.display =
-            "none";
-          document.getElementById("inference_stop_btn").style.display = "block";
+          document.getElementById('inference_generate_btn').style.display =
+            'none';
+          document.getElementById('inference_stop_btn').style.display = 'block';
        }
      }
      new MutationObserver(function (mutationsList, observer) {
        handle_output_wrap_element_class_change();
      }).observe(output_wrap_element, {
        attributes: true,
-        attributeFilter: ["class"],
+        attributeFilter: ['class'],
      });
      handle_output_wrap_element_class_change();
    }, 500);
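The tooltips above describe the usual sampling controls (temperature, top_p, top_k, beams, repetition penalty, max new tokens). As an illustration only — using the Hugging Face transformers `generate()` API rather than this app's own inference code, and with a placeholder model path — those parameters map onto a call roughly like this:

```python
# Illustrative sketch; "path/to/base-model" is a placeholder, and the app's
# inference code may pass these options differently.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path/to/base-model")
model = AutoModelForCausalLM.from_pretrained("path/to/base-model")

inputs = tokenizer("Below is an instruction that describes a task. ...", return_tensors="pt")
outputs = model.generate(
    **inputs,
    do_sample=True,          # sampling must be on for temperature/top_p/top_k to apply
    temperature=0.7,         # lower values -> less random completions
    top_p=0.75,              # nucleus sampling: keep tokens covering 75% of probability mass
    top_k=40,                # only the 40 most likely tokens are considered
    num_beams=1,             # values > 1 enable beam search
    repetition_penalty=1.2,  # > 1 discourages repeating earlier tokens
    max_new_tokens=128,      # cap on tokens generated per call
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```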
llama_lora/ui/main_page.py CHANGED
@@ -182,6 +182,7 @@ def main_page_custom_css():
 
     #finetune_dataset_from_data_dir {
         border: 0;
+        box-shadow: none;
     }
 
     @media screen and (min-width: 640px) {
templates/alpaca_sample.json CHANGED
@@ -1,6 +1,7 @@
 {
   "description": "Sample Alpaca-LoRA template which declares the variables used.",
   "variables": ["instruction", "input"],
+  "default": "prompt_with_instruction",
   "prompt_with_instruction_input": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n",
   "prompt_with_instruction": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:\n",
   "response_split": "### Response:"