# Llamafile_tab.py
# Description: Gradio interface for configuring and launching Llamafile with Local LLMs
# Imports
import os
import logging
from typing import Tuple, Optional
import gradio as gr
from App_Function_Libraries.Local_LLM.Local_LLM_Inference_Engine_Lib import (
download_llm_model,
llm_models,
start_llamafile,
get_gguf_llamafile_files
)
#
#######################################################################################################################
#
# Functions:
def create_chat_with_llamafile_tab():
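"""
Build the 'Local LLM with Llamafile' tab: scans a local directory for GGUF/llamafile models,
offers preset model downloads, and exposes start/stop controls plus optional advanced settings
for the Llamafile server.
"""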
# Function to update model path based on selection
def on_local_model_change(selected_model: str, search_directory: str) -> str:
if selected_model and isinstance(search_directory, str):
model_path = os.path.abspath(os.path.join(search_directory, selected_model))
logging.debug(f"Selected model path: {model_path}") # Debug print for selected model path
return model_path
return "Invalid selection or directory."
# Function to update the dropdown with available models
def update_dropdowns(search_directory: str) -> Tuple[dict, str]:
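"""Scan search_directory for model files and return a dropdown update plus a status message."""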
logging.debug(f"User-entered directory: {search_directory}") # Debug print for directory
if not os.path.isdir(search_directory):
logging.debug(f"Directory does not exist: {search_directory}") # Debug print for non-existing directory
return gr.update(choices=[], value=None), "Directory does not exist."
logging.debug(f"Directory exists: {search_directory}, scanning for files...") # Confirm directory exists
model_files = get_gguf_llamafile_files(search_directory)
if not model_files:
logging.debug(f"No model files found in {search_directory}") # Debug print for no files found
return gr.update(choices=[], value=None), "No model files found in the specified directory."
# Update the dropdown choices with the model files found
logging.debug(f"Models loaded from {search_directory}: {model_files}") # Debug: Print model files loaded
return gr.update(choices=model_files, value=None), f"Models loaded from {search_directory}."
def download_preset_model(selected_model: str) -> Tuple[str, str]:
"""
Downloads the selected preset model.
Args:
selected_model (str): The key of the selected preset model.
Returns:
Tuple[str, str]: Status message and the path to the downloaded model.
"""
model_info = llm_models.get(selected_model)
if not model_info:
return "Invalid model selection.", ""
try:
model_path = download_llm_model(
model_name=model_info["name"],
model_url=model_info["url"],
model_filename=model_info["filename"],
model_hash=model_info["hash"]
)
return f"Model '{model_info['name']}' downloaded successfully.", model_path
except Exception as e:
logging.error(f"Error downloading model: {e}")
return f"Failed to download model: {e}", ""
with gr.TabItem("Local LLM with Llamafile"):
gr.Markdown("# Settings for Llamafile")
with gr.Row():
with gr.Column():
am_noob = gr.Checkbox(label="Enable Sane Defaults", value=False, visible=True)
advanced_mode_toggle = gr.Checkbox(label="Advanced Mode - Show All Settings", value=False)
# Advanced Inputs
verbose_checked = gr.Checkbox(label="Enable Verbose Output", value=False, visible=False)
threads_checked = gr.Checkbox(label="Set CPU Threads", value=False, visible=False)
threads_value = gr.Number(label="Number of CPU Threads", value=None, precision=0, visible=False)
threads_batched_checked = gr.Checkbox(label="Enable Batched Inference", value=False, visible=False)
threads_batched_value = gr.Number(label="Batch Size for Inference", value=None, precision=0, visible=False)
model_alias_checked = gr.Checkbox(label="Set Model Alias", value=False, visible=False)
model_alias_value = gr.Textbox(label="Model Alias", value="", visible=False)
ctx_size_checked = gr.Checkbox(label="Set Prompt Context Size", value=False, visible=False)
ctx_size_value = gr.Number(label="Prompt Context Size", value=8124, precision=0, visible=False)
ngl_checked = gr.Checkbox(label="Enable GPU Layers", value=False, visible=True)
ngl_value = gr.Number(label="Number of GPU Layers", value=None, precision=0, visible=True)
batch_size_checked = gr.Checkbox(label="Set Batch Size", value=False, visible=False)
batch_size_value = gr.Number(label="Batch Size", value=512, visible=False)
memory_f32_checked = gr.Checkbox(label="Use 32-bit Floating Point", value=False, visible=False)
numa_checked = gr.Checkbox(label="Enable NUMA", value=False, visible=False)
server_timeout_value = gr.Number(label="Server Timeout", value=600, precision=0, visible=False)
host_checked = gr.Checkbox(label="Set IP to Listen On", value=False, visible=False)
host_value = gr.Textbox(label="Host IP Address", value="", visible=False)
port_checked = gr.Checkbox(label="Set Server Port", value=False, visible=False)
port_value = gr.Number(label="Port Number", value=8080, precision=0, visible=False)
api_key_checked = gr.Checkbox(label="Set API Key", value=False, visible=False)
api_key_value = gr.Textbox(label="API Key", value="", visible=False)
http_threads_checked = gr.Checkbox(label="Set HTTP Server Threads", value=False, visible=False)
http_threads_value = gr.Number(label="Number of HTTP Server Threads", value=None, precision=0, visible=False)
hf_repo_checked = gr.Checkbox(label="Use Huggingface Repo Model", value=False, visible=False)
hf_repo_value = gr.Textbox(label="Huggingface Repo Name", value="", visible=False)
hf_file_checked = gr.Checkbox(label="Set Huggingface Model File", value=False, visible=False)
hf_file_value = gr.Textbox(label="Huggingface Model File", value="", visible=False)
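# The checked/value pairs above correspond one-to-one to the arguments forwarded to
# start_llamafile by on_start_button_click further below.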
with gr.Column():
# Model Selection Section
gr.Markdown("## Model Selection")
# Option 1: Select from Local Filesystem
with gr.Row():
search_directory = gr.Textbox(label="Model Directory",
placeholder=r"Enter directory path (currently '.\Models')",
value=r".\Models",
interactive=True)
# Initial population of local models from the default directory
initial_dropdown_update, _ = update_dropdowns(r".\Models")
refresh_button = gr.Button("Refresh Models")
local_model_dropdown = gr.Dropdown(label="Select Model from Directory", choices=initial_dropdown_update.get("choices", []))
# Display selected model path
model_value = gr.Textbox(label="Selected Model File Path", value="", interactive=False)
# Option 2: Download Preset Models
gr.Markdown("## Download Preset Models")
preset_model_dropdown = gr.Dropdown(
label="Select a Preset Model",
choices=list(llm_models.keys()),
value=None,
interactive=True,
info="Choose a preset model to download."
)
download_preset_button = gr.Button("Download Selected Preset")
with gr.Row():
with gr.Column():
start_button = gr.Button("Start Llamafile")
stop_button = gr.Button("Stop Llamafile (doesn't work)")
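# Note: stop_button is currently not wired to any click handler, hence the "(doesn't work)" label.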
output_display = gr.Markdown()
# Show/hide advanced inputs based on toggle
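# Only the components listed below are toggled; the batched-inference, model-alias, batch-size,
# 32-bit-float, NUMA, server-timeout, and API-key fields keep their initial (hidden) visibility.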
def update_visibility(show_advanced: bool):
components = [
verbose_checked, threads_checked, threads_value,
http_threads_checked, http_threads_value,
hf_repo_checked, hf_repo_value,
hf_file_checked, hf_file_value,
ctx_size_checked, ctx_size_value,
ngl_checked, ngl_value,
host_checked, host_value,
port_checked, port_value
]
return [gr.update(visible=show_advanced) for _ in components]
def on_start_button_click(
am_noob: bool,
verbose_checked: bool,
threads_checked: bool,
threads_value: Optional[int],
threads_batched_checked: bool,
threads_batched_value: Optional[int],
model_alias_checked: bool,
model_alias_value: str,
http_threads_checked: bool,
http_threads_value: Optional[int],
model_value: str,
hf_repo_checked: bool,
hf_repo_value: str,
hf_file_checked: bool,
hf_file_value: str,
ctx_size_checked: bool,
ctx_size_value: Optional[int],
ngl_checked: bool,
ngl_value: Optional[int],
batch_size_checked: bool,
batch_size_value: Optional[int],
memory_f32_checked: bool,
numa_checked: bool,
server_timeout_value: Optional[int],
host_checked: bool,
host_value: str,
port_checked: bool,
port_value: Optional[int],
api_key_checked: bool,
api_key_value: str
) -> str:
"""
Event handler for the Start Llamafile button.
"""
try:
result = start_llamafile(
am_noob,
verbose_checked,
threads_checked,
threads_value,
threads_batched_checked,
threads_batched_value,
model_alias_checked,
model_alias_value,
http_threads_checked,
http_threads_value,
model_value,
hf_repo_checked,
hf_repo_value,
hf_file_checked,
hf_file_value,
ctx_size_checked,
ctx_size_value,
ngl_checked,
ngl_value,
batch_size_checked,
batch_size_value,
memory_f32_checked,
numa_checked,
server_timeout_value,
host_checked,
host_value,
port_checked,
port_value,
api_key_checked,
api_key_value
)
return result
except Exception as e:
logging.error(f"Error starting Llamafile: {e}")
return f"Failed to start Llamafile: {e}"
advanced_mode_toggle.change(
fn=update_visibility,
inputs=[advanced_mode_toggle],
outputs=[
verbose_checked, threads_checked, threads_value,
http_threads_checked, http_threads_value,
hf_repo_checked, hf_repo_value,
hf_file_checked, hf_file_value,
ctx_size_checked, ctx_size_value,
ngl_checked, ngl_value,
host_checked, host_value,
port_checked, port_value
]
)
start_button.click(
fn=on_start_button_click,
inputs=[
am_noob,
verbose_checked,
threads_checked,
threads_value,
threads_batched_checked,
threads_batched_value,
model_alias_checked,
model_alias_value,
http_threads_checked,
http_threads_value,
model_value,
hf_repo_checked,
hf_repo_value,
hf_file_checked,
hf_file_value,
ctx_size_checked,
ctx_size_value,
ngl_checked,
ngl_value,
batch_size_checked,
batch_size_value,
memory_f32_checked,
numa_checked,
server_timeout_value,
host_checked,
host_value,
port_checked,
port_value,
api_key_checked,
api_key_value
],
outputs=output_display
)
download_preset_button.click(
fn=download_preset_model,
inputs=[preset_model_dropdown],
outputs=[output_display, model_value]
)
# Click event for refreshing models
refresh_button.click(
fn=update_dropdowns,
inputs=[search_directory], # Ensure that the directory path (string) is passed
outputs=[local_model_dropdown, output_display] # Update dropdown and status
)
# Event to update model_value when a model is selected from the dropdown
local_model_dropdown.change(
fn=on_local_model_change, # Function that calculates the model path
inputs=[local_model_dropdown, search_directory], # Inputs: selected model and directory
outputs=[model_value] # Output: Update the model_value textbox with the selected model path
)
#
#
#######################################################################################################################
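#
# Usage sketch (an assumption, not part of the original app wiring): create_chat_with_llamafile_tab()
# only declares components and event handlers, so it must be called inside an active
# gr.Blocks() / gr.Tabs() context by the main application. A minimal, hypothetical harness:
#
#     with gr.Blocks() as demo:
#         with gr.Tabs():
#             create_chat_with_llamafile_tab()
#     demo.launch()
#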