# Llamafile_tab.py
# Description: Gradio interface for configuring and launching Llamafile with Local LLMs
#
# Imports
import os
import logging
from typing import Tuple, Optional

import gradio as gr

from App_Function_Libraries.Local_LLM.Local_LLM_Inference_Engine_Lib import (
    download_llm_model,
    llm_models,
    start_llamafile,
    get_gguf_llamafile_files
)
#
#######################################################################################################################
#
# Functions:

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODELS_DIR = os.path.join(BASE_DIR, "Models")
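# Note: MODELS_DIR is resolved relative to this file, so local GGUF/llamafile models
# are expected in a "Models" folder that sits next to Llamafile_tab.py.
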
def create_chat_with_llamafile_tab():
    # Function to update model path based on selection
    def on_local_model_change(selected_model: str, search_directory: str) -> str:
        if selected_model and isinstance(search_directory, str):
            model_path = os.path.abspath(os.path.join(search_directory, selected_model))
            logging.debug(f"Selected model path: {model_path}")  # Debug print for selected model path
            return model_path
        return "Invalid selection or directory."

    # Function to update the dropdown with available models
    def update_dropdowns(search_directory: str) -> Tuple[dict, str]:
        logging.debug(f"User-entered directory: {search_directory}")  # Debug print for directory
        if not os.path.isdir(search_directory):
            logging.debug(f"Directory does not exist: {search_directory}")  # Debug print for non-existing directory
            return gr.update(choices=[], value=None), "Directory does not exist."

        try:
            logging.debug(f"Directory exists: {search_directory}, scanning for files...")  # Confirm directory exists
            model_files = get_gguf_llamafile_files(search_directory)
            logging.debug("Completed scanning for model files.")
        except Exception as e:
            logging.error(f"Error scanning directory: {e}")
            return gr.update(choices=[], value=None), f"Error scanning directory: {e}"

        if not model_files:
            logging.debug(f"No model files found in {search_directory}")  # Debug print for no files found
            return gr.update(choices=[], value=None), "No model files found in the specified directory."

        # Update the dropdown choices with the model files found
        logging.debug(f"Models loaded from {search_directory}: {model_files}")  # Debug: print model files loaded
        return gr.update(choices=model_files, value=None), f"Models loaded from {search_directory}."

    def download_preset_model(selected_model: str) -> Tuple[str, str]:
        """
        Downloads the selected preset model.

        Args:
            selected_model (str): The key of the selected preset model.

        Returns:
            Tuple[str, str]: Status message and the path to the downloaded model.
        """
        model_info = llm_models.get(selected_model)
        if not model_info:
            return "Invalid model selection.", ""

        try:
            model_path = download_llm_model(
                model_name=model_info["name"],
                model_url=model_info["url"],
                model_filename=model_info["filename"],
                model_hash=model_info["hash"]
            )
            return f"Model '{model_info['name']}' downloaded successfully.", model_path
        except Exception as e:
            logging.error(f"Error downloading model: {e}")
            return f"Failed to download model: {e}", ""

    with gr.TabItem("Local LLM with Llamafile", visible=True):
        gr.Markdown("# Settings for Llamafile")

        with gr.Row():
            with gr.Column():
                am_noob = gr.Checkbox(label="Enable Sane Defaults", value=False, visible=True)
                advanced_mode_toggle = gr.Checkbox(label="Advanced Mode - Show All Settings", value=False)
                # Advanced Inputs
                verbose_checked = gr.Checkbox(label="Enable Verbose Output", value=False, visible=False)
                threads_checked = gr.Checkbox(label="Set CPU Threads", value=False, visible=False)
                threads_value = gr.Number(label="Number of CPU Threads", value=None, precision=0, visible=False)
                threads_batched_checked = gr.Checkbox(label="Enable Batched Inference", value=False, visible=False)
                threads_batched_value = gr.Number(label="Batch Size for Inference", value=None, precision=0, visible=False)
                model_alias_checked = gr.Checkbox(label="Set Model Alias", value=False, visible=False)
                model_alias_value = gr.Textbox(label="Model Alias", value="", visible=False)
                ctx_size_checked = gr.Checkbox(label="Set Prompt Context Size", value=False, visible=False)
                ctx_size_value = gr.Number(label="Prompt Context Size", value=8124, precision=0, visible=False)
                ngl_checked = gr.Checkbox(label="Enable GPU Layers", value=False, visible=True)
                ngl_value = gr.Number(label="Number of GPU Layers", value=None, precision=0, visible=True)
                batch_size_checked = gr.Checkbox(label="Set Batch Size", value=False, visible=False)
                batch_size_value = gr.Number(label="Batch Size", value=512, visible=False)
                memory_f32_checked = gr.Checkbox(label="Use 32-bit Floating Point", value=False, visible=False)
                numa_checked = gr.Checkbox(label="Enable NUMA", value=False, visible=False)
                server_timeout_value = gr.Number(label="Server Timeout", value=600, precision=0, visible=False)
                host_checked = gr.Checkbox(label="Set IP to Listen On", value=False, visible=False)
                host_value = gr.Textbox(label="Host IP Address", value="", visible=False)
                port_checked = gr.Checkbox(label="Set Server Port", value=False, visible=False)
                port_value = gr.Number(label="Port Number", value=8080, precision=0, visible=False)
                api_key_checked = gr.Checkbox(label="Set API Key", value=False, visible=False)
                api_key_value = gr.Textbox(label="API Key", value="", visible=False)
                http_threads_checked = gr.Checkbox(label="Set HTTP Server Threads", value=False, visible=False)
                http_threads_value = gr.Number(label="Number of HTTP Server Threads", value=None, precision=0, visible=False)
                hf_repo_checked = gr.Checkbox(label="Use Huggingface Repo Model", value=False, visible=False)
                hf_repo_value = gr.Textbox(label="Huggingface Repo Name", value="", visible=False)
                hf_file_checked = gr.Checkbox(label="Set Huggingface Model File", value=False, visible=False)
                hf_file_value = gr.Textbox(label="Huggingface Model File", value="", visible=False)

            with gr.Column():
                # Model Selection Section
                gr.Markdown("## Model Selection")

                # Option 1: Select from Local Filesystem
                with gr.Row():
                    search_directory = gr.Textbox(
                        label="Model Directory",
                        placeholder="Enter directory path (currently './Models')",
                        value=MODELS_DIR,
                        interactive=True
                    )

                # Initial population of local models
                initial_dropdown_update, _ = update_dropdowns(MODELS_DIR)
                logging.debug(f"Scanning directory: {MODELS_DIR}")
                refresh_button = gr.Button("Refresh Models")
                local_model_dropdown = gr.Dropdown(
                    label="Select Model from Directory",
                    choices=initial_dropdown_update["choices"],
                    value=None
                )
                # Display selected model path
                model_value = gr.Textbox(label="Selected Model File Path", value="", interactive=False)

                # Option 2: Download Preset Models
                gr.Markdown("## Download Preset Models")
                preset_model_dropdown = gr.Dropdown(
                    label="Select a Preset Model",
                    choices=list(llm_models.keys()),
                    value=None,
                    interactive=True,
                    info="Choose a preset model to download."
                )
                download_preset_button = gr.Button("Download Selected Preset")

        with gr.Row():
            with gr.Column():
                start_button = gr.Button("Start Llamafile")
                stop_button = gr.Button("Stop Llamafile (doesn't work)")
                output_display = gr.Markdown()

        # Show/hide advanced inputs based on toggle
        def update_visibility(show_advanced: bool):
            components = [
                verbose_checked, threads_checked, threads_value,
                http_threads_checked, http_threads_value,
                hf_repo_checked, hf_repo_value,
                hf_file_checked, hf_file_value,
                ctx_size_checked, ctx_size_value,
                ngl_checked, ngl_value,
                host_checked, host_value,
                port_checked, port_value
            ]
            return [gr.update(visible=show_advanced) for _ in components]

        def on_start_button_click(
                am_noob: bool,
                verbose_checked: bool,
                threads_checked: bool,
                threads_value: Optional[int],
                threads_batched_checked: bool,
                threads_batched_value: Optional[int],
                model_alias_checked: bool,
                model_alias_value: str,
                http_threads_checked: bool,
                http_threads_value: Optional[int],
                model_value: str,
                hf_repo_checked: bool,
                hf_repo_value: str,
                hf_file_checked: bool,
                hf_file_value: str,
                ctx_size_checked: bool,
                ctx_size_value: Optional[int],
                ngl_checked: bool,
                ngl_value: Optional[int],
                batch_size_checked: bool,
                batch_size_value: Optional[int],
                memory_f32_checked: bool,
                numa_checked: bool,
                server_timeout_value: Optional[int],
                host_checked: bool,
                host_value: str,
                port_checked: bool,
                port_value: Optional[int],
                api_key_checked: bool,
                api_key_value: str
        ) -> str:
            """
            Event handler for the Start Llamafile button.
            """
            try:
                result = start_llamafile(
                    am_noob,
                    verbose_checked,
                    threads_checked,
                    threads_value,
                    threads_batched_checked,
                    threads_batched_value,
                    model_alias_checked,
                    model_alias_value,
                    http_threads_checked,
                    http_threads_value,
                    model_value,
                    hf_repo_checked,
                    hf_repo_value,
                    hf_file_checked,
                    hf_file_value,
                    ctx_size_checked,
                    ctx_size_value,
                    ngl_checked,
                    ngl_value,
                    batch_size_checked,
                    batch_size_value,
                    memory_f32_checked,
                    numa_checked,
                    server_timeout_value,
                    host_checked,
                    host_value,
                    port_checked,
                    port_value,
                    api_key_checked,
                    api_key_value
                )
                return result
            except Exception as e:
                logging.error(f"Error starting Llamafile: {e}")
                return f"Failed to start Llamafile: {e}"
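
        # Wire the UI events: the advanced-mode toggle shows/hides the extra settings,
        # and the buttons and dropdowns call the handlers defined above.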
        advanced_mode_toggle.change(
            fn=update_visibility,
            inputs=[advanced_mode_toggle],
            outputs=[
                verbose_checked, threads_checked, threads_value,
                http_threads_checked, http_threads_value,
                hf_repo_checked, hf_repo_value,
                hf_file_checked, hf_file_value,
                ctx_size_checked, ctx_size_value,
                ngl_checked, ngl_value,
                host_checked, host_value,
                port_checked, port_value
            ]
        )

        start_button.click(
            fn=on_start_button_click,
            inputs=[
                am_noob,
                verbose_checked,
                threads_checked,
                threads_value,
                threads_batched_checked,
                threads_batched_value,
                model_alias_checked,
                model_alias_value,
                http_threads_checked,
                http_threads_value,
                model_value,
                hf_repo_checked,
                hf_repo_value,
                hf_file_checked,
                hf_file_value,
                ctx_size_checked,
                ctx_size_value,
                ngl_checked,
                ngl_value,
                batch_size_checked,
                batch_size_value,
                memory_f32_checked,
                numa_checked,
                server_timeout_value,
                host_checked,
                host_value,
                port_checked,
                port_value,
                api_key_checked,
                api_key_value
            ],
            outputs=output_display
        )

        download_preset_button.click(
            fn=download_preset_model,
            inputs=[preset_model_dropdown],
            outputs=[output_display, model_value]
        )

        # Click event for refreshing models
        refresh_button.click(
            fn=update_dropdowns,
            inputs=[search_directory],  # Ensure that the directory path (string) is passed
            outputs=[local_model_dropdown, output_display]  # Update dropdown and status
        )

        # Event to update model_value when a model is selected from the dropdown
        local_model_dropdown.change(
            fn=on_local_model_change,  # Function that calculates the model path
            inputs=[local_model_dropdown, search_directory],  # Inputs: selected model and directory
            outputs=[model_value]  # Output: update the model_value textbox with the selected path
        )

#
#
#######################################################################################################################
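
# --- Illustrative usage sketch (not part of the original module) ---
# A minimal way to mount this tab in a standalone Gradio app for local testing.
# The Blocks/Tabs wrapper and the launch() call are assumptions about the surrounding
# application, not code taken from this project.
if __name__ == "__main__":
    with gr.Blocks() as demo:
        with gr.Tabs():
            create_chat_with_llamafile_tab()
    demo.launch()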