Spaces:
Running
Running
oceansweep
commited on
Commit
•
c5b0bb7
1
Parent(s):
34fa93e
Upload 169 files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- App_Function_Libraries/Audio/Audio_Files.py +131 -226
- App_Function_Libraries/Audio/Audio_Transcription_Lib.py +1 -1
- App_Function_Libraries/Benchmarks_Evaluations/InfiniteBench/InifiniteBench/__pycache__/test_chat_API_Calls.cpython-312-pytest-7.2.1.pyc +0 -0
- App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py +1 -1
- App_Function_Libraries/Books/Book_Ingestion_Lib.py +226 -88
- App_Function_Libraries/Chat/Chat_Functions.py +453 -0
- App_Function_Libraries/Chat/__init__.py +0 -0
- App_Function_Libraries/Chunk_Lib.py +238 -60
- App_Function_Libraries/DB/Character_Chat_DB.py +1059 -701
- App_Function_Libraries/DB/DB_Backups.py +160 -0
- App_Function_Libraries/DB/DB_Manager.py +159 -54
- App_Function_Libraries/DB/Prompts_DB.py +626 -0
- App_Function_Libraries/DB/RAG_QA_Chat_DB.py +845 -54
- App_Function_Libraries/DB/SQLite_DB.py +139 -583
- App_Function_Libraries/Gradio_Related.py +600 -420
- App_Function_Libraries/Gradio_UI/Anki_tab.py +921 -0
- App_Function_Libraries/Gradio_UI/Audio_ingestion_tab.py +135 -43
- App_Function_Libraries/Gradio_UI/Backup_Functionality.py +2 -13
- App_Function_Libraries/Gradio_UI/Backup_RAG_Notes_Character_Chat_tab.py +195 -0
- App_Function_Libraries/Gradio_UI/Book_Ingestion_tab.py +84 -40
- App_Function_Libraries/Gradio_UI/Character_Chat_tab.py +281 -46
- App_Function_Libraries/Gradio_UI/Character_interaction_tab.py +34 -13
- App_Function_Libraries/Gradio_UI/Chat_ui.py +691 -366
- App_Function_Libraries/Gradio_UI/Embeddings_tab.py +281 -74
- App_Function_Libraries/Gradio_UI/Evaluations_Benchmarks_tab.py +17 -2
- App_Function_Libraries/Gradio_UI/Explain_summarize_tab.py +140 -30
- App_Function_Libraries/Gradio_UI/Export_Functionality.py +747 -119
- App_Function_Libraries/Gradio_UI/Gradio_Shared.py +0 -5
- App_Function_Libraries/Gradio_UI/Import_Functionality.py +466 -17
- App_Function_Libraries/Gradio_UI/Keywords.py +301 -8
- App_Function_Libraries/Gradio_UI/Live_Recording.py +19 -0
- App_Function_Libraries/Gradio_UI/Llamafile_tab.py +327 -0
- App_Function_Libraries/Gradio_UI/Media_edit.py +218 -20
- App_Function_Libraries/Gradio_UI/Media_wiki_tab.py +7 -0
- App_Function_Libraries/Gradio_UI/Mind_Map_tab.py +128 -0
- App_Function_Libraries/Gradio_UI/PDF_ingestion_tab.py +286 -75
- App_Function_Libraries/Gradio_UI/Plaintext_tab_import.py +79 -75
- App_Function_Libraries/Gradio_UI/Podcast_tab.py +167 -52
- App_Function_Libraries/Gradio_UI/Prompt_Suggestion_tab.py +19 -6
- App_Function_Libraries/Gradio_UI/Prompts_tab.py +297 -0
- App_Function_Libraries/Gradio_UI/RAG_Chat_tab.py +18 -3
- App_Function_Libraries/Gradio_UI/RAG_QA_Chat_tab.py +447 -178
- App_Function_Libraries/Gradio_UI/Re_summarize_tab.py +60 -10
- App_Function_Libraries/Gradio_UI/Search_Tab.py +6 -6
- App_Function_Libraries/Gradio_UI/Semantic_Scholar_tab.py +184 -0
- App_Function_Libraries/Gradio_UI/Video_transcription_tab.py +149 -29
- App_Function_Libraries/Gradio_UI/View_DB_Items_tab.py +606 -121
- App_Function_Libraries/Gradio_UI/Website_scraping_tab.py +754 -554
- App_Function_Libraries/Gradio_UI/Workflows_tab.py +190 -0
- App_Function_Libraries/Gradio_UI/Writing_tab.py +76 -61
App_Function_Libraries/Audio/Audio_Files.py
CHANGED
@@ -117,16 +117,15 @@ def process_audio_files(audio_urls, audio_file, whisper_model, api_name, api_key
|
|
117 |
progress = []
|
118 |
all_transcriptions = []
|
119 |
all_summaries = []
|
120 |
-
#
|
|
|
121 |
def format_transcription_with_timestamps(segments):
|
122 |
if keep_timestamps:
|
123 |
formatted_segments = []
|
124 |
for segment in segments:
|
125 |
start = segment.get('Time_Start', 0)
|
126 |
end = segment.get('Time_End', 0)
|
127 |
-
text = segment.get('Text', '').strip()
|
128 |
-
|
129 |
-
# Add the formatted timestamp and text to the list, followed by a newline
|
130 |
formatted_segments.append(f"[{start:.2f}-{end:.2f}] {text}")
|
131 |
|
132 |
# Join the segments with a newline to ensure proper formatting
|
@@ -191,205 +190,64 @@ def process_audio_files(audio_urls, audio_file, whisper_model, api_name, api_key
|
|
191 |
'language': chunk_language
|
192 |
}
|
193 |
|
194 |
-
# Process
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
# Download and process audio file
|
201 |
-
audio_file_path = download_audio_file(url, use_cookies, cookies)
|
202 |
-
if not os.path.exists(audio_file_path):
|
203 |
-
update_progress(f"Downloaded file not found: {audio_file_path}")
|
204 |
-
failed_count += 1
|
205 |
-
log_counter(
|
206 |
-
metric_name="audio_files_failed_total",
|
207 |
-
labels={"whisper_model": whisper_model, "api_name": api_name},
|
208 |
-
value=1
|
209 |
-
)
|
210 |
-
continue
|
211 |
-
|
212 |
-
temp_files.append(audio_file_path)
|
213 |
-
update_progress("Audio file downloaded successfully.")
|
214 |
-
|
215 |
-
# Re-encode MP3 to fix potential issues
|
216 |
-
reencoded_mp3_path = reencode_mp3(audio_file_path)
|
217 |
-
if not os.path.exists(reencoded_mp3_path):
|
218 |
-
update_progress(f"Re-encoded file not found: {reencoded_mp3_path}")
|
219 |
-
failed_count += 1
|
220 |
-
log_counter(
|
221 |
-
metric_name="audio_files_failed_total",
|
222 |
-
labels={"whisper_model": whisper_model, "api_name": api_name},
|
223 |
-
value=1
|
224 |
-
)
|
225 |
-
continue
|
226 |
-
|
227 |
-
temp_files.append(reencoded_mp3_path)
|
228 |
-
|
229 |
-
# Convert re-encoded MP3 to WAV
|
230 |
-
wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
|
231 |
-
if not os.path.exists(wav_file_path):
|
232 |
-
update_progress(f"Converted WAV file not found: {wav_file_path}")
|
233 |
-
failed_count += 1
|
234 |
-
log_counter(
|
235 |
-
metric_name="audio_files_failed_total",
|
236 |
-
labels={"whisper_model": whisper_model, "api_name": api_name},
|
237 |
-
value=1
|
238 |
-
)
|
239 |
-
continue
|
240 |
-
|
241 |
-
temp_files.append(wav_file_path)
|
242 |
-
|
243 |
-
# Initialize transcription
|
244 |
-
transcription = ""
|
245 |
-
|
246 |
-
# Transcribe audio
|
247 |
-
if diarize:
|
248 |
-
segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=True)
|
249 |
-
else:
|
250 |
-
segments = speech_to_text(wav_file_path, whisper_model=whisper_model)
|
251 |
-
|
252 |
-
# Handle segments nested under 'segments' key
|
253 |
-
if isinstance(segments, dict) and 'segments' in segments:
|
254 |
-
segments = segments['segments']
|
255 |
-
|
256 |
-
if isinstance(segments, list):
|
257 |
-
# Log first 5 segments for debugging
|
258 |
-
logging.debug(f"Segments before formatting: {segments[:5]}")
|
259 |
-
transcription = format_transcription_with_timestamps(segments)
|
260 |
-
logging.debug(f"Formatted transcription (first 500 chars): {transcription[:500]}")
|
261 |
-
update_progress("Audio transcribed successfully.")
|
262 |
-
else:
|
263 |
-
update_progress("Unexpected segments format received from speech_to_text.")
|
264 |
-
logging.error(f"Unexpected segments format: {segments}")
|
265 |
-
failed_count += 1
|
266 |
-
log_counter(
|
267 |
-
metric_name="audio_files_failed_total",
|
268 |
-
labels={"whisper_model": whisper_model, "api_name": api_name},
|
269 |
-
value=1
|
270 |
-
)
|
271 |
-
continue
|
272 |
-
|
273 |
-
if not transcription.strip():
|
274 |
-
update_progress("Transcription is empty.")
|
275 |
-
failed_count += 1
|
276 |
-
log_counter(
|
277 |
-
metric_name="audio_files_failed_total",
|
278 |
-
labels={"whisper_model": whisper_model, "api_name": api_name},
|
279 |
-
value=1
|
280 |
-
)
|
281 |
-
else:
|
282 |
-
# Apply chunking
|
283 |
-
chunked_text = improved_chunking_process(transcription, chunk_options)
|
284 |
-
|
285 |
-
# Summarize
|
286 |
-
logging.debug(f"Audio Transcription API Name: {api_name}")
|
287 |
-
if api_name:
|
288 |
-
try:
|
289 |
-
summary = perform_summarization(api_name, chunked_text, custom_prompt_input, api_key)
|
290 |
-
update_progress("Audio summarized successfully.")
|
291 |
-
except Exception as e:
|
292 |
-
logging.error(f"Error during summarization: {str(e)}")
|
293 |
-
summary = "Summary generation failed"
|
294 |
-
failed_count += 1
|
295 |
-
log_counter(
|
296 |
-
metric_name="audio_files_failed_total",
|
297 |
-
labels={"whisper_model": whisper_model, "api_name": api_name},
|
298 |
-
value=1
|
299 |
-
)
|
300 |
-
else:
|
301 |
-
summary = "No summary available (API not provided)"
|
302 |
|
303 |
-
|
304 |
-
|
|
|
|
|
305 |
|
306 |
-
|
307 |
-
title = custom_title if custom_title else os.path.basename(wav_file_path)
|
308 |
-
|
309 |
-
# Add to database
|
310 |
-
add_media_with_keywords(
|
311 |
-
url=url,
|
312 |
-
title=title,
|
313 |
-
media_type='audio',
|
314 |
-
content=transcription,
|
315 |
-
keywords=custom_keywords,
|
316 |
-
prompt=custom_prompt_input,
|
317 |
-
summary=summary,
|
318 |
-
transcription_model=whisper_model,
|
319 |
-
author="Unknown",
|
320 |
-
ingestion_date=datetime.now().strftime('%Y-%m-%d')
|
321 |
-
)
|
322 |
-
update_progress("Audio file processed and added to database.")
|
323 |
-
processed_count += 1
|
324 |
-
log_counter(
|
325 |
-
metric_name="audio_files_processed_total",
|
326 |
-
labels={"whisper_model": whisper_model, "api_name": api_name},
|
327 |
-
value=1
|
328 |
-
)
|
329 |
-
|
330 |
-
# Process uploaded file if provided
|
331 |
-
if audio_file:
|
332 |
-
url = generate_unique_id()
|
333 |
-
if os.path.getsize(audio_file.name) > MAX_FILE_SIZE:
|
334 |
-
update_progress(
|
335 |
-
f"Uploaded file size exceeds the maximum limit of {MAX_FILE_SIZE / (1024 * 1024):.2f}MB. Skipping this file.")
|
336 |
-
else:
|
337 |
-
try:
|
338 |
-
# Re-encode MP3 to fix potential issues
|
339 |
-
reencoded_mp3_path = reencode_mp3(audio_file.name)
|
340 |
-
if not os.path.exists(reencoded_mp3_path):
|
341 |
-
update_progress(f"Re-encoded file not found: {reencoded_mp3_path}")
|
342 |
-
return update_progress("Processing failed: Re-encoded file not found"), "", ""
|
343 |
|
|
|
|
|
344 |
temp_files.append(reencoded_mp3_path)
|
345 |
|
346 |
-
# Convert re-encoded MP3 to WAV
|
347 |
wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
|
348 |
-
if not os.path.exists(wav_file_path):
|
349 |
-
update_progress(f"Converted WAV file not found: {wav_file_path}")
|
350 |
-
return update_progress("Processing failed: Converted WAV file not found"), "", ""
|
351 |
-
|
352 |
temp_files.append(wav_file_path)
|
353 |
|
354 |
-
#
|
355 |
-
|
356 |
-
|
357 |
-
if diarize:
|
358 |
-
segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=True)
|
359 |
-
else:
|
360 |
-
segments = speech_to_text(wav_file_path, whisper_model=whisper_model)
|
361 |
|
362 |
-
# Handle segments
|
363 |
if isinstance(segments, dict) and 'segments' in segments:
|
364 |
segments = segments['segments']
|
365 |
|
366 |
-
if isinstance(segments, list):
|
367 |
-
|
368 |
-
else:
|
369 |
-
update_progress("Unexpected segments format received from speech_to_text.")
|
370 |
-
logging.error(f"Unexpected segments format: {segments}")
|
371 |
|
372 |
-
|
|
|
|
|
373 |
|
374 |
-
|
375 |
-
|
|
|
|
|
|
|
376 |
try:
|
377 |
-
|
|
|
|
|
|
|
378 |
update_progress("Audio summarized successfully.")
|
379 |
except Exception as e:
|
380 |
-
logging.error(f"
|
381 |
summary = "Summary generation failed"
|
382 |
-
else:
|
383 |
-
summary = "No summary available (API not provided)"
|
384 |
|
|
|
385 |
all_transcriptions.append(transcription)
|
386 |
all_summaries.append(summary)
|
387 |
|
388 |
-
#
|
389 |
title = custom_title if custom_title else os.path.basename(wav_file_path)
|
390 |
-
|
391 |
add_media_with_keywords(
|
392 |
-
url=
|
393 |
title=title,
|
394 |
media_type='audio',
|
395 |
content=transcription,
|
@@ -400,65 +258,112 @@ def process_audio_files(audio_urls, audio_file, whisper_model, api_name, api_key
|
|
400 |
author="Unknown",
|
401 |
ingestion_date=datetime.now().strftime('%Y-%m-%d')
|
402 |
)
|
403 |
-
|
404 |
processed_count += 1
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
value=1
|
409 |
-
)
|
410 |
except Exception as e:
|
411 |
-
update_progress(f"Error processing uploaded file: {str(e)}")
|
412 |
-
logging.error(f"Error processing uploaded file: {str(e)}")
|
413 |
failed_count += 1
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
value=1
|
418 |
-
)
|
419 |
-
return update_progress("Processing failed: Error processing uploaded file"), "", ""
|
420 |
-
# Final cleanup
|
421 |
-
if not keep_original:
|
422 |
-
cleanup_files()
|
423 |
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
labels={"whisper_model": whisper_model, "api_name": api_name}
|
431 |
-
)
|
432 |
|
433 |
-
|
434 |
-
|
435 |
-
metric_name="total_audio_files_processed",
|
436 |
-
labels={"whisper_model": whisper_model, "api_name": api_name},
|
437 |
-
value=processed_count
|
438 |
-
)
|
439 |
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
445 |
|
|
|
|
|
|
|
|
|
|
|
|
|
446 |
|
447 |
-
|
448 |
-
|
449 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
450 |
|
451 |
return final_progress, final_transcriptions, final_summaries
|
452 |
|
453 |
except Exception as e:
|
454 |
-
logging.error(f"Error
|
455 |
-
log_counter(
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
)
|
460 |
-
cleanup_files()
|
461 |
-
return update_progress(f"Processing failed: {str(e)}"), "", ""
|
462 |
|
463 |
|
464 |
def format_transcription_with_timestamps(segments, keep_timestamps):
|
|
|
117 |
progress = []
|
118 |
all_transcriptions = []
|
119 |
all_summaries = []
|
120 |
+
temp_files = [] # Keep track of temporary files
|
121 |
+
|
122 |
def format_transcription_with_timestamps(segments):
|
123 |
if keep_timestamps:
|
124 |
formatted_segments = []
|
125 |
for segment in segments:
|
126 |
start = segment.get('Time_Start', 0)
|
127 |
end = segment.get('Time_End', 0)
|
128 |
+
text = segment.get('Text', '').strip()
|
|
|
|
|
129 |
formatted_segments.append(f"[{start:.2f}-{end:.2f}] {text}")
|
130 |
|
131 |
# Join the segments with a newline to ensure proper formatting
|
|
|
190 |
'language': chunk_language
|
191 |
}
|
192 |
|
193 |
+
# Process URLs if provided
|
194 |
+
if audio_urls:
|
195 |
+
urls = [url.strip() for url in audio_urls.split('\n') if url.strip()]
|
196 |
+
for i, url in enumerate(urls):
|
197 |
+
try:
|
198 |
+
update_progress(f"Processing URL {i + 1}/{len(urls)}: {url}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
|
200 |
+
# Download and process audio file
|
201 |
+
audio_file_path = download_audio_file(url, use_cookies, cookies)
|
202 |
+
if not audio_file_path:
|
203 |
+
raise FileNotFoundError(f"Failed to download audio from URL: {url}")
|
204 |
|
205 |
+
temp_files.append(audio_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
|
207 |
+
# Process the audio file
|
208 |
+
reencoded_mp3_path = reencode_mp3(audio_file_path)
|
209 |
temp_files.append(reencoded_mp3_path)
|
210 |
|
|
|
211 |
wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
|
|
|
|
|
|
|
|
|
212 |
temp_files.append(wav_file_path)
|
213 |
|
214 |
+
# Transcribe audio
|
215 |
+
segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=diarize)
|
|
|
|
|
|
|
|
|
|
|
216 |
|
217 |
+
# Handle segments format
|
218 |
if isinstance(segments, dict) and 'segments' in segments:
|
219 |
segments = segments['segments']
|
220 |
|
221 |
+
if not isinstance(segments, list):
|
222 |
+
raise ValueError("Unexpected segments format received from speech_to_text")
|
|
|
|
|
|
|
223 |
|
224 |
+
transcription = format_transcription_with_timestamps(segments)
|
225 |
+
if not transcription.strip():
|
226 |
+
raise ValueError("Empty transcription generated")
|
227 |
|
228 |
+
# Initialize summary with default value
|
229 |
+
summary = "No summary available"
|
230 |
+
|
231 |
+
# Attempt summarization if API is provided
|
232 |
+
if api_name and api_name.lower() != "none":
|
233 |
try:
|
234 |
+
chunked_text = improved_chunking_process(transcription, chunk_options)
|
235 |
+
summary_result = perform_summarization(api_name, chunked_text, custom_prompt_input, api_key)
|
236 |
+
if summary_result:
|
237 |
+
summary = summary_result
|
238 |
update_progress("Audio summarized successfully.")
|
239 |
except Exception as e:
|
240 |
+
logging.error(f"Summarization failed: {str(e)}")
|
241 |
summary = "Summary generation failed"
|
|
|
|
|
242 |
|
243 |
+
# Add to results
|
244 |
all_transcriptions.append(transcription)
|
245 |
all_summaries.append(summary)
|
246 |
|
247 |
+
# Add to database
|
248 |
title = custom_title if custom_title else os.path.basename(wav_file_path)
|
|
|
249 |
add_media_with_keywords(
|
250 |
+
url=url,
|
251 |
title=title,
|
252 |
media_type='audio',
|
253 |
content=transcription,
|
|
|
258 |
author="Unknown",
|
259 |
ingestion_date=datetime.now().strftime('%Y-%m-%d')
|
260 |
)
|
261 |
+
|
262 |
processed_count += 1
|
263 |
+
update_progress(f"Successfully processed URL {i + 1}")
|
264 |
+
log_counter("audio_files_processed_total", 1, {"whisper_model": whisper_model, "api_name": api_name})
|
265 |
+
|
|
|
|
|
266 |
except Exception as e:
|
|
|
|
|
267 |
failed_count += 1
|
268 |
+
update_progress(f"Failed to process URL {i + 1}: {str(e)}")
|
269 |
+
log_counter("audio_files_failed_total", 1, {"whisper_model": whisper_model, "api_name": api_name})
|
270 |
+
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
|
272 |
+
# Process uploaded file if provided
|
273 |
+
if audio_file:
|
274 |
+
try:
|
275 |
+
update_progress("Processing uploaded file...")
|
276 |
+
if os.path.getsize(audio_file.name) > MAX_FILE_SIZE:
|
277 |
+
raise ValueError(f"File size exceeds maximum limit of {MAX_FILE_SIZE / (1024 * 1024):.2f}MB")
|
|
|
|
|
278 |
|
279 |
+
reencoded_mp3_path = reencode_mp3(audio_file.name)
|
280 |
+
temp_files.append(reencoded_mp3_path)
|
|
|
|
|
|
|
|
|
281 |
|
282 |
+
wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
|
283 |
+
temp_files.append(wav_file_path)
|
284 |
+
|
285 |
+
# Transcribe audio
|
286 |
+
segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=diarize)
|
287 |
+
|
288 |
+
if isinstance(segments, dict) and 'segments' in segments:
|
289 |
+
segments = segments['segments']
|
290 |
+
|
291 |
+
if not isinstance(segments, list):
|
292 |
+
raise ValueError("Unexpected segments format received from speech_to_text")
|
293 |
|
294 |
+
transcription = format_transcription_with_timestamps(segments)
|
295 |
+
if not transcription.strip():
|
296 |
+
raise ValueError("Empty transcription generated")
|
297 |
+
|
298 |
+
# Initialize summary with default value
|
299 |
+
summary = "No summary available"
|
300 |
|
301 |
+
# Attempt summarization if API is provided
|
302 |
+
if api_name and api_name.lower() != "none":
|
303 |
+
try:
|
304 |
+
chunked_text = improved_chunking_process(transcription, chunk_options)
|
305 |
+
summary_result = perform_summarization(api_name, chunked_text, custom_prompt_input, api_key)
|
306 |
+
if summary_result:
|
307 |
+
summary = summary_result
|
308 |
+
update_progress("Audio summarized successfully.")
|
309 |
+
except Exception as e:
|
310 |
+
logging.error(f"Summarization failed: {str(e)}")
|
311 |
+
summary = "Summary generation failed"
|
312 |
+
|
313 |
+
# Add to results
|
314 |
+
all_transcriptions.append(transcription)
|
315 |
+
all_summaries.append(summary)
|
316 |
+
|
317 |
+
# Add to database
|
318 |
+
title = custom_title if custom_title else os.path.basename(wav_file_path)
|
319 |
+
add_media_with_keywords(
|
320 |
+
url="Uploaded File",
|
321 |
+
title=title,
|
322 |
+
media_type='audio',
|
323 |
+
content=transcription,
|
324 |
+
keywords=custom_keywords,
|
325 |
+
prompt=custom_prompt_input,
|
326 |
+
summary=summary,
|
327 |
+
transcription_model=whisper_model,
|
328 |
+
author="Unknown",
|
329 |
+
ingestion_date=datetime.now().strftime('%Y-%m-%d')
|
330 |
+
)
|
331 |
+
|
332 |
+
processed_count += 1
|
333 |
+
update_progress("Successfully processed uploaded file")
|
334 |
+
log_counter("audio_files_processed_total", 1, {"whisper_model": whisper_model, "api_name": api_name})
|
335 |
+
|
336 |
+
except Exception as e:
|
337 |
+
failed_count += 1
|
338 |
+
update_progress(f"Failed to process uploaded file: {str(e)}")
|
339 |
+
log_counter("audio_files_failed_total", 1, {"whisper_model": whisper_model, "api_name": api_name})
|
340 |
+
|
341 |
+
# Cleanup temporary files
|
342 |
+
if not keep_original:
|
343 |
+
cleanup_files()
|
344 |
+
|
345 |
+
# Log processing metrics
|
346 |
+
processing_time = time.time() - start_time
|
347 |
+
log_histogram("audio_processing_time_seconds", processing_time,
|
348 |
+
{"whisper_model": whisper_model, "api_name": api_name})
|
349 |
+
log_counter("total_audio_files_processed", processed_count,
|
350 |
+
{"whisper_model": whisper_model, "api_name": api_name})
|
351 |
+
log_counter("total_audio_files_failed", failed_count,
|
352 |
+
{"whisper_model": whisper_model, "api_name": api_name})
|
353 |
+
|
354 |
+
# Prepare final output
|
355 |
+
final_progress = update_progress(f"Processing complete. Processed: {processed_count}, Failed: {failed_count}")
|
356 |
+
final_transcriptions = "\n\n".join(all_transcriptions) if all_transcriptions else "No transcriptions available"
|
357 |
+
final_summaries = "\n\n".join(all_summaries) if all_summaries else "No summaries available"
|
358 |
|
359 |
return final_progress, final_transcriptions, final_summaries
|
360 |
|
361 |
except Exception as e:
|
362 |
+
logging.error(f"Error in process_audio_files: {str(e)}")
|
363 |
+
log_counter("audio_files_failed_total", 1, {"whisper_model": whisper_model, "api_name": api_name})
|
364 |
+
if not keep_original:
|
365 |
+
cleanup_files()
|
366 |
+
return update_progress(f"Processing failed: {str(e)}"), "No transcriptions available", "No summaries available"
|
|
|
|
|
|
|
367 |
|
368 |
|
369 |
def format_transcription_with_timestamps(segments, keep_timestamps):
|
App_Function_Libraries/Audio/Audio_Transcription_Lib.py
CHANGED
@@ -332,4 +332,4 @@ def save_audio_temp(audio_data, sample_rate=16000):
|
|
332 |
|
333 |
#
|
334 |
#
|
335 |
-
#######################################################################################################################
|
|
|
332 |
|
333 |
#
|
334 |
#
|
335 |
+
#######################################################################################################################
|
App_Function_Libraries/Benchmarks_Evaluations/InfiniteBench/InifiniteBench/__pycache__/test_chat_API_Calls.cpython-312-pytest-7.2.1.pyc
ADDED
Binary file (7.15 kB). View file
|
|
App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py
CHANGED
@@ -24,7 +24,7 @@ from tenacity import (
|
|
24 |
wait_random_exponential,
|
25 |
)
|
26 |
|
27 |
-
from App_Function_Libraries.Chat import chat_api_call
|
28 |
|
29 |
#
|
30 |
#######################################################################################################################
|
|
|
24 |
wait_random_exponential,
|
25 |
)
|
26 |
|
27 |
+
from App_Function_Libraries.Chat.Chat_Functions import chat_api_call
|
28 |
|
29 |
#
|
30 |
#######################################################################################################################
|
App_Function_Libraries/Books/Book_Ingestion_Lib.py
CHANGED
@@ -18,6 +18,9 @@ import tempfile
|
|
18 |
import zipfile
|
19 |
from datetime import datetime
|
20 |
import logging
|
|
|
|
|
|
|
21 |
#
|
22 |
# External Imports
|
23 |
import ebooklib
|
@@ -241,109 +244,244 @@ def process_zip_file(zip_file,
|
|
241 |
return "\n".join(results)
|
242 |
|
243 |
|
244 |
-
def
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
system_prompt,
|
249 |
-
custom_prompt,
|
250 |
-
auto_summarize,
|
251 |
-
api_name,
|
252 |
-
api_key,
|
253 |
-
max_chunk_size,
|
254 |
-
chunk_overlap,
|
255 |
-
custom_chapter_pattern
|
256 |
-
):
|
257 |
try:
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
if isinstance(max_chunk_size, str):
|
262 |
-
max_chunk_size = int(max_chunk_size) if max_chunk_size.strip() else 4000
|
263 |
-
elif not isinstance(max_chunk_size, int):
|
264 |
-
max_chunk_size = 4000 # Default value if not a string or int
|
265 |
-
|
266 |
-
# Handle chunk_overlap
|
267 |
-
if isinstance(chunk_overlap, str):
|
268 |
-
chunk_overlap = int(chunk_overlap) if chunk_overlap.strip() else 0
|
269 |
-
elif not isinstance(chunk_overlap, int):
|
270 |
-
chunk_overlap = 0 # Default value if not a string or int
|
271 |
-
|
272 |
-
chunk_options = {
|
273 |
-
'method': 'chapter',
|
274 |
-
'max_size': max_chunk_size,
|
275 |
-
'overlap': chunk_overlap,
|
276 |
-
'custom_chapter_pattern': custom_chapter_pattern if custom_chapter_pattern else None
|
277 |
-
}
|
278 |
|
279 |
-
|
280 |
-
|
281 |
-
return "No file uploaded."
|
282 |
|
283 |
-
|
284 |
-
if not os.path.exists(file_path):
|
285 |
-
log_counter("file_import_error", labels={"error": "File not found", "file_name": file.name})
|
286 |
-
return "Uploaded file not found."
|
287 |
|
288 |
-
|
|
|
|
|
|
|
|
|
289 |
|
290 |
-
|
291 |
-
status = import_epub(
|
292 |
-
file_path,
|
293 |
-
title,
|
294 |
-
author,
|
295 |
-
keywords,
|
296 |
-
custom_prompt=custom_prompt,
|
297 |
-
system_prompt=system_prompt,
|
298 |
-
summary=None,
|
299 |
-
auto_summarize=auto_summarize,
|
300 |
-
api_name=api_name,
|
301 |
-
api_key=api_key,
|
302 |
-
chunk_options=chunk_options,
|
303 |
-
custom_chapter_pattern=custom_chapter_pattern
|
304 |
-
)
|
305 |
-
log_counter("epub_import_success", labels={"file_name": file.name})
|
306 |
-
result = f"📚 EPUB Imported Successfully:\n{status}"
|
307 |
-
elif file.name.lower().endswith('.zip'):
|
308 |
-
status = process_zip_file(
|
309 |
-
zip_file=file,
|
310 |
-
title=title,
|
311 |
-
author=author,
|
312 |
-
keywords=keywords,
|
313 |
-
custom_prompt=custom_prompt,
|
314 |
-
system_prompt=system_prompt,
|
315 |
-
summary=None,
|
316 |
-
auto_summarize=auto_summarize,
|
317 |
-
api_name=api_name,
|
318 |
-
api_key=api_key,
|
319 |
-
chunk_options=chunk_options
|
320 |
-
)
|
321 |
-
log_counter("zip_import_success", labels={"file_name": file.name})
|
322 |
-
result = f"📦 ZIP Processed Successfully:\n{status}"
|
323 |
-
elif file.name.lower().endswith(('.chm', '.html', '.pdf', '.xml', '.opml')):
|
324 |
-
file_type = file.name.split('.')[-1].upper()
|
325 |
-
log_counter("unsupported_file_type", labels={"file_type": file_type})
|
326 |
-
result = f"{file_type} file import is not yet supported."
|
327 |
-
else:
|
328 |
-
log_counter("unsupported_file_type", labels={"file_type": file.name.split('.')[-1]})
|
329 |
-
result = "❌ Unsupported file type. Please upload an `.epub` file or a `.zip` file containing `.epub` files."
|
330 |
|
331 |
-
|
332 |
-
|
333 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
return result
|
336 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
337 |
except ValueError as ve:
|
338 |
logging.exception(f"Error parsing input values: {str(ve)}")
|
339 |
-
log_counter("file_import_error", labels={"error": "Invalid input", "file_name": file.name})
|
340 |
return f"❌ Error: Invalid input for chunk size or overlap. Please enter valid numbers."
|
341 |
except Exception as e:
|
342 |
logging.exception(f"Error during file import: {str(e)}")
|
343 |
-
log_counter("file_import_error", labels={"error": str(e), "file_name": file.name})
|
344 |
return f"❌ Error during import: {str(e)}"
|
345 |
|
346 |
|
|
|
347 |
def read_epub(file_path):
|
348 |
"""
|
349 |
Reads and extracts text from an EPUB file.
|
@@ -424,9 +562,9 @@ def ingest_text_file(file_path, title=None, author=None, keywords=None):
|
|
424 |
|
425 |
# Add the text file to the database
|
426 |
add_media_with_keywords(
|
427 |
-
url=
|
428 |
title=title,
|
429 |
-
media_type='
|
430 |
content=content,
|
431 |
keywords=keywords,
|
432 |
prompt='No prompt for text files',
|
|
|
18 |
import zipfile
|
19 |
from datetime import datetime
|
20 |
import logging
|
21 |
+
import xml.etree.ElementTree as ET
|
22 |
+
import html2text
|
23 |
+
import csv
|
24 |
#
|
25 |
# External Imports
|
26 |
import ebooklib
|
|
|
244 |
return "\n".join(results)
|
245 |
|
246 |
|
247 |
+
def import_html(file_path, title=None, author=None, keywords=None, **kwargs):
|
248 |
+
"""
|
249 |
+
Imports an HTML file and converts it to markdown format.
|
250 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
try:
|
252 |
+
logging.info(f"Importing HTML file from {file_path}")
|
253 |
+
h = html2text.HTML2Text()
|
254 |
+
h.ignore_links = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
|
256 |
+
with open(file_path, 'r', encoding='utf-8') as file:
|
257 |
+
html_content = file.read()
|
|
|
258 |
|
259 |
+
markdown_content = h.handle(html_content)
|
|
|
|
|
|
|
260 |
|
261 |
+
# Extract title from HTML if not provided
|
262 |
+
if not title:
|
263 |
+
soup = BeautifulSoup(html_content, 'html.parser')
|
264 |
+
title_tag = soup.find('title')
|
265 |
+
title = title_tag.string if title_tag else os.path.basename(file_path)
|
266 |
|
267 |
+
return process_markdown_content(markdown_content, file_path, title, author, keywords, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
|
269 |
+
except Exception as e:
|
270 |
+
logging.exception(f"Error importing HTML file: {str(e)}")
|
271 |
+
raise
|
272 |
+
|
273 |
+
|
274 |
+
def import_xml(file_path, title=None, author=None, keywords=None, **kwargs):
|
275 |
+
"""
|
276 |
+
Imports an XML file and converts it to markdown format.
|
277 |
+
"""
|
278 |
+
try:
|
279 |
+
logging.info(f"Importing XML file from {file_path}")
|
280 |
+
tree = ET.parse(file_path)
|
281 |
+
root = tree.getroot()
|
282 |
+
|
283 |
+
# Convert XML to markdown
|
284 |
+
markdown_content = xml_to_markdown(root)
|
285 |
+
|
286 |
+
return process_markdown_content(markdown_content, file_path, title, author, keywords, **kwargs)
|
287 |
+
|
288 |
+
except Exception as e:
|
289 |
+
logging.exception(f"Error importing XML file: {str(e)}")
|
290 |
+
raise
|
291 |
+
|
292 |
+
|
293 |
+
def import_opml(file_path, title=None, author=None, keywords=None, **kwargs):
|
294 |
+
"""
|
295 |
+
Imports an OPML file and converts it to markdown format.
|
296 |
+
"""
|
297 |
+
try:
|
298 |
+
logging.info(f"Importing OPML file from {file_path}")
|
299 |
+
tree = ET.parse(file_path)
|
300 |
+
root = tree.getroot()
|
301 |
+
|
302 |
+
# Extract title from OPML if not provided
|
303 |
+
if not title:
|
304 |
+
title_elem = root.find(".//title")
|
305 |
+
title = title_elem.text if title_elem is not None else os.path.basename(file_path)
|
306 |
+
|
307 |
+
# Convert OPML to markdown
|
308 |
+
markdown_content = opml_to_markdown(root)
|
309 |
+
|
310 |
+
return process_markdown_content(markdown_content, file_path, title, author, keywords, **kwargs)
|
311 |
+
|
312 |
+
except Exception as e:
|
313 |
+
logging.exception(f"Error importing OPML file: {str(e)}")
|
314 |
+
raise
|
315 |
+
|
316 |
+
|
317 |
+
def xml_to_markdown(element, level=0):
    """
    Recursively converts XML elements to markdown format.

    Each element below the root becomes a heading named after its tag (heading
    depth is capped at 6), followed by its own text. Text that follows a child
    element inside its parent (the child's ``tail``) is emitted after that
    child's section so mixed content is not lost.

    Args:
        element: The ElementTree element to render.
        level: Nesting depth of ``element``; 0 for the root, which gets no heading.

    Returns:
        The markdown text for ``element`` and all of its descendants.
    """
    parts = []

    # Add element name as heading
    if level > 0:
        parts.append(f"{'#' * min(level, 6)} {element.tag}\n\n")

    # Add element text if it exists
    if element.text and element.text.strip():
        parts.append(f"{element.text.strip()}\n\n")

    # Process child elements
    for child in element:
        parts.append(xml_to_markdown(child, level + 1))
        # Text between this child's end tag and the next sibling belongs to the parent
        if child.tail and child.tail.strip():
            parts.append(f"{child.tail.strip()}\n\n")

    return "".join(parts)
|
336 |
|
337 |
+
|
338 |
+
def opml_to_markdown(root):
    """
    Converts OPML structure to markdown format.

    The result starts with a "Table of Contents" heading followed by a bullet
    list built from the ``<outline>`` elements under the first ``<body>``.
    Nested outlines are indented two spaces per level so Markdown renders them
    as nested lists.

    Args:
        root: Root ElementTree element of the OPML document.

    Returns:
        The markdown text; only the heading when the document has no ``<body>``.
    """
    indent_unit = "  "  # two spaces: the minimum that nests under a "- " bullet

    def process_outline(outline, level=0):
        # Render the direct <outline> children of `outline`, then recurse into each
        lines = []
        for item in outline.findall("outline"):
            text = item.get("text", "")
            lines.append(f"{indent_unit * level}- {text}\n")
            lines.append(process_outline(item, level + 1))
        return "".join(lines)

    markdown = "# Table of Contents\n\n"

    body = root.find(".//body")
    if body is not None:
        markdown += process_outline(body)

    return markdown
|
357 |
+
|
358 |
+
|
359 |
+
def process_markdown_content(markdown_content, file_path, title, author, keywords, **kwargs):
    """
    Processes markdown content and adds it to the database.

    Args:
        markdown_content: The markdown text to store as a single segment.
        file_path: Source file path; stored as the media URL and used as the
            title fallback (base name) when ``title`` is falsy.
        title: Document title, or a falsy value to fall back to the file name.
        author: Uploader name; "Unknown" when falsy.
        keywords: Comma-separated string (or an iterable) of keywords; may be falsy.
        **kwargs: Optional ``summary`` and ``custom_prompt`` values.

    Returns:
        A status string containing the resolved title and the value returned by
        add_media_to_database.
    """
    # Resolve the title once so the stored record and the status message agree
    resolved_title = title or os.path.basename(file_path)

    info_dict = {
        'title': resolved_title,
        'uploader': author or "Unknown",
        'ingestion_date': datetime.now().strftime('%Y-%m-%d')
    }

    # Create segments (you may want to adjust the chunking method)
    segments = [{'Text': markdown_content}]

    # Normalise keywords: strip whitespace around entries and drop empty ones
    if isinstance(keywords, str):
        keyword_list = [kw.strip() for kw in keywords.split(',') if kw.strip()]
    else:
        keyword_list = list(keywords) if keywords else []

    # Add to database
    result = add_media_to_database(
        url=file_path,
        info_dict=info_dict,
        segments=segments,
        # `or` so that an explicit summary=None still gets the placeholder
        summary=kwargs.get('summary') or "No summary provided",
        keywords=keyword_list,
        custom_prompt_input=kwargs.get('custom_prompt'),
        whisper_model="Imported",
        media_type="document",
        overwrite=False
    )

    return f"Document '{resolved_title}' imported successfully. Database result: {result}"
|
386 |
+
|
387 |
+
|
388 |
+
def import_file_handler(files,
                        author,
                        keywords,
                        system_prompt,
                        custom_prompt,
                        auto_summarize,
                        api_name,
                        api_key,
                        max_chunk_size,
                        chunk_overlap,
                        custom_chapter_pattern):
    """
    Import one or more uploaded .epub / .zip files and report a per-file status.

    Args:
        files: A single uploaded file object or a list of them; each must expose
            its on-disk path as ``.name``.
        author: Author passed to the importers.
        keywords: Keywords passed to the importers.
        system_prompt: System prompt passed to the importers.
        custom_prompt: Custom prompt passed to the importers.
        auto_summarize: Auto-summarize flag passed to the importers.
        api_name: API name passed to the importers.
        api_key: API key passed to the importers.
        max_chunk_size: Chunk size as str/int/float; any other type means 4000.
        chunk_overlap: Overlap as str/int/float; any other type means 0.
        custom_chapter_pattern: Optional chapter regex for chapter chunking.

    Returns:
        A string with one status entry per file separated by blank lines, or a
        single error message when the inputs themselves are invalid.
    """
    try:
        if not files:
            return "No files uploaded."

        # Convert single file to list for consistent processing
        if not isinstance(files, list):
            files = [files]

        # Parse the numeric settings once, up front, so that only a genuinely bad
        # chunk size / overlap produces the "invalid input" message.
        try:
            chunk_size = _coerce_chunk_setting(max_chunk_size, 4000)
            overlap = _coerce_chunk_setting(chunk_overlap, 0)
        except (ValueError, OverflowError) as ve:
            logging.exception(f"Error parsing input values: {str(ve)}")
            return f"❌ Error: Invalid input for chunk size or overlap. Please enter valid numbers."

        results = []
        for file in files:
            log_counter("file_import_attempt", labels={"file_name": file.name})

            file_path = file.name
            if not os.path.exists(file_path):
                results.append(f"❌ File not found: {file.name}")
                continue

            lowered_path = file_path.lower()
            if not lowered_path.endswith(('.epub', '.zip')):
                results.append(f"❌ Unsupported file type: {file.name}")
                continue

            # Fresh dict per file so an importer cannot leak state into the next one
            chunk_options = {
                'method': 'chapter',
                'max_size': chunk_size,
                'overlap': overlap,
                'custom_chapter_pattern': custom_chapter_pattern if custom_chapter_pattern else None
            }

            start_time = datetime.now()

            # Extract title from filename
            title = os.path.splitext(os.path.basename(file_path))[0]

            try:
                if lowered_path.endswith('.epub'):
                    status = import_epub(
                        file_path,
                        title=title,  # Use filename as title
                        author=author,
                        keywords=keywords,
                        custom_prompt=custom_prompt,
                        system_prompt=system_prompt,
                        summary=None,
                        auto_summarize=auto_summarize,
                        api_name=api_name,
                        api_key=api_key,
                        chunk_options=chunk_options,
                        custom_chapter_pattern=custom_chapter_pattern
                    )
                    log_counter("epub_import_success", labels={"file_name": file.name})
                    results.append(f"📚 {file.name}: {status}")
                else:
                    status = process_zip_file(
                        zip_file=file,
                        title=None,  # Let each file use its own name
                        author=author,
                        keywords=keywords,
                        custom_prompt=custom_prompt,
                        system_prompt=system_prompt,
                        summary=None,
                        auto_summarize=auto_summarize,
                        api_name=api_name,
                        api_key=api_key,
                        chunk_options=chunk_options
                    )
                    log_counter("zip_import_success", labels={"file_name": file.name})
                    results.append(f"📦 {file.name}: {status}")
            except Exception as e:
                # One bad file must not discard the results of the others
                logging.exception(f"Error during file import: {str(e)}")
                results.append(f"❌ {file.name}: Error during import: {str(e)}")
                continue

            processing_time = (datetime.now() - start_time).total_seconds()
            log_histogram("file_import_duration", processing_time, labels={"file_name": file.name})

        return "\n\n".join(results)

    except Exception as e:
        logging.exception(f"Error during file import: {str(e)}")
        return f"❌ Error during import: {str(e)}"


def _coerce_chunk_setting(value, default):
    """Return ``value`` as an int, or ``default`` when it is not a str, int or float."""
    if isinstance(value, (str, int, float)):
        return int(value)
    return default
|
482 |
|
483 |
|
484 |
+
|
485 |
def read_epub(file_path):
|
486 |
"""
|
487 |
Reads and extracts text from an EPUB file.
|
|
|
562 |
|
563 |
# Add the text file to the database
|
564 |
add_media_with_keywords(
|
565 |
+
url="its_a_book",
|
566 |
title=title,
|
567 |
+
media_type='book',
|
568 |
content=content,
|
569 |
keywords=keywords,
|
570 |
prompt='No prompt for text files',
|
App_Function_Libraries/Chat/Chat_Functions.py
ADDED
@@ -0,0 +1,453 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Chat_Functions.py
|
2 |
+
# Chat functions for interacting with the LLMs as chatbots
|
3 |
+
import base64
|
4 |
+
# Imports
|
5 |
+
import json
|
6 |
+
import logging
|
7 |
+
import os
|
8 |
+
import re
|
9 |
+
import sqlite3
|
10 |
+
import tempfile
|
11 |
+
import time
|
12 |
+
from datetime import datetime
|
13 |
+
from pathlib import Path
|
14 |
+
#
|
15 |
+
# External Imports
|
16 |
+
#
|
17 |
+
# Local Imports
|
18 |
+
from App_Function_Libraries.DB.DB_Manager import start_new_conversation, delete_messages_in_conversation, save_message
|
19 |
+
from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_db_connection, get_conversation_name
|
20 |
+
from App_Function_Libraries.LLM_API_Calls import chat_with_openai, chat_with_anthropic, chat_with_cohere, \
|
21 |
+
chat_with_groq, chat_with_openrouter, chat_with_deepseek, chat_with_mistral, chat_with_huggingface
|
22 |
+
from App_Function_Libraries.LLM_API_Calls_Local import chat_with_aphrodite, chat_with_local_llm, chat_with_ollama, \
|
23 |
+
chat_with_kobold, chat_with_llama, chat_with_oobabooga, chat_with_tabbyapi, chat_with_vllm, chat_with_custom_openai
|
24 |
+
from App_Function_Libraries.DB.SQLite_DB import load_media_content
|
25 |
+
from App_Function_Libraries.Utils.Utils import generate_unique_filename, load_and_log_configs
|
26 |
+
from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
|
27 |
+
#
|
28 |
+
####################################################################################################
|
29 |
+
#
|
30 |
+
# Functions:
|
31 |
+
|
32 |
+
def approximate_token_count(history):
    """
    Estimate the token count of a chat history by counting whitespace-separated words.

    Args:
        history: Iterable of (user_message, bot_message) pairs; falsy messages are skipped.

    Returns:
        The number of whitespace-separated words across all non-empty messages.
    """
    pieces = []
    for user_msg, bot_msg in history:
        for text in (user_msg, bot_msg):
            if text:
                pieces.append(text + ' ')
    return len(''.join(pieces).split())
|
41 |
+
|
42 |
+
def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message=None):
    """
    Dispatch a chat request to the backend selected by ``api_endpoint``.

    Args:
        api_endpoint: Backend name, matched case-insensitively (e.g. "openai",
            "anthropic", "llama.cpp", "custom-openai-api").
        api_key: Key for the backend; any falsy value is normalised to None.
        input_data: Conversation text to send.
        prompt: Custom prompt forwarded to the backend.
        temp: Sampling temperature. It is not forwarded to the anthropic and
            vllm calls, which do not take it here.
        system_message: Optional system prompt. It is not forwarded to the
            huggingface call.

    Returns:
        The backend's response, or the string "An error occurred: ..." when the
        endpoint is unsupported or the backend call raises.
    """
    log_counter("chat_api_call_attempt", labels={"api_endpoint": api_endpoint})
    start_time = time.time()
    if not api_key:
        api_key = None
    model = None
    try:
        logging.info(f"Debug - Chat API Call - API Endpoint: {api_endpoint}")
        # Never write the secret itself to the log; record only whether one was supplied
        logging.info(f"Debug - Chat API Call - API Key provided: {api_key is not None}")

        endpoint = api_endpoint.lower()

        if endpoint == 'openai':
            response = chat_with_openai(api_key, input_data, prompt, temp, system_message)

        elif endpoint == 'anthropic':
            # Retrieve the model from config
            loaded_config_data = load_and_log_configs()
            model = loaded_config_data['models']['anthropic'] if loaded_config_data else None
            response = chat_with_anthropic(
                api_key=api_key,
                input_data=input_data,
                model=model,
                custom_prompt_arg=prompt,
                system_prompt=system_message
            )

        elif endpoint == "cohere":
            response = chat_with_cohere(
                api_key,
                input_data,
                model=model,
                custom_prompt_arg=prompt,
                system_prompt=system_message,
                temp=temp
            )

        elif endpoint == "groq":
            response = chat_with_groq(api_key, input_data, prompt, temp, system_message)

        elif endpoint == "openrouter":
            response = chat_with_openrouter(api_key, input_data, prompt, temp, system_message)

        elif endpoint == "deepseek":
            response = chat_with_deepseek(api_key, input_data, prompt, temp, system_message)

        elif endpoint == "mistral":
            response = chat_with_mistral(api_key, input_data, prompt, temp, system_message)

        elif endpoint == "llama.cpp":
            response = chat_with_llama(input_data, prompt, temp, None, api_key, system_message)

        elif endpoint == "kobold":
            response = chat_with_kobold(input_data, api_key, prompt, temp, system_message)

        elif endpoint == "ooba":
            response = chat_with_oobabooga(input_data, api_key, prompt, temp, system_message)

        elif endpoint == "tabbyapi":
            response = chat_with_tabbyapi(input_data, prompt, temp, system_message)

        elif endpoint == "vllm":
            response = chat_with_vllm(input_data, prompt, system_message)

        elif endpoint == "local-llm":
            response = chat_with_local_llm(input_data, prompt, temp, system_message)

        elif endpoint == "huggingface":
            response = chat_with_huggingface(api_key, input_data, prompt, temp)  # , system_message)

        elif endpoint == "ollama":
            response = chat_with_ollama(input_data, prompt, None, api_key, temp, system_message)

        elif endpoint == "aphrodite":
            response = chat_with_aphrodite(input_data, prompt, temp, system_message)

        elif endpoint == "custom-openai-api":
            response = chat_with_custom_openai(api_key, input_data, prompt, temp, system_message)

        else:
            raise ValueError(f"Unsupported API endpoint: {api_endpoint}")

        call_duration = time.time() - start_time
        log_histogram("chat_api_call_duration", call_duration, labels={"api_endpoint": api_endpoint})
        log_counter("chat_api_call_success", labels={"api_endpoint": api_endpoint})
        return response

    except Exception as e:
        log_counter("chat_api_call_error", labels={"api_endpoint": api_endpoint, "error": str(e)})
        logging.error(f"Error in chat function: {str(e)}")
        return f"An error occurred: {str(e)}"
|
130 |
+
|
131 |
+
|
132 |
+
def chat(message, history, media_content, selected_parts, api_endpoint, api_key, prompt, temperature,
         system_message=None):
    """
    Build the model input from media content plus chat history and call the chosen backend.

    Args:
        message: The new user message.
        history: Iterable of (user_message, assistant_response) pairs; None is
            treated as an empty history.
        media_content: Mapping of part name to text (e.g. "content", "summary");
            None is treated as empty.
        selected_parts: A part name or list/tuple of part names to prepend.
        api_endpoint: Backend name passed to chat_api_call.
        api_key: Backend key; may be None.
        prompt: Custom prompt passed to chat_api_call.
        temperature: Sampling temperature; None or "" means 0.7. An explicit 0
            is honoured.
        system_message: Optional system prompt.

    Returns:
        The value returned by chat_api_call, or "An error occurred: ..." when
        building the request fails.
    """
    log_counter("chat_attempt", labels={"api_endpoint": api_endpoint})
    start_time = time.time()
    try:
        logging.info(f"Debug - Chat Function - Message: {message}")
        logging.info(f"Debug - Chat Function - Media Content: {media_content}")
        logging.info(f"Debug - Chat Function - Selected Parts: {selected_parts}")
        logging.info(f"Debug - Chat Function - API Endpoint: {api_endpoint}")

        # Ensure selected_parts is a list
        if not isinstance(selected_parts, (list, tuple)):
            selected_parts = [selected_parts] if selected_parts else []

        # A missing media selection must not break plain chatting
        media_content = media_content or {}

        # Combine the selected parts of the media content
        combined_content = "\n\n".join(
            f"{part.capitalize()}: {media_content.get(part, '')}"
            for part in selected_parts if part in media_content
        )

        # Prepare the input for the API
        transcript = [f"{combined_content}\n\n"] if combined_content else []
        for old_message, old_response in (history or []):
            transcript.append(f"{old_message}\nAssistant: {old_response}\n\n")
        transcript.append(f"{message}\n")
        input_data = "".join(transcript)

        if system_message:
            logging.debug(f"Debug - Chat Function - System Message: {system_message}")

        # Only a missing value falls back to the default; 0 is a valid temperature
        temp = 0.7 if temperature is None or temperature == "" else float(temperature)

        logging.debug(f"Debug - Chat Function - Temperature: {temp}")
        # api_key may be None (local backends), and secrets do not belong in logs
        logging.debug(f"Debug - Chat Function - API Key provided: {bool(api_key)}")
        logging.debug(f"Debug - Chat Function - Prompt: {prompt}")

        # Use the existing API request code based on the selected endpoint
        response = chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message)

        chat_duration = time.time() - start_time
        log_histogram("chat_duration", chat_duration, labels={"api_endpoint": api_endpoint})
        log_counter("chat_success", labels={"api_endpoint": api_endpoint})
        return response
    except Exception as e:
        log_counter("chat_error", labels={"api_endpoint": api_endpoint, "error": str(e)})
        logging.error(f"Error in chat function: {str(e)}")
        return f"An error occurred: {str(e)}"
|
182 |
+
|
183 |
+
|
184 |
+
def save_chat_history_to_db_wrapper(chatbot, conversation_id, media_content, media_name=None):
    """
    Persist a chat history to the database, creating the conversation when needed.

    For a new conversation (falsy ``conversation_id``) the media id and name are
    taken from ``media_content['content']`` (a dict or a JSON string) when
    possible; otherwise "unknown_media" / "Unnamed Media" are used. Existing
    messages of the conversation are deleted and the history is re-saved.

    Args:
        chatbot: Iterable of (user_message, assistant_message) pairs.
        conversation_id: Existing conversation id, or a falsy value to create one.
        media_content: Media payload used only to derive id/name for new conversations.
        media_name: Optional explicit name that overrides the derived one.

    Returns:
        A ``(conversation_id, status_message)`` tuple; the message describes
        success or the failure that occurred.
    """
    log_counter("save_chat_history_to_db_attempt")
    start_time = time.time()
    logging.info(f"Attempting to save chat history. Media content type: {type(media_content)}")

    try:
        # First check if we can access the database
        try:
            with get_db_connection() as conn:
                cursor = conn.cursor()
                cursor.execute("SELECT 1")
        except sqlite3.DatabaseError as db_error:
            logging.error(f"Database is corrupted or inaccessible: {str(db_error)}")
            return conversation_id, "Database error: The database file appears to be corrupted. Please contact support."

        # Now attempt the save
        if not conversation_id:
            # Only for new conversations, not updates
            media_id = None
            if isinstance(media_content, dict) and 'content' in media_content:
                try:
                    content = media_content['content']
                    content_json = content if isinstance(content, dict) else json.loads(content)
                    media_id = content_json.get('webpage_url')
                    # `or` also covers a present-but-null title
                    media_name = media_name or content_json.get('title') or 'Unnamed Media'
                except (json.JSONDecodeError, AttributeError, TypeError) as e:
                    # TypeError: content is neither a dict nor a str/bytes (e.g. None)
                    logging.error(f"Error processing media content: {str(e)}")
                    media_id = "unknown_media"
                    media_name = media_name or "Unnamed Media"
            else:
                media_id = "unknown_media"
                media_name = media_name or "Unnamed Media"

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            conversation_title = f"{media_name}_{timestamp}"
            conversation_id = start_new_conversation(title=conversation_title, media_id=media_id)
            logging.info(f"Created new conversation with ID: {conversation_id}")

        # For both new and existing conversations
        try:
            delete_messages_in_conversation(conversation_id)
            for user_msg, assistant_msg in chatbot:
                if user_msg:
                    save_message(conversation_id, "user", user_msg)
                if assistant_msg:
                    save_message(conversation_id, "assistant", assistant_msg)
        except sqlite3.DatabaseError as db_error:
            logging.error(f"Database error during message save: {str(db_error)}")
            return conversation_id, "Database error: Unable to save messages. Please try again or contact support."

        save_duration = time.time() - start_time
        log_histogram("save_chat_history_to_db_duration", save_duration)
        log_counter("save_chat_history_to_db_success")

        return conversation_id, "Chat history saved successfully!"

    except Exception as e:
        log_counter("save_chat_history_to_db_error", labels={"error": str(e)})
        error_message = f"Failed to save chat history: {str(e)}"
        logging.error(error_message, exc_info=True)
        return conversation_id, error_message
|
245 |
+
|
246 |
+
|
247 |
+
def save_chat_history(history, conversation_id, media_content):
    """
    Write the chat history as a JSON file in the temp directory and return its path.

    The file is first written under a temporary name, then renamed to
    "<sanitised conversation name>_<timestamp>.json" (made unique by
    generate_unique_filename).

    Args:
        history: Chat history passed to generate_chat_history_content.
        conversation_id: Conversation id passed to generate_chat_history_content.
        media_content: Media payload passed to generate_chat_history_content.

    Returns:
        The final file path, or None when anything fails (the error is logged).
    """
    log_counter("save_chat_history_attempt")
    start_time = time.time()
    temp_file_path = None
    try:
        content, conversation_name = generate_chat_history_content(history, conversation_id, media_content)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_conversation_name = re.sub(r'[^a-zA-Z0-9_-]', '_', conversation_name)
        base_filename = f"{safe_conversation_name}_{timestamp}.json"

        # Create a temporary file
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', encoding='utf-8') as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(content)

        # Generate a unique filename
        target_dir = os.path.dirname(temp_file_path)
        unique_filename = generate_unique_filename(target_dir, base_filename)
        final_path = os.path.join(target_dir, unique_filename)

        # Rename the temporary file to the unique filename
        os.rename(temp_file_path, final_path)
        temp_file_path = None  # renamed successfully; nothing left to clean up

        save_duration = time.time() - start_time
        log_histogram("save_chat_history_duration", save_duration)
        log_counter("save_chat_history_success")
        return final_path
    except Exception as e:
        log_counter("save_chat_history_error", labels={"error": str(e)})
        logging.error(f"Error saving chat history: {str(e)}")
        return None
    finally:
        # Do not leave an orphaned temp file behind when naming or renaming failed
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
            except OSError as cleanup_error:
                logging.warning(f"Could not remove temporary chat history file: {str(cleanup_error)}")
|
277 |
+
|
278 |
+
|
279 |
+
def generate_chat_history_content(history, conversation_id, media_content):
    """
    Serialise a chat history to a JSON string and choose a name for the export.

    The name comes from get_conversation_name(conversation_id); when that is
    empty it falls back to "<media name>-chat", then to "chat-<timestamp>".

    Args:
        history: Chat history. Entries that are two-item lists/tuples are treated
            as (user, bot) pairs and expanded into two messages (None sides are
            skipped). Any other entry is kept as one message whose role
            alternates by position, as before.
        conversation_id: Id used to look up the conversation name.
        media_content: Media payload used for the fallback name.

    Returns:
        A ``(json_text, conversation_name)`` tuple.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    conversation_name = get_conversation_name(conversation_id)

    if not conversation_name:
        media_name = extract_media_name(media_content)
        if media_name:
            conversation_name = f"{media_name}-chat"
        else:
            conversation_name = f"chat-{timestamp}"  # Fallback name

    messages = []
    for index, entry in enumerate(history or []):
        if isinstance(entry, (list, tuple)) and len(entry) == 2:
            # One chat turn: emit the user side and the bot side separately so
            # the bot reply is not lost and the roles are correct.
            user_msg, bot_msg = entry
            if user_msg is not None:
                messages.append({"role": "user", "content": user_msg})
            if bot_msg is not None:
                messages.append({"role": "bot", "content": bot_msg})
        else:
            messages.append({
                "role": "user" if index % 2 == 0 else "bot",
                "content": entry[0] if isinstance(entry, tuple) else entry
            })

    chat_data = {
        "conversation_id": conversation_id,
        "conversation_name": conversation_name,
        "timestamp": timestamp,
        "history": messages
    }

    return json.dumps(chat_data, indent=2), conversation_name
|
305 |
+
|
306 |
+
|
307 |
+
def extract_media_name(media_content):
    """
    Return the media title (or name) found in ``media_content['content']``.

    Args:
        media_content: Expected to be a dict whose 'content' entry is a dict or a
            JSON string encoding one.

    Returns:
        The 'title' value, else the 'name' value, else None. None is also
        returned (with a warning logged) when the input has an unexpected shape
        or the JSON string cannot be parsed.
    """
    payload = media_content.get('content', {}) if isinstance(media_content, dict) else None

    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except json.JSONDecodeError:
            logging.warning("Failed to parse media_content JSON string")
            return None

    # Try to extract title from the content
    if isinstance(payload, dict):
        title = payload.get('title')
        return title if title else payload.get('name')

    logging.warning(f"Unexpected media_content format: {type(media_content)}")
    return None
|
323 |
+
|
324 |
+
|
325 |
+
def update_chat_content(selected_item, use_content, use_summary, use_prompt, item_mapping):
    """
    Load the media record for the selected item and work out which parts to use in chat.

    Args:
        selected_item: Display label chosen by the user.
        use_content: Whether to include the "content" part when present.
        use_summary: Whether to include the "summary" part when present.
        use_prompt: Whether to include the "prompt" part when present.
        item_mapping: Mapping from display label to media id.

    Returns:
        ``(content, selected_parts)`` where content is what load_media_content
        returned; ``({}, [])`` when nothing valid is selected.
    """
    log_counter("update_chat_content_attempt")
    start_time = time.time()
    logging.debug(f"Debug - Update Chat Content - Selected Item: {selected_item}\n")
    logging.debug(f"Debug - Update Chat Content - Use Content: {use_content}\n\n\n\n")
    logging.debug(f"Debug - Update Chat Content - Use Summary: {use_summary}\n\n")
    logging.debug(f"Debug - Update Chat Content - Use Prompt: {use_prompt}\n\n")
    logging.debug(f"Debug - Update Chat Content - Item Mapping: {item_mapping}\n\n")

    # Guard clause: nothing to load without a selection that maps to a media id
    if not (selected_item and selected_item in item_mapping):
        log_counter("update_chat_content_error", labels={"error": "No item selected or item not in mapping"})
        print("Debug - Update Chat Content - No item selected or item not in mapping")
        return {}, []

    content = load_media_content(item_mapping[selected_item])

    # Keep a part only when it was requested and the record actually has it
    requested = ((use_content, "content"), (use_summary, "summary"), (use_prompt, "prompt"))
    selected_parts = []
    for wanted, part_name in requested:
        if wanted and part_name in content:
            selected_parts.append(part_name)

    # Modified debug print
    if isinstance(content, dict):
        print(f"Debug - Update Chat Content - Content keys: {list(content.keys())}")
        for key, value in content.items():
            print(f"Debug - Update Chat Content - {key} (first 500 char): {str(value)[:500]}\n\n\n\n")
    else:
        print(f"Debug - Update Chat Content - Content(first 500 char): {str(content)[:500]}\n\n\n\n")

    print(f"Debug - Update Chat Content - Selected Parts: {selected_parts}")
    update_duration = time.time() - start_time
    log_histogram("update_chat_content_duration", update_duration)
    log_counter("update_chat_content_success")
    return content, selected_parts
|
362 |
+
|
363 |
+
#
|
364 |
+
# End of Chat functions
|
365 |
+
#######################################################################################################################
|
366 |
+
|
367 |
+
|
368 |
+
#######################################################################################################################
|
369 |
+
#
|
370 |
+
# Character Card Functions
|
371 |
+
|
372 |
+
# Relative path to the character-card JSON store.
# NOTE(review): save_character() and load_characters() in this module build their own path from
# os.path.dirname(__file__) rather than reading this constant — confirm whether it is still used.
CHARACTERS_FILE = Path('.', 'Helper_Scripts', 'Character_Cards', 'Characters.json')
|
373 |
+
|
374 |
+
|
375 |
+
def save_character(character_data):
    """
    Add or replace a character in Characters.json, storing its image as a separate PNG.

    When ``character_data`` has an 'image' entry (base64 text) it is decoded and
    written next to the JSON file; the dict is then modified in place:
    'image_path' is set to the absolute file path and 'image' is removed.

    Args:
        character_data: Character dict; must contain 'name'.

    Returns:
        None. Failures are logged and counted, not raised.
    """
    log_counter("save_character_attempt")
    start_time = time.time()
    characters_file = os.path.join(os.path.dirname(__file__), '..', 'Helper_Scripts', 'Character_Cards', 'Characters.json')
    characters_dir = os.path.dirname(characters_file)

    try:
        # First save on a fresh checkout: the target directory may not exist yet
        os.makedirs(characters_dir, exist_ok=True)

        if os.path.exists(characters_file):
            with open(characters_file, 'r', encoding='utf-8') as f:
                characters = json.load(f)
        else:
            characters = {}

        char_name = character_data['name']

        # Save the image separately if it exists
        if 'image' in character_data:
            img_data = base64.b64decode(character_data['image'])
            # The name comes from card data; neutralise path separators so the
            # image cannot be written outside characters_dir.
            safe_name = re.sub(r'[\\/:]', '_', char_name.replace(' ', '_'))
            img_path = os.path.join(characters_dir, f"{safe_name}.png")
            with open(img_path, 'wb') as f:
                f.write(img_data)
            character_data['image_path'] = os.path.abspath(img_path)
            del character_data['image']  # Remove the base64 image data from the JSON

        characters[char_name] = character_data

        with open(characters_file, 'w', encoding='utf-8') as f:
            json.dump(characters, f, indent=2)

        save_duration = time.time() - start_time
        log_histogram("save_character_duration", save_duration)
        log_counter("save_character_success")
        logging.info(f"Character '{char_name}' saved successfully.")
    except Exception as e:
        log_counter("save_character_error", labels={"error": str(e)})
        logging.error(f"Error saving character: {str(e)}")
|
412 |
+
|
413 |
+
|
414 |
+
def load_characters():
    """
    Load all saved characters from Characters.json.

    Returns:
        The parsed JSON content of the characters file, or an empty dict when
        the file does not exist or cannot be read or parsed.
    """
    log_counter("load_characters_attempt")
    start_time = time.time()
    try:
        characters_file = os.path.join(os.path.dirname(__file__), '..', 'Helper_Scripts', 'Character_Cards', 'Characters.json')
        if not os.path.exists(characters_file):
            logging.warning(f"Characters file not found: {characters_file}")
            return {}

        with open(characters_file, 'r', encoding='utf-8') as f:
            characters = json.load(f)
        logging.debug(f"Loaded {len(characters)} characters from {characters_file}")
        load_duration = time.time() - start_time
        log_histogram("load_characters_duration", load_duration)
        log_counter("load_characters_success", labels={"character_count": len(characters)})
        return characters
    except Exception as e:
        log_counter("load_characters_error", labels={"error": str(e)})
        # Log the cause as well; otherwise a corrupt file looks like "no characters"
        logging.error(f"Error loading characters: {str(e)}")
        return {}
|
433 |
+
|
434 |
+
|
435 |
+
|
436 |
+
def get_character_names():
    """
    Return the names of all saved characters.

    Returns:
        A list of the keys of the mapping returned by load_characters(), or an
        empty list when an error occurs (the error is logged and counted).
    """
    log_counter("get_character_names_attempt")
    started = time.time()
    try:
        names = [*load_characters().keys()]
        log_histogram("get_character_names_duration", time.time() - started)
        log_counter("get_character_names_success", labels={"name_count": len(names)})
    except Exception as e:
        log_counter("get_character_names_error", labels={"error": str(e)})
        logging.error(f"Error getting character names: {str(e)}")
        return []
    return names
|
450 |
+
|
451 |
+
#
|
452 |
+
# End of Chat.py
|
453 |
+
##########################################################################################################################
|
App_Function_Libraries/Chat/__init__.py
ADDED
File without changes
|
App_Function_Libraries/Chunk_Lib.py
CHANGED
@@ -11,6 +11,7 @@ import json
|
|
11 |
import logging
|
12 |
import re
|
13 |
from typing import Any, Dict, List, Optional, Tuple
|
|
|
14 |
#
|
15 |
# Import 3rd party
|
16 |
from openai import OpenAI
|
@@ -23,7 +24,6 @@ from sklearn.feature_extraction.text import TfidfVectorizer
|
|
23 |
from sklearn.metrics.pairwise import cosine_similarity
|
24 |
#
|
25 |
# Import Local
|
26 |
-
from App_Function_Libraries.Tokenization_Methods_Lib import openai_tokenize
|
27 |
from App_Function_Libraries.Utils.Utils import load_comprehensive_config
|
28 |
#
|
29 |
#######################################################################################################################
|
@@ -106,6 +106,7 @@ def load_document(file_path: str) -> str:
|
|
106 |
|
107 |
def improved_chunking_process(text: str, chunk_options: Dict[str, Any] = None) -> List[Dict[str, Any]]:
|
108 |
logging.debug("Improved chunking process started...")
|
|
|
109 |
|
110 |
# Extract JSON metadata if present
|
111 |
json_content = {}
|
@@ -125,49 +126,70 @@ def improved_chunking_process(text: str, chunk_options: Dict[str, Any] = None) -
|
|
125 |
text = text[len(header_text):].strip()
|
126 |
logging.debug(f"Extracted header text: {header_text}")
|
127 |
|
128 |
-
|
|
|
129 |
if chunk_options:
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
-
if language is None:
|
138 |
-
|
|
|
|
|
139 |
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
|
|
|
|
|
|
|
|
|
|
144 |
|
145 |
chunks_with_metadata = []
|
146 |
total_chunks = len(chunks)
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
|
|
164 |
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
|
170 |
-
|
|
|
|
|
|
|
|
|
171 |
|
172 |
|
173 |
def multi_level_chunking(text: str, method: str, max_size: int, overlap: int, language: str) -> List[str]:
|
@@ -220,24 +242,35 @@ def determine_chunk_position(relative_position: float) -> str:
|
|
220 |
|
221 |
def chunk_text_by_words(text: str, max_words: int = 300, overlap: int = 0, language: str = None) -> List[str]:
|
222 |
logging.debug("chunk_text_by_words...")
|
223 |
-
|
224 |
-
language = detect_language(text)
|
225 |
-
|
226 |
-
if language.startswith('zh'): # Chinese
|
227 |
-
import jieba
|
228 |
-
words = list(jieba.cut(text))
|
229 |
-
elif language == 'ja': # Japanese
|
230 |
-
import fugashi
|
231 |
-
tagger = fugashi.Tagger()
|
232 |
-
words = [word.surface for word in tagger(text)]
|
233 |
-
else: # Default to simple splitting for other languages
|
234 |
-
words = text.split()
|
235 |
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
|
242 |
|
243 |
def chunk_text_by_sentences(text: str, max_sentences: int = 10, overlap: int = 0, language: str = None) -> List[str]:
|
@@ -338,24 +371,24 @@ def get_chunk_metadata(chunk: str, full_text: str, chunk_type: str = "generic",
|
|
338 |
"""
|
339 |
chunk_length = len(chunk)
|
340 |
start_index = full_text.find(chunk)
|
341 |
-
end_index = start_index + chunk_length if start_index != -1 else
|
342 |
|
343 |
# Calculate a hash for the chunk
|
344 |
chunk_hash = hashlib.md5(chunk.encode()).hexdigest()
|
345 |
|
346 |
metadata = {
|
347 |
-
'start_index': start_index,
|
348 |
-
'end_index': end_index,
|
349 |
-
'word_count': len(chunk.split()),
|
350 |
-
'char_count': chunk_length,
|
351 |
'chunk_type': chunk_type,
|
352 |
'language': language,
|
353 |
'chunk_hash': chunk_hash,
|
354 |
-
'relative_position': start_index / len(full_text) if len(full_text) > 0 and start_index != -1 else 0
|
355 |
}
|
356 |
|
357 |
if chunk_type == "chapter":
|
358 |
-
metadata['chapter_number'] = chapter_number
|
359 |
metadata['chapter_pattern'] = chapter_pattern
|
360 |
|
361 |
return metadata
|
@@ -943,6 +976,151 @@ def chunk_ebook_by_chapters(text: str, chunk_options: Dict[str, Any]) -> List[Di
|
|
943 |
#
|
944 |
# End of ebook chapter chunking
|
945 |
#######################################################################################################################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
946 |
|
947 |
#######################################################################################################################
|
948 |
#
|
|
|
11 |
import logging
|
12 |
import re
|
13 |
from typing import Any, Dict, List, Optional, Tuple
|
14 |
+
import xml.etree.ElementTree as ET
|
15 |
#
|
16 |
# Import 3rd party
|
17 |
from openai import OpenAI
|
|
|
24 |
from sklearn.metrics.pairwise import cosine_similarity
|
25 |
#
|
26 |
# Import Local
|
|
|
27 |
from App_Function_Libraries.Utils.Utils import load_comprehensive_config
|
28 |
#
|
29 |
#######################################################################################################################
|
|
|
106 |
|
107 |
def improved_chunking_process(text: str, chunk_options: Dict[str, Any] = None) -> List[Dict[str, Any]]:
|
108 |
logging.debug("Improved chunking process started...")
|
109 |
+
logging.debug(f"Received chunk_options: {chunk_options}")
|
110 |
|
111 |
# Extract JSON metadata if present
|
112 |
json_content = {}
|
|
|
126 |
text = text[len(header_text):].strip()
|
127 |
logging.debug(f"Extracted header text: {header_text}")
|
128 |
|
129 |
+
# Make a copy of chunk_options and ensure values are correct types
|
130 |
+
options = {}
|
131 |
if chunk_options:
|
132 |
+
try:
|
133 |
+
options['method'] = str(chunk_options.get('method', 'words'))
|
134 |
+
options['max_size'] = int(chunk_options.get('max_size', 2000))
|
135 |
+
options['overlap'] = int(chunk_options.get('overlap', 0))
|
136 |
+
# Handle language specially - it can be None
|
137 |
+
lang = chunk_options.get('language')
|
138 |
+
options['language'] = str(lang) if lang is not None else None
|
139 |
+
logging.debug(f"Processed options: {options}")
|
140 |
+
except Exception as e:
|
141 |
+
logging.error(f"Error processing chunk options: {e}")
|
142 |
+
raise
|
143 |
+
else:
|
144 |
+
options = {'method': 'words', 'max_size': 2000, 'overlap': 0, 'language': None}
|
145 |
+
logging.debug("Using default options")
|
146 |
|
147 |
+
if options.get('language') is None:
|
148 |
+
detected_lang = detect_language(text)
|
149 |
+
options['language'] = str(detected_lang)
|
150 |
+
logging.debug(f"Detected language: {options['language']}")
|
151 |
|
152 |
+
try:
|
153 |
+
if options['method'] == 'json':
|
154 |
+
chunks = chunk_text_by_json(text, max_size=options['max_size'], overlap=options['overlap'])
|
155 |
+
else:
|
156 |
+
chunks = chunk_text(text, options['method'], options['max_size'], options['overlap'], options['language'])
|
157 |
+
logging.debug(f"Created {len(chunks)} chunks using method {options['method']}")
|
158 |
+
except Exception as e:
|
159 |
+
logging.error(f"Error in chunking process: {e}")
|
160 |
+
raise
|
161 |
|
162 |
chunks_with_metadata = []
|
163 |
total_chunks = len(chunks)
|
164 |
+
try:
|
165 |
+
for i, chunk in enumerate(chunks):
|
166 |
+
metadata = {
|
167 |
+
'chunk_index': i + 1,
|
168 |
+
'total_chunks': total_chunks,
|
169 |
+
'chunk_method': options['method'],
|
170 |
+
'max_size': options['max_size'],
|
171 |
+
'overlap': options['overlap'],
|
172 |
+
'language': options['language'],
|
173 |
+
'relative_position': float((i + 1) / total_chunks)
|
174 |
+
}
|
175 |
+
metadata.update(json_content)
|
176 |
+
metadata['header_text'] = header_text
|
177 |
+
|
178 |
+
if options['method'] == 'json':
|
179 |
+
chunk_text_content = json.dumps(chunk['json'], ensure_ascii=False)
|
180 |
+
else:
|
181 |
+
chunk_text_content = chunk
|
182 |
|
183 |
+
chunks_with_metadata.append({
|
184 |
+
'text': chunk_text_content,
|
185 |
+
'metadata': metadata
|
186 |
+
})
|
187 |
|
188 |
+
logging.debug(f"Successfully created metadata for all chunks")
|
189 |
+
return chunks_with_metadata
|
190 |
+
except Exception as e:
|
191 |
+
logging.error(f"Error creating chunk metadata: {e}")
|
192 |
+
raise
|
193 |
|
194 |
|
195 |
def multi_level_chunking(text: str, method: str, max_size: int, overlap: int, language: str) -> List[str]:
|
|
|
242 |
|
243 |
def chunk_text_by_words(text: str, max_words: int = 300, overlap: int = 0, language: str = None) -> List[str]:
    """
    Split text into chunks of at most ``max_words`` words each.

    Chinese text (language code starting with 'zh') is segmented with jieba and
    Japanese text ('ja') with fugashi; every other language is split on
    whitespace. Consecutive chunks share ``overlap`` words.

    Args:
        text (str): The text to chunk.
        max_words (int): Maximum number of words per chunk (default: 300).
        overlap (int): Number of words repeated at the start of the next chunk (default: 0).
        language (str): Language code; detected via detect_language() when None.

    Returns:
        List[str]: The chunks, after post_process_chunks() has been applied.

    Raises:
        ValueError: If max_words is not positive, or overlap is negative or
            not smaller than max_words.
    """
    logging.debug("chunk_text_by_words...")
    logging.debug(f"Parameters: max_words={max_words}, overlap={overlap}, language={language}")

    try:
        # The window advances by (max_words - overlap) words. A zero step makes
        # range() fail with an opaque message, a negative step silently yields
        # no chunks at all, and a negative overlap skips words between chunks.
        if max_words <= 0 or overlap < 0 or overlap >= max_words:
            raise ValueError(
                f"Invalid chunking parameters: max_words={max_words}, overlap={overlap} "
                "(require max_words > 0 and 0 <= overlap < max_words)"
            )
        step = max_words - overlap

        if language is None:
            language = detect_language(text)
            logging.debug(f"Detected language: {language}")

        if language.startswith('zh'):  # Chinese
            import jieba
            words = list(jieba.cut(text))
        elif language == 'ja':  # Japanese
            import fugashi
            tagger = fugashi.Tagger()
            words = [word.surface for word in tagger(text)]
        else:  # Default to simple splitting for other languages
            words = text.split()

        logging.debug(f"Total words: {len(words)}")

        chunks = []
        for i in range(0, len(words), step):
            chunk = ' '.join(words[i:i + max_words])
            chunks.append(chunk)
            logging.debug(f"Created chunk {len(chunks)} with {len(chunk.split())} words")

        return post_process_chunks(chunks)
    except Exception as e:
        logging.error(f"Error in chunk_text_by_words: {e}")
        raise
|
274 |
|
275 |
|
276 |
def chunk_text_by_sentences(text: str, max_sentences: int = 10, overlap: int = 0, language: str = None) -> List[str]:
|
|
|
371 |
"""
|
372 |
chunk_length = len(chunk)
|
373 |
start_index = full_text.find(chunk)
|
374 |
+
end_index = start_index + chunk_length if start_index != -1 else -1
|
375 |
|
376 |
# Calculate a hash for the chunk
|
377 |
chunk_hash = hashlib.md5(chunk.encode()).hexdigest()
|
378 |
|
379 |
metadata = {
|
380 |
+
'start_index': int(start_index),
|
381 |
+
'end_index': int(end_index),
|
382 |
+
'word_count': int(len(chunk.split())),
|
383 |
+
'char_count': int(chunk_length),
|
384 |
'chunk_type': chunk_type,
|
385 |
'language': language,
|
386 |
'chunk_hash': chunk_hash,
|
387 |
+
'relative_position': float(start_index / len(full_text) if len(full_text) > 0 and start_index != -1 else 0)
|
388 |
}
|
389 |
|
390 |
if chunk_type == "chapter":
|
391 |
+
metadata['chapter_number'] = int(chapter_number) if chapter_number is not None else None
|
392 |
metadata['chapter_pattern'] = chapter_pattern
|
393 |
|
394 |
return metadata
|
|
|
976 |
#
|
977 |
# End of ebook chapter chunking
|
978 |
#######################################################################################################################
|
979 |
+
#
|
980 |
+
# XML Chunking
|
981 |
+
|
982 |
+
def extract_xml_structure(element, path=""):
    """
    Walk an XML element tree depth-first and flatten it.

    For each element the result holds, in order: its own stripped text (when
    non-blank), one entry per attribute under ``<path>/@<attribute>``, and then
    the entries of every child element.

    Args:
        element: The element to start from.
        path (str): Slash-separated path of the ancestors ('' for the root).

    Returns:
        A list of (path, text) tuples.
    """
    node_path = element.tag if not path else f"{path}/{element.tag}"
    collected = []

    # Direct text content, ignoring whitespace-only text
    stripped_text = (element.text or "").strip()
    if stripped_text:
        collected.append((node_path, stripped_text))

    # Attributes are reported under an '@'-prefixed pseudo-path
    collected += [(f"{node_path}/@{attr_name}", attr_value)
                  for attr_name, attr_value in element.attrib.items()]

    # Recurse into children, extending the path with this element's tag
    for sub_element in element:
        collected += extract_xml_structure(sub_element, node_path)

    return collected
|
1004 |
+
|
1005 |
+
|
1006 |
+
def _build_xml_chunk(items, index, max_size, overlap, language, root):
    """Assemble one chunk dict (text plus metadata) from a list of (path, text) pairs."""
    return {
        'text': '\n'.join(f"{p}: {c}" for p, c in items),
        'metadata': {
            'paths': [p for p, _ in items],
            'chunk_method': 'xml',
            'chunk_index': index,
            'max_size': max_size,
            'overlap': overlap,
            'language': language,
            'root_tag': root.tag,
            'xml_attributes': dict(root.attrib)
        }
    }


def chunk_xml(xml_text: str, chunk_options: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Enhanced XML chunking that preserves structure and hierarchy.
    Processes XML content into chunks while maintaining structural context.

    The document is flattened into (path, text) pairs by extract_xml_structure()
    and the pairs are grouped so that each chunk holds at most ``max_size``
    words. A single pair larger than ``max_size`` still becomes its own chunk.

    Args:
        xml_text (str): The XML content as a string
        chunk_options (Dict[str, Any]): Configuration options (None is treated
            as an empty dict) including:
            - max_size (int): Maximum chunk size in words (default: 1000)
            - overlap (int): Number of overlapping elements (default: 0)
            - method (str): Chunking method (default: 'xml')
            - language (str): Content language (default: 'english')
            max_size and overlap are coerced with int(), so numeric strings
            are accepted.

    Returns:
        List[Dict[str, Any]]: List of chunks, each containing:
            - text: The chunk content, one "path: text" line per pair
            - metadata: Chunk metadata including XML paths and chunking info

    Raises:
        xml.etree.ElementTree.ParseError: If xml_text is not well-formed XML.
        Exception: Any other error is logged and re-raised unchanged.
    """
    logging.debug("Starting XML chunking process...")

    try:
        # Parse XML content
        root = ET.fromstring(xml_text)
        chunks = []

        # Get chunking parameters with defaults. Values may arrive as strings
        # (e.g. from UI widgets), so coerce them before doing arithmetic.
        chunk_options = chunk_options or {}
        max_size = int(chunk_options.get('max_size', 1000))
        overlap = int(chunk_options.get('overlap', 0))
        language = chunk_options.get('language', 'english')

        logging.debug(f"Chunking parameters - max_size: {max_size}, overlap: {overlap}, language: {language}")

        # Extract full structure with hierarchy
        xml_content = extract_xml_structure(root)
        logging.debug(f"Extracted {len(xml_content)} XML elements")

        # Initialize chunking variables
        current_chunk = []
        current_size = 0
        chunk_count = 0

        # Process XML content into chunks
        for path, content in xml_content:
            # Calculate content size (by words)
            content_size = len(content.split())

            # Flush the pending chunk if adding this content would exceed max_size
            if current_size + content_size > max_size and current_chunk:
                chunk_count += 1
                chunks.append(_build_xml_chunk(current_chunk, chunk_count, max_size, overlap, language, root))

                if overlap > 0:
                    # Carry the last `overlap` items over into the next chunk
                    current_chunk = current_chunk[-overlap:]
                    current_size = sum(len(c.split()) for _, c in current_chunk)
                    logging.debug(f"Created overlap chunk with {len(current_chunk)} items")
                else:
                    current_chunk = []
                    current_size = 0

            # Add current content to chunk
            current_chunk.append((path, content))
            current_size += content_size

        # Process final chunk if content remains
        if current_chunk:
            chunk_count += 1
            chunks.append(_build_xml_chunk(current_chunk, chunk_count, max_size, overlap, language, root))

        # The total is only known once every chunk has been produced
        for chunk in chunks:
            chunk['metadata']['total_chunks'] = chunk_count

        logging.debug(f"XML chunking complete. Created {len(chunks)} chunks")
        return chunks

    except ET.ParseError as e:
        logging.error(f"XML parsing error: {str(e)}")
        raise
    except Exception as e:
        logging.error(f"Unexpected error during XML chunking: {str(e)}")
        raise
|
1120 |
+
|
1121 |
+
#
|
1122 |
+
# End of XML Chunking
|
1123 |
+
#######################################################################################################################
|
1124 |
|
1125 |
#######################################################################################################################
|
1126 |
#
|
App_Function_Libraries/DB/Character_Chat_DB.py
CHANGED
@@ -1,701 +1,1059 @@
|
|
1 |
-
# character_chat_db.py
|
2 |
-
# Database functions for managing character cards and chat histories.
|
3 |
-
# #
|
4 |
-
# Imports
|
5 |
-
import configparser
|
6 |
-
import sqlite3
|
7 |
-
import json
|
8 |
-
import os
|
9 |
-
import sys
|
10 |
-
from typing import List, Dict, Optional, Tuple, Any, Union
|
11 |
-
|
12 |
-
from App_Function_Libraries.Utils.Utils import get_database_dir, get_project_relative_path, get_database_path
|
13 |
-
import logging
|
14 |
-
|
15 |
-
#
|
16 |
-
#######################################################################################################################
|
17 |
-
#
|
18 |
-
#
|
19 |
-
|
20 |
-
def ensure_database_directory():
|
21 |
-
os.makedirs(get_database_dir(), exist_ok=True)
|
22 |
-
|
23 |
-
ensure_database_directory()
|
24 |
-
|
25 |
-
|
26 |
-
# Construct the path to the config file
|
27 |
-
config_path = get_project_relative_path('Config_Files/config.txt')
|
28 |
-
|
29 |
-
# Read the config file
|
30 |
-
config = configparser.ConfigParser()
|
31 |
-
config.read(config_path)
|
32 |
-
|
33 |
-
# Get the chat db path from the config, or use the default if not specified
|
34 |
-
chat_DB_PATH = config.get('Database', 'chatDB_path', fallback=get_database_path('chatDB.db'))
|
35 |
-
print(f"Chat Database path: {chat_DB_PATH}")
|
36 |
-
|
37 |
-
########################################################################################################
|
38 |
-
#
|
39 |
-
# Functions
|
40 |
-
|
41 |
-
# FIXME - Setup properly and test/add documentation for its existence...
|
42 |
-
def initialize_database():
|
43 |
-
"""Initialize the SQLite database with required tables and FTS5 virtual tables."""
|
44 |
-
conn = None
|
45 |
-
try:
|
46 |
-
conn = sqlite3.connect(chat_DB_PATH)
|
47 |
-
cursor = conn.cursor()
|
48 |
-
|
49 |
-
# Enable foreign key constraints
|
50 |
-
cursor.execute("PRAGMA foreign_keys = ON;")
|
51 |
-
|
52 |
-
# Create CharacterCards table with V2 fields
|
53 |
-
cursor.execute("""
|
54 |
-
CREATE TABLE IF NOT EXISTS CharacterCards (
|
55 |
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
56 |
-
name TEXT UNIQUE NOT NULL,
|
57 |
-
description TEXT,
|
58 |
-
personality TEXT,
|
59 |
-
scenario TEXT,
|
60 |
-
image BLOB,
|
61 |
-
post_history_instructions TEXT,
|
62 |
-
first_mes TEXT,
|
63 |
-
mes_example TEXT,
|
64 |
-
creator_notes TEXT,
|
65 |
-
system_prompt TEXT,
|
66 |
-
alternate_greetings TEXT,
|
67 |
-
tags TEXT,
|
68 |
-
creator TEXT,
|
69 |
-
character_version TEXT,
|
70 |
-
extensions TEXT,
|
71 |
-
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
72 |
-
);
|
73 |
-
""")
|
74 |
-
|
75 |
-
# Create
|
76 |
-
cursor.execute("""
|
77 |
-
CREATE TABLE IF NOT EXISTS
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
);
|
86 |
-
""")
|
87 |
-
|
88 |
-
# Create FTS5
|
89 |
-
cursor.
|
90 |
-
CREATE
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
)
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
#
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
#
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
#
|
295 |
-
#
|
296 |
-
#
|
297 |
-
#
|
298 |
-
#
|
299 |
-
#
|
300 |
-
#
|
301 |
-
#
|
302 |
-
#
|
303 |
-
#
|
304 |
-
#
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
)
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
return
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
cursor
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
return
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
return
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
-
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
|
616 |
-
|
617 |
-
|
618 |
-
|
619 |
-
|
620 |
-
|
621 |
-
|
622 |
-
|
623 |
-
|
624 |
-
|
625 |
-
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
|
630 |
-
|
631 |
-
""
|
632 |
-
|
633 |
-
|
634 |
-
|
635 |
-
|
636 |
-
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
|
641 |
-
|
642 |
-
|
643 |
-
|
644 |
-
|
645 |
-
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
|
651 |
-
|
652 |
-
|
653 |
-
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
663 |
-
|
664 |
-
|
665 |
-
|
666 |
-
|
667 |
-
|
668 |
-
|
669 |
-
|
670 |
-
|
671 |
-
|
672 |
-
|
673 |
-
|
674 |
-
|
675 |
-
|
676 |
-
|
677 |
-
|
678 |
-
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
|
683 |
-
|
684 |
-
|
685 |
-
|
686 |
-
|
687 |
-
|
688 |
-
|
689 |
-
|
690 |
-
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
696 |
-
|
697 |
-
|
698 |
-
|
699 |
-
|
700 |
-
|
701 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# character_chat_db.py
|
2 |
+
# Database functions for managing character cards and chat histories.
|
3 |
+
# #
|
4 |
+
# Imports
|
5 |
+
import configparser
|
6 |
+
import sqlite3
|
7 |
+
import json
|
8 |
+
import os
|
9 |
+
import sys
|
10 |
+
from typing import List, Dict, Optional, Tuple, Any, Union
|
11 |
+
|
12 |
+
from App_Function_Libraries.Utils.Utils import get_database_dir, get_project_relative_path, get_database_path
|
13 |
+
from Tests.Chat_APIs.Chat_APIs_Integration_test import logging
|
14 |
+
|
15 |
+
#
|
16 |
+
#######################################################################################################################
|
17 |
+
#
|
18 |
+
#
|
19 |
+
|
20 |
+
def ensure_database_directory():
    """Create the directory returned by get_database_dir() if it is missing."""
    database_dir = get_database_dir()
    os.makedirs(database_dir, exist_ok=True)
|
22 |
+
|
23 |
+
ensure_database_directory()
|
24 |
+
|
25 |
+
|
26 |
+
# Construct the path to the config file
|
27 |
+
config_path = get_project_relative_path('Config_Files/config.txt')
|
28 |
+
|
29 |
+
# Read the config file
|
30 |
+
config = configparser.ConfigParser()
|
31 |
+
config.read(config_path)
|
32 |
+
|
33 |
+
# Get the chat db path from the config, or use the default if not specified
|
34 |
+
chat_DB_PATH = config.get('Database', 'chatDB_path', fallback=get_database_path('chatDB.db'))
|
35 |
+
print(f"Chat Database path: {chat_DB_PATH}")
|
36 |
+
|
37 |
+
########################################################################################################
|
38 |
+
#
|
39 |
+
# Functions
|
40 |
+
|
41 |
+
# FIXME - Setup properly and test/add documentation for its existence...
|
42 |
+
def initialize_database():
    """
    Initialize the SQLite database with required tables and FTS5 virtual tables.

    Creates (if missing) the CharacterCards, CharacterChats and ChatKeywords
    tables, an external-content FTS5 index for the first two, the triggers
    that keep those indexes in sync, and the ChatKeywords lookup indexes.

    Raises:
        sqlite3.Error: On any SQLite failure (logged, rolled back, re-raised).
        Exception: Any other failure is logged, rolled back and re-raised.
    """
    conn = None
    try:
        conn = sqlite3.connect(chat_DB_PATH)
        cursor = conn.cursor()

        # Enable foreign key constraints
        cursor.execute("PRAGMA foreign_keys = ON;")

        # Create CharacterCards table with V2 fields
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS CharacterCards (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                description TEXT,
                personality TEXT,
                scenario TEXT,
                image BLOB,
                post_history_instructions TEXT,
                first_mes TEXT,
                mes_example TEXT,
                creator_notes TEXT,
                system_prompt TEXT,
                alternate_greetings TEXT,
                tags TEXT,
                creator TEXT,
                character_version TEXT,
                extensions TEXT,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
            );
        """)

        # Create FTS5 virtual table for CharacterCards
        cursor.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS CharacterCards_fts USING fts5(
                name,
                description,
                personality,
                scenario,
                system_prompt,
                content='CharacterCards',
                content_rowid='id'
            );
        """)

        # Create triggers to keep FTS5 table in sync with CharacterCards.
        # CharacterCards_fts is an external-content table, so old index entries
        # must be removed with the special 'delete' command and the OLD column
        # values: by the time an AFTER trigger runs, FTS5 can no longer read
        # the previous text from the content table, and a plain DELETE/UPDATE
        # on the FTS table would leave stale tokens in the index.
        # NOTE: because of IF NOT EXISTS, databases created with earlier
        # trigger definitions keep them until those triggers are dropped.
        cursor.executescript("""
            CREATE TRIGGER IF NOT EXISTS CharacterCards_ai AFTER INSERT ON CharacterCards BEGIN
                INSERT INTO CharacterCards_fts(
                    rowid,
                    name,
                    description,
                    personality,
                    scenario,
                    system_prompt
                ) VALUES (
                    new.id,
                    new.name,
                    new.description,
                    new.personality,
                    new.scenario,
                    new.system_prompt
                );
            END;

            CREATE TRIGGER IF NOT EXISTS CharacterCards_ad AFTER DELETE ON CharacterCards BEGIN
                INSERT INTO CharacterCards_fts(
                    CharacterCards_fts,
                    rowid,
                    name,
                    description,
                    personality,
                    scenario,
                    system_prompt
                ) VALUES (
                    'delete',
                    old.id,
                    old.name,
                    old.description,
                    old.personality,
                    old.scenario,
                    old.system_prompt
                );
            END;

            CREATE TRIGGER IF NOT EXISTS CharacterCards_au AFTER UPDATE ON CharacterCards BEGIN
                INSERT INTO CharacterCards_fts(
                    CharacterCards_fts,
                    rowid,
                    name,
                    description,
                    personality,
                    scenario,
                    system_prompt
                ) VALUES (
                    'delete',
                    old.id,
                    old.name,
                    old.description,
                    old.personality,
                    old.scenario,
                    old.system_prompt
                );
                INSERT INTO CharacterCards_fts(
                    rowid,
                    name,
                    description,
                    personality,
                    scenario,
                    system_prompt
                ) VALUES (
                    new.id,
                    new.name,
                    new.description,
                    new.personality,
                    new.scenario,
                    new.system_prompt
                );
            END;
        """)

        # Create CharacterChats table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS CharacterChats (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                character_id INTEGER NOT NULL,
                conversation_name TEXT,
                chat_history TEXT,
                is_snapshot BOOLEAN DEFAULT FALSE,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (character_id) REFERENCES CharacterCards(id) ON DELETE CASCADE
            );
        """)

        # Create FTS5 virtual table for CharacterChats
        cursor.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS CharacterChats_fts USING fts5(
                conversation_name,
                chat_history,
                content='CharacterChats',
                content_rowid='id'
            );
        """)

        # Create triggers to keep FTS5 table in sync with CharacterChats
        # (same external-content 'delete' pattern as for CharacterCards_fts)
        cursor.executescript("""
            CREATE TRIGGER IF NOT EXISTS CharacterChats_ai AFTER INSERT ON CharacterChats BEGIN
                INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
                VALUES (new.id, new.conversation_name, new.chat_history);
            END;

            CREATE TRIGGER IF NOT EXISTS CharacterChats_ad AFTER DELETE ON CharacterChats BEGIN
                INSERT INTO CharacterChats_fts(CharacterChats_fts, rowid, conversation_name, chat_history)
                VALUES ('delete', old.id, old.conversation_name, old.chat_history);
            END;

            CREATE TRIGGER IF NOT EXISTS CharacterChats_au AFTER UPDATE ON CharacterChats BEGIN
                INSERT INTO CharacterChats_fts(CharacterChats_fts, rowid, conversation_name, chat_history)
                VALUES ('delete', old.id, old.conversation_name, old.chat_history);
                INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
                VALUES (new.id, new.conversation_name, new.chat_history);
            END;
        """)

        # Create ChatKeywords table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS ChatKeywords (
                chat_id INTEGER NOT NULL,
                keyword TEXT NOT NULL,
                FOREIGN KEY (chat_id) REFERENCES CharacterChats(id) ON DELETE CASCADE
            );
        """)

        # Create indexes for faster searches
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_chatkeywords_keyword ON ChatKeywords(keyword);
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_chatkeywords_chat_id ON ChatKeywords(chat_id);
        """)

        conn.commit()
        logging.info("Database initialized successfully.")
    except sqlite3.Error as e:
        logging.error(f"SQLite error occurred during database initialization: {e}")
        if conn:
            conn.rollback()
        raise
    except Exception as e:
        logging.error(f"Unexpected error occurred during database initialization: {e}")
        if conn:
            conn.rollback()
        raise
    finally:
        # Always release the connection, whether or not initialization succeeded
        if conn:
            conn.close()
|
195 |
+
|
196 |
+
# Call initialize_database() at the start of your application
|
197 |
+
def setup_chat_database():
    """Initialize the chat database, terminating the process on failure.

    Any exception raised by ``initialize_database()`` is logged at CRITICAL
    level and the interpreter then exits with status 1.
    """
    try:
        initialize_database()
    except Exception as init_error:
        logging.critical(f"Failed to initialize database: {init_error}")
        sys.exit(1)
|
203 |
+
|
204 |
+
# Invoked at module level: the schema is created (or the process exits with
# status 1) before any of the functions defined below are used.
setup_chat_database()
|
205 |
+
|
206 |
+
|
207 |
+
########################################################################################################
|
208 |
+
#
|
209 |
+
# Character Card handling
|
210 |
+
|
211 |
+
def parse_character_card(card_data: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize a raw character card into the V2 field layout.

    Missing text fields default to ``''``. The ``alternate_greetings``,
    ``tags`` and ``extensions`` entries are serialized to JSON strings
    (defaulting to ``[]``, ``[]`` and ``{}`` respectively). ``image`` is
    copied through untouched, and only when the input carries that key.

    Args:
        card_data: Raw character card fields.

    Returns:
        A new dictionary holding the normalized fields.
    """
    # (field name, default value, serialize as JSON?) in output order.
    field_specs = (
        ('name', '', False),
        ('description', '', False),
        ('personality', '', False),
        ('scenario', '', False),
        ('first_mes', '', False),
        ('mes_example', '', False),
        ('creator_notes', '', False),
        ('system_prompt', '', False),
        ('post_history_instructions', '', False),
        ('alternate_greetings', [], True),
        ('tags', [], True),
        ('creator', '', False),
        ('character_version', '', False),
        ('extensions', {}, True),
    )

    v2_data = {}
    for field, default, as_json in field_specs:
        value = card_data.get(field, default)
        v2_data[field] = json.dumps(value) if as_json else value

    # The image might be binary data, so it is passed along as-is.
    if 'image' in card_data:
        v2_data['image'] = card_data['image']

    return v2_data
|
235 |
+
|
236 |
+
|
237 |
+
def add_character_card(card_data: Dict[str, Any]) -> Optional[int]:
    """Insert a character card, or update the existing card with the same name.

    Args:
        card_data: Raw card fields; normalized through ``parse_character_card``.

    Returns:
        The ID of the inserted or updated character, or ``None`` if the
        operation failed (the error is logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        parsed_card = parse_character_card(card_data)

        # Check if character already exists (the stored image is fetched too,
        # so an update without an image can keep the current one).
        cursor.execute("SELECT id, image FROM CharacterCards WHERE name = ?", (parsed_card['name'],))
        row = cursor.fetchone()

        # parse_character_card() only sets 'image' when the input has one.
        # Indexing it unconditionally raised KeyError, which was swallowed
        # below and made every image-less card fail to save.
        if 'image' in parsed_card:
            image = parsed_card['image']
        else:
            image = row[1] if row else None

        # Values shared by the UPDATE and INSERT statements, in column order.
        field_values = (
            parsed_card['description'], parsed_card['personality'], parsed_card['scenario'],
            image, parsed_card['post_history_instructions'], parsed_card['first_mes'],
            parsed_card['mes_example'], parsed_card['creator_notes'], parsed_card['system_prompt'],
            parsed_card['alternate_greetings'], parsed_card['tags'], parsed_card['creator'],
            parsed_card['character_version'], parsed_card['extensions'],
        )

        if row:
            # Update existing character
            character_id = row[0]
            update_query = """
                UPDATE CharacterCards
                SET description = ?, personality = ?, scenario = ?, image = ?,
                    post_history_instructions = ?, first_mes = ?, mes_example = ?,
                    creator_notes = ?, system_prompt = ?, alternate_greetings = ?,
                    tags = ?, creator = ?, character_version = ?, extensions = ?
                WHERE id = ?
            """
            cursor.execute(update_query, field_values + (character_id,))
        else:
            # Insert new character
            insert_query = """
                INSERT INTO CharacterCards (name, description, personality, scenario, image,
                    post_history_instructions, first_mes, mes_example, creator_notes, system_prompt,
                    alternate_greetings, tags, creator, character_version, extensions)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """
            cursor.execute(insert_query, (parsed_card['name'],) + field_values)
            character_id = cursor.lastrowid

        conn.commit()
        return character_id
    except sqlite3.IntegrityError as e:
        logging.error(f"Error adding character card: {e}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error adding character card: {e}")
        return None
    finally:
        conn.close()
|
293 |
+
|
294 |
+
# def add_character_card(card_data: Dict) -> Optional[int]:
|
295 |
+
# """Add or update a character card in the database.
|
296 |
+
#
|
297 |
+
# Returns the ID of the inserted character or None if failed.
|
298 |
+
# """
|
299 |
+
# conn = sqlite3.connect(chat_DB_PATH)
|
300 |
+
# cursor = conn.cursor()
|
301 |
+
# try:
|
302 |
+
# # Ensure all required fields are present
|
303 |
+
# required_fields = ['name', 'description', 'personality', 'scenario', 'image', 'post_history_instructions', 'first_message']
|
304 |
+
# for field in required_fields:
|
305 |
+
# if field not in card_data:
|
306 |
+
# card_data[field] = '' # Assign empty string if field is missing
|
307 |
+
#
|
308 |
+
# # Check if character already exists
|
309 |
+
# cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (card_data['name'],))
|
310 |
+
# row = cursor.fetchone()
|
311 |
+
#
|
312 |
+
# if row:
|
313 |
+
# # Update existing character
|
314 |
+
# character_id = row[0]
|
315 |
+
# cursor.execute("""
|
316 |
+
# UPDATE CharacterCards
|
317 |
+
# SET description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
|
318 |
+
# WHERE id = ?
|
319 |
+
# """, (
|
320 |
+
# card_data['description'],
|
321 |
+
# card_data['personality'],
|
322 |
+
# card_data['scenario'],
|
323 |
+
# card_data['image'],
|
324 |
+
# card_data['post_history_instructions'],
|
325 |
+
# card_data['first_message'],
|
326 |
+
# character_id
|
327 |
+
# ))
|
328 |
+
# else:
|
329 |
+
# # Insert new character
|
330 |
+
# cursor.execute("""
|
331 |
+
# INSERT INTO CharacterCards (name, description, personality, scenario, image, post_history_instructions, first_message)
|
332 |
+
# VALUES (?, ?, ?, ?, ?, ?, ?)
|
333 |
+
# """, (
|
334 |
+
# card_data['name'],
|
335 |
+
# card_data['description'],
|
336 |
+
# card_data['personality'],
|
337 |
+
# card_data['scenario'],
|
338 |
+
# card_data['image'],
|
339 |
+
# card_data['post_history_instructions'],
|
340 |
+
# card_data['first_message']
|
341 |
+
# ))
|
342 |
+
# character_id = cursor.lastrowid
|
343 |
+
#
|
344 |
+
# conn.commit()
|
345 |
+
# return cursor.lastrowid
|
346 |
+
# except sqlite3.IntegrityError as e:
|
347 |
+
# logging.error(f"Error adding character card: {e}")
|
348 |
+
# return None
|
349 |
+
# except Exception as e:
|
350 |
+
# logging.error(f"Unexpected error adding character card: {e}")
|
351 |
+
# return None
|
352 |
+
# finally:
|
353 |
+
# conn.close()
|
354 |
+
|
355 |
+
|
356 |
+
def get_character_cards() -> List[Dict]:
    """Retrieve all character cards from the database.

    Returns:
        One dictionary per row of ``CharacterCards``, keyed by column name.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    logging.debug(f"Fetching characters from DB: {chat_DB_PATH}")
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM CharacterCards")
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
        return [dict(zip(columns, row)) for row in rows]
    finally:
        # Close even when the query raises, so the connection is not leaked.
        conn.close()
|
368 |
+
|
369 |
+
|
370 |
+
def get_character_card_by_id(character_id: Union[int, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """
    Retrieve a single character card by its ID.

    Args:
        character_id: Can be either an integer ID or a dictionary containing character data.

    Returns:
        A dictionary containing the character card data, or None if not found,
        if the argument has an unsupported type, or if the lookup fails.
    """
    # Neither of these two paths needs the database, so they are handled
    # before a connection is opened.
    if isinstance(character_id, dict):
        # If a dictionary is passed, assume it's already a character card
        return character_id
    if not isinstance(character_id, int):
        logging.warning(f"Invalid type for character_id: {type(character_id)}")
        return None

    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM CharacterCards WHERE id = ?", (character_id,))
        row = cursor.fetchone()
        if row:
            columns = [description[0] for description in cursor.description]
            return dict(zip(columns, row))
        return None
    except Exception as e:
        logging.error(f"Error in get_character_card_by_id: {e}")
        return None
    finally:
        conn.close()
|
401 |
+
|
402 |
+
|
403 |
+
def update_character_card(character_id: int, card_data: Dict) -> bool:
    """Update the core fields of an existing character card.

    Args:
        character_id: ID of the card to update.
        card_data: New values. The greeting is read from ``first_mes`` and,
            for older callers, from ``first_message``.

    Returns:
        True if a row was updated, False if no row matched or a database
        error occurred (the error is logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        # The greeting column is named first_mes in the statements used by
        # add_character_card(); the old first_message key is still accepted
        # as input so existing callers keep working.
        first_mes = card_data.get(
            'first_mes',
            card_data.get('first_message', "Hello! I'm ready to chat."),
        )
        cursor.execute("""
            UPDATE CharacterCards
            SET name = ?, description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_mes = ?
            WHERE id = ?
        """, (
            card_data.get('name'),
            card_data.get('description'),
            card_data.get('personality'),
            card_data.get('scenario'),
            card_data.get('image'),
            card_data.get('post_history_instructions', ''),
            first_mes,
            character_id
        ))
        conn.commit()
        return cursor.rowcount > 0
    except sqlite3.Error as e:
        # Catch every SQLite error, matching delete_character_card() and
        # update_character_chat(), instead of only integrity violations.
        logging.error(f"Error updating character card: {e}")
        return False
    finally:
        conn.close()
|
429 |
+
|
430 |
+
|
431 |
+
def delete_character_card(character_id: int) -> bool:
    """Remove a character card together with every chat that belongs to it.

    Args:
        character_id: ID of the card to delete.

    Returns:
        True if a card row was deleted, False if none matched or a database
        error occurred (the error is logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        id_param = (character_id,)
        # Chats reference the card, so they are removed before the card itself.
        statements = (
            "DELETE FROM CharacterChats WHERE character_id = ?",
            "DELETE FROM CharacterCards WHERE id = ?",
        )
        for statement in statements:
            cursor.execute(statement, id_param)
        conn.commit()
        # rowcount belongs to the last statement executed, i.e. the card delete.
        card_deleted = cursor.rowcount > 0
        return card_deleted
    except sqlite3.Error as db_error:
        logging.error(f"Error deleting character card: {db_error}")
        return False
    finally:
        conn.close()
|
446 |
+
|
447 |
+
|
448 |
+
def add_character_chat(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]], keywords: Optional[List[str]] = None, is_snapshot: bool = False) -> Optional[int]:
    """
    Store a new conversation for a character and, optionally, its keywords.

    Args:
        character_id (int): The ID of the character.
        conversation_name (str): Name of the conversation.
        chat_history (List[Tuple[str, str]]): List of (user, bot) message tuples;
            saved as a JSON string.
        keywords (Optional[List[str]]): Keywords to attach to the chat. Each one
            is stripped and lower-cased before being stored.
        is_snapshot (bool, optional): Whether this chat is a snapshot.

    Returns:
        Optional[int]: The ID of the inserted chat, or None on a database error.
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        chat_row = (character_id, conversation_name, json.dumps(chat_history), is_snapshot)
        cursor.execute("""
            INSERT INTO CharacterChats (character_id, conversation_name, chat_history, is_snapshot)
            VALUES (?, ?, ?, ?)
        """, chat_row)
        chat_id = cursor.lastrowid

        if keywords:
            # One ChatKeywords row per keyword, all pointing at the new chat.
            keyword_records = []
            for keyword in keywords:
                keyword_records.append((chat_id, keyword.strip().lower()))
            cursor.executemany("""
                INSERT INTO ChatKeywords (chat_id, keyword)
                VALUES (?, ?)
            """, keyword_records)

        conn.commit()
        return chat_id
    except sqlite3.Error as db_error:
        logging.error(f"Error adding character chat: {db_error}")
        return None
    finally:
        conn.close()
|
492 |
+
|
493 |
+
|
494 |
+
def get_character_chats(character_id: Optional[int] = None) -> List[Dict]:
    """Retrieve all chats, or chats for a specific character if character_id is provided.

    Args:
        character_id: When given, only chats of this character are returned.

    Returns:
        One dictionary per ``CharacterChats`` row, keyed by column name.
        ``chat_history`` is returned as stored (not JSON-decoded).

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        if character_id is not None:
            cursor.execute("SELECT * FROM CharacterChats WHERE character_id = ?", (character_id,))
        else:
            cursor.execute("SELECT * FROM CharacterChats")
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
        return [dict(zip(columns, row)) for row in rows]
    finally:
        # Close even when the query raises, so the connection is not leaked.
        conn.close()
|
506 |
+
|
507 |
+
|
508 |
+
def get_character_chat_by_id(chat_id: int) -> Optional[Dict]:
    """Retrieve a single chat by its ID.

    Args:
        chat_id: ID of the chat to load.

    Returns:
        The chat row as a dictionary with ``chat_history`` decoded from JSON,
        or None if no chat has that ID.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM CharacterChats WHERE id = ?", (chat_id,))
        row = cursor.fetchone()
        if row is None:
            return None
        # Read the column names while the connection is still open, rather
        # than relying on cursor state surviving conn.close().
        columns = [description[0] for description in cursor.description]
    finally:
        conn.close()

    chat = dict(zip(columns, row))
    chat['chat_history'] = json.loads(chat['chat_history'])
    return chat
|
521 |
+
|
522 |
+
|
523 |
+
def search_character_chats(query: str, character_id: Optional[int] = None) -> Tuple[List[Dict], str]:
    """
    Run an FTS5 search over character chats, optionally limited to one character.

    Args:
        query (str): The search query.
        character_id (Optional[int]): The ID of the character to filter chats by.

    Returns:
        Tuple[List[Dict], str]: The matching chats (id, conversation_name,
        chat_history) and a status message. On error the list is empty and the
        message describes the failure.
    """
    if not query.strip():
        return [], "Please enter a search query."

    filter_by_character = character_id is not None

    # Both variants share the same statement; only the character filter differs.
    sql = """
        SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
        FROM CharacterChats_fts
        JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
        WHERE CharacterChats_fts MATCH ?
    """
    params = [query]
    if filter_by_character:
        sql += " AND CharacterChats.character_id = ?"
        params.append(character_id)
    sql += " ORDER BY rank"

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        cursor.execute(sql, tuple(params))
        rows = cursor.fetchall()
        column_names = [col[0] for col in cursor.description]
        results = [dict(zip(column_names, row)) for row in rows]

        if filter_by_character:
            status_message = f"Found {len(results)} chat(s) matching '{query}' for the selected character."
        else:
            status_message = f"Found {len(results)} chat(s) matching '{query}' across all characters."

        return results, status_message
    except Exception as search_error:
        logging.error(f"Error searching chats with FTS5: {search_error}")
        return [], f"Error occurred during search: {search_error}"
    finally:
        conn.close()
|
574 |
+
|
575 |
+
def update_character_chat(chat_id: int, chat_history: List[Tuple[str, str]]) -> bool:
    """Replace the stored history of an existing chat.

    Args:
        chat_id: ID of the chat to update.
        chat_history: New history; saved as a JSON string.

    Returns:
        True if a row was updated, False if no row matched or a database
        error occurred (the error is logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        params = (json.dumps(chat_history), chat_id)
        cursor.execute("""
            UPDATE CharacterChats
            SET chat_history = ?
            WHERE id = ?
        """, params)
        conn.commit()
        was_updated = cursor.rowcount > 0
        return was_updated
    except sqlite3.Error as db_error:
        logging.error(f"Error updating character chat: {db_error}")
        return False
    finally:
        conn.close()
|
596 |
+
|
597 |
+
|
598 |
+
def delete_character_chat(chat_id: int) -> bool:
    """Remove one chat by ID.

    Args:
        chat_id: ID of the chat to delete.

    Returns:
        True if a row was deleted, False if no row matched or a database
        error occurred (the error is logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        cursor.execute("DELETE FROM CharacterChats WHERE id = ?", (chat_id,))
        conn.commit()
        was_deleted = cursor.rowcount > 0
        return was_deleted
    except sqlite3.Error as db_error:
        logging.error(f"Error deleting character chat: {db_error}")
        return False
    finally:
        conn.close()
|
611 |
+
|
612 |
+
|
613 |
+
def fetch_keywords_for_chats(keywords: List[str]) -> List[int]:
    """
    Fetch chat IDs associated with any of the specified keywords.

    Keywords are stripped and lower-cased before the lookup, which is the form
    add_character_chat() stores them in. Blank keywords are ignored.

    Args:
        keywords (List[str]): List of keywords to search for.

    Returns:
        List[int]: List of chat IDs associated with the keywords; empty when
        there is nothing to search for or the query fails.
    """
    # Normalize the same way the keywords were written, otherwise a lookup
    # for "Python" would never match the stored "python".
    normalized = []
    for keyword in keywords or []:
        cleaned = keyword.strip().lower()
        if cleaned and cleaned not in normalized:
            normalized.append(cleaned)
    if not normalized:
        return []

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        placeholders = ",".join("?" * len(normalized))
        sql_query = f"SELECT DISTINCT chat_id FROM ChatKeywords WHERE keyword IN ({placeholders})"
        cursor.execute(sql_query, normalized)
        return [row[0] for row in cursor.fetchall()]
    except Exception as e:
        logging.error(f"Error in fetch_keywords_for_chats: {e}")
        return []
    finally:
        conn.close()
|
641 |
+
|
642 |
+
|
643 |
+
def save_chat_history_to_character_db(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]]) -> Optional[int]:
    """Persist a chat history as a new row in the CharacterChats table.

    Thin wrapper around ``add_character_chat`` that passes no keywords and
    leaves the snapshot flag at its default.

    Returns the ID of the inserted chat or None if failed.
    """
    new_chat_id = add_character_chat(character_id, conversation_name, chat_history)
    return new_chat_id
|
649 |
+
|
650 |
+
|
651 |
+
def search_db(query: str, fields: List[str], where_clause: str = "", page: int = 1, results_per_page: int = 5) -> List[Dict[str, Any]]:
    """
    Perform a full-text search on specified fields with optional filtering and pagination.

    Args:
        query (str): The search query (FTS5 query syntax).
        fields (List[str]): Columns of ``CharacterChats_fts`` to search in. An
            empty list searches every indexed column.
        where_clause (str, optional): Additional SQL WHERE clause to filter results.
            It is spliced into the statement verbatim and must not contain
            ``?`` placeholders, because no parameters are bound for it.
        page (int, optional): Page number for pagination (1-based).
        results_per_page (int, optional): Number of results per page.

    Returns:
        List[Dict[str, Any]]: Matching chats with the keys ``id``,
        ``conversation_name``, ``chat_history`` and ``character_id``. Empty on
        a blank query or on any error (which is logged).
    """
    if not query.strip():
        return []

    # Restrict matching with an FTS5 column filter, "{col1 col2} : (expr)".
    # The previous "col1 AND col2 MATCH ?" form was not a valid way to search
    # several columns and referenced column names shared by both joined tables.
    if fields:
        match_expression = "{" + " ".join(fields) + "} : (" + query + ")"
    else:
        match_expression = query

    # character_id is selected as well because search_character_chat() reads it.
    fts_query = """
        SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history,
               CharacterChats.character_id
        FROM CharacterChats_fts
        JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
        WHERE CharacterChats_fts MATCH ?
    """
    if where_clause:
        # NOTE(review): raw SQL from the caller; only pass trusted, pre-validated text.
        fts_query += f" AND ({where_clause})"
    fts_query += " ORDER BY rank LIMIT ? OFFSET ?"
    # Clamp so that page <= 0 behaves like the first page.
    offset = max(page - 1, 0) * results_per_page

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        cursor.execute(fts_query, (match_expression, results_per_page, offset))
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
        return [dict(zip(columns, row)) for row in rows]
    except Exception as e:
        logging.error(f"Error in search_db: {e}")
        return []
    finally:
        conn.close()
|
694 |
+
|
695 |
+
|
696 |
+
def perform_full_text_search_chat(query: str, relevant_chat_ids: List[int], page: int = 1, results_per_page: int = 5) -> \
        List[Dict[str, Any]]:
    """
    Perform a full-text search within the specified chat IDs using FTS5.

    Args:
        query (str): The user's query.
        relevant_chat_ids (List[int]): List of chat IDs to search within. An
            empty list applies no restriction.
        page (int): Pagination page number.
        results_per_page (int): Number of results per page.

    Returns:
        List[Dict[str, Any]]: One entry per hit, with the chat history under
        ``content`` and the chat ID under ``metadata['media_id']``. Empty on a
        blank query or on any error (which is logged).
    """
    try:
        if not query.strip():
            return []

        # int() ensures only numeric literals are spliced into the SQL text.
        chat_ids = {int(chat_id) for chat_id in (relevant_chat_ids or [])}

        # Limit the search to the relevant chats. The filter used to reference
        # a media_id column, which the query built by search_db() does not have.
        where_clause = ""
        if chat_ids:
            id_list = ",".join(str(chat_id) for chat_id in sorted(chat_ids))
            where_clause = f"CharacterChats.id IN ({id_list})"

        # Search the columns search_db() actually returns; there is no
        # "content" column in its result.
        fts_results = search_db(
            query,
            ["conversation_name", "chat_history"],
            where_clause,
            page=page,
            results_per_page=results_per_page,
        )

        return [
            {
                "content": result['chat_history'],
                "metadata": {"media_id": result['id']}
            }
            for result in fts_results
            if not chat_ids or result['id'] in chat_ids
        ]
    except Exception as e:
        logging.error(f"Error in perform_full_text_search_chat: {str(e)}")
        return []
|
731 |
+
|
732 |
+
|
733 |
+
def fetch_all_chats() -> List[Dict[str, Any]]:
    """
    Return every stored chat, never raising.

    Returns:
        List[Dict[str, Any]]: The rows produced by ``get_character_chats()``
        called without a character filter, or an empty list if that call
        fails (the error is logged).
    """
    try:
        return get_character_chats()
    except Exception as fetch_error:
        logging.error(f"Error fetching all chats: {str(fetch_error)}")
        return []
|
746 |
+
|
747 |
+
|
748 |
+
def search_character_chat(query: str, fts_top_k: int = 10, relevant_media_ids: List[str] = None) -> List[Dict[str, Any]]:
    """
    Perform a full-text search on the Character Chat database.

    Args:
        query: Search query string.
        fts_top_k: Maximum number of results to return.
        relevant_media_ids: Optional list of character IDs to filter results.
            Every entry must be convertible to ``int``.

    Returns:
        List of search results with content and metadata. Empty on a blank
        query or on any error (which is logged).
    """
    if not query.strip():
        return []

    try:
        # Construct a WHERE clause to limit the search to relevant character IDs.
        # search_db() binds only the query and paging values, so "?" placeholders
        # here would never be filled. The IDs are inlined instead, and int()
        # guarantees that only numeric literals reach the SQL text.
        where_clause = ""
        if relevant_media_ids:
            id_list = ",".join(str(int(media_id)) for media_id in relevant_media_ids)
            where_clause = f"CharacterChats.character_id IN ({id_list})"

        # Perform full-text search using existing search_db function
        results = search_db(query, ["conversation_name", "chat_history"], where_clause, results_per_page=fts_top_k)

        # Format results
        return [
            {
                "content": r['chat_history'],
                "metadata": {
                    "chat_id": r['id'],
                    "conversation_name": r['conversation_name'],
                    # .get(): avoids a KeyError that would discard every hit
                    # when the result rows carry no character_id.
                    "character_id": r.get('character_id')
                }
            }
            for r in results
        ]

    except Exception as e:
        logging.error(f"Error in search_character_chat: {e}")
        return []
|
790 |
+
|
791 |
+
|
792 |
+
def search_character_cards(query: str, fts_top_k: int = 10, relevant_media_ids: List[str] = None) -> List[Dict[str, Any]]:
    """
    Perform a full-text search on the Character Cards database.

    Args:
        query: Search query string.
        fts_top_k: Maximum number of results to return.
        relevant_media_ids: Optional list of character IDs to filter results.

    Returns:
        List of search results with content and metadata. Empty on a blank
        query or on any error (which is logged).
    """
    if not query.strip():
        return []

    # Bound before the try block so the finally clause can tell whether a
    # connection was opened. Previously a failed connect() left conn undefined
    # and the cleanup itself raised.
    conn = None
    try:
        conn = sqlite3.connect(chat_DB_PATH)
        cursor = conn.cursor()

        # Construct the query
        sql_query = """
            SELECT CharacterCards.id, CharacterCards.name, CharacterCards.description, CharacterCards.personality, CharacterCards.scenario
            FROM CharacterCards_fts
            JOIN CharacterCards ON CharacterCards_fts.rowid = CharacterCards.id
            WHERE CharacterCards_fts MATCH ?
        """

        params = [query]

        # Add filtering by character IDs if provided
        if relevant_media_ids:
            placeholders = ','.join(['?'] * len(relevant_media_ids))
            sql_query += f" AND CharacterCards.id IN ({placeholders})"
            params.extend(relevant_media_ids)

        sql_query += " LIMIT ?"
        params.append(fts_top_k)

        cursor.execute(sql_query, params)
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]

        # Format results
        formatted_results = []
        for r in (dict(zip(columns, row)) for row in rows):
            content = f"Name: {r['name']}\nDescription: {r['description']}\nPersonality: {r['personality']}\nScenario: {r['scenario']}"
            formatted_results.append({
                "content": content,
                "metadata": {
                    "character_id": r['id'],
                    "name": r['name']
                }
            })

        return formatted_results

    except Exception as e:
        logging.error(f"Error in search_character_cards: {e}")
        return []
    finally:
        if conn is not None:
            conn.close()
|
855 |
+
|
856 |
+
|
857 |
+
def fetch_character_ids_by_keywords(keywords: List[str]) -> List[int]:
    """
    Fetch character IDs associated with any of the specified keywords.

    Args:
        keywords (List[str]): List of keywords to search for.

    Returns:
        List[int]: List of character IDs associated with the keywords; empty
        when no keywords are given or the query fails (the error is logged).
    """
    if not keywords:
        return []

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        # Assuming 'tags' column in CharacterCards table stores tags as JSON array.
        # Rows whose tags are not valid JSON are treated as having no tags;
        # otherwise a single malformed row makes json_each() fail the whole query.
        placeholders = ','.join(['?'] * len(keywords))
        sql_query = f"""
            SELECT DISTINCT id FROM CharacterCards
            WHERE EXISTS (
                SELECT 1
                FROM json_each(CASE WHEN json_valid(CharacterCards.tags) THEN CharacterCards.tags ELSE '[]' END)
                WHERE json_each.value IN ({placeholders})
            )
        """
        cursor.execute(sql_query, keywords)
        return [row[0] for row in cursor.fetchall()]
    except Exception as e:
        logging.error(f"Error in fetch_character_ids_by_keywords: {e}")
        return []
    finally:
        conn.close()
|
891 |
+
|
892 |
+
|
893 |
+
###################################################################
|
894 |
+
#
|
895 |
+
# Character Keywords
|
896 |
+
|
897 |
+
def view_char_keywords():
    """List every distinct tag used by the character cards.

    Returns:
        str: A Markdown bullet list of the keywords, ``"No keywords found."``
        when there are none, or an error message if the query fails.
    """
    conn = None
    try:
        # Opened and closed explicitly: "with sqlite3.connect(...)" only
        # commits or rolls back, it does not close the connection.
        conn = sqlite3.connect(chat_DB_PATH)
        cursor = conn.cursor()
        # json_each() exposes each array element in its "value" column; it has
        # no "keyword" column, so the value is aliased. Rows with non-JSON tags
        # are expanded as an empty array instead of failing the query.
        cursor.execute("""
            SELECT DISTINCT json_each.value AS keyword
            FROM CharacterCards
            CROSS JOIN json_each(CASE WHEN json_valid(CharacterCards.tags) THEN CharacterCards.tags ELSE '[]' END)
            ORDER BY keyword
        """)
        keywords = cursor.fetchall()
        if keywords:
            keyword_list = [k[0] for k in keywords]
            return "### Current Character Keywords:\n" + "\n".join(
                [f"- {k}" for k in keyword_list])
        return "No keywords found."
    except Exception as e:
        return f"Error retrieving keywords: {str(e)}"
    finally:
        if conn is not None:
            conn.close()
|
916 |
+
|
917 |
+
|
918 |
+
def add_char_keywords(name: str, keywords: str):
    """Add comma-separated keywords to a character's tags.

    Args:
        name: Name of the character card to update.
        keywords: Comma-separated keywords; blanks are ignored and keywords
            already present are not duplicated.

    Returns:
        str: A success message, ``"Character not found."``, or an error message.
    """
    import ast  # only needed to recover tag lists saved as a Python repr

    conn = None
    try:
        keywords_list = [k.strip() for k in keywords.split(",") if k.strip()]
        # Use the same database as every other function in this module
        # instead of a hard-coded relative file name.
        conn = sqlite3.connect(chat_DB_PATH)
        cursor = conn.cursor()
        cursor.execute(
            "SELECT tags FROM CharacterCards WHERE name = ?",
            (name,)
        )
        result = cursor.fetchone()
        if not result:
            return "Character not found."

        # Tags are stored as a JSON array (see parse_character_card), so decode
        # them properly instead of slicing the brackets off the text.
        current_tags = []
        if result[0]:
            try:
                current_tags = json.loads(result[0])
            except (TypeError, ValueError):
                # Earlier versions saved str(list), which is a Python literal
                # rather than JSON; recover those values when possible.
                try:
                    current_tags = ast.literal_eval(result[0])
                except (ValueError, SyntaxError):
                    logging.warning(f"Unreadable tags for character '{name}'; starting from an empty list")
                    current_tags = []
        if not isinstance(current_tags, list):
            current_tags = []

        # Order-preserving merge without duplicates.
        updated_keywords = []
        for tag in current_tags + keywords_list:
            if tag not in updated_keywords:
                updated_keywords.append(tag)

        cursor.execute(
            "UPDATE CharacterCards SET tags = ? WHERE name = ?",
            (json.dumps(updated_keywords), name)
        )
        conn.commit()
        return f"Successfully added keywords to character {name}"
    except Exception as e:
        return f"Error adding keywords: {str(e)}"
    finally:
        if conn is not None:
            conn.close()
|
943 |
+
|
944 |
+
|
945 |
+
def delete_char_keyword(char_name: str, keyword: str) -> str:
    """
    Delete a keyword from a character's tags.

    Args:
        char_name (str): The name of the character
        keyword (str): The keyword to delete

    Returns:
        str: Success/failure message
    """
    conn = None
    try:
        # Opened and closed explicitly: "with sqlite3.connect(...)" only
        # commits or rolls back, it does not close the connection.
        conn = sqlite3.connect(chat_DB_PATH)
        cursor = conn.cursor()

        # First, check if the character exists
        cursor.execute("SELECT tags FROM CharacterCards WHERE name = ?", (char_name,))
        result = cursor.fetchone()

        if not result:
            return f"Character '{char_name}' not found."

        # Parse existing tags
        current_tags = json.loads(result[0]) if result[0] else []

        if keyword not in current_tags:
            return f"Keyword '{keyword}' not found in character '{char_name}' tags."

        # Remove the keyword (every occurrence)
        updated_tags = [tag for tag in current_tags if tag != keyword]

        # Update the character's tags
        cursor.execute(
            "UPDATE CharacterCards SET tags = ? WHERE name = ?",
            (json.dumps(updated_tags), char_name)
        )
        conn.commit()

        logging.info(f"Keyword '{keyword}' deleted from character '{char_name}'")
        return f"Successfully deleted keyword '{keyword}' from character '{char_name}'."

    except Exception as e:
        error_msg = f"Error deleting keyword: {str(e)}"
        logging.error(error_msg)
        return error_msg
    finally:
        if conn is not None:
            conn.close()
|
990 |
+
|
991 |
+
|
992 |
+
def export_char_keywords_to_csv() -> Tuple[str, str]:
    """
    Export all character keywords to a CSV file with associated metadata.

    The file has one row per (keyword, character) pair with the columns
    ``Keyword``, ``Character Name`` and ``Number of Chats``. It is a temporary
    file that is not deleted automatically; the caller owns it.

    Returns:
        Tuple[str, str]: (status_message, file_path). On failure the message
        describes the error and the path is an empty string.
    """
    import csv
    from tempfile import NamedTemporaryFile

    try:
        # Query first, so a database failure does not leave an empty temporary
        # file behind.
        conn = sqlite3.connect(chat_DB_PATH)
        try:
            cursor = conn.cursor()

            # Get all characters and their tags
            cursor.execute("""
                SELECT
                    name,
                    tags,
                    (SELECT COUNT(*) FROM CharacterChats WHERE CharacterChats.character_id = CharacterCards.id) as chat_count
                FROM CharacterCards
                WHERE json_valid(tags)
                ORDER BY name
            """)
            results = cursor.fetchall()
        finally:
            # "with sqlite3.connect(...)" would not close the connection.
            conn.close()

        # Process the results to create rows for the CSV
        csv_rows = []
        for name, tags_json, chat_count in results:
            tags = json.loads(tags_json) if tags_json else []
            for tag in tags:
                csv_rows.append([
                    tag,         # keyword
                    name,        # character name
                    chat_count   # number of chats
                ])

        # Write to CSV; the context manager closes the file even if writing
        # fails. UTF-8 keeps non-ASCII tags and names intact on every platform.
        with NamedTemporaryFile(mode='w+', delete=False, suffix='.csv', newline='',
                                encoding='utf-8') as temp_file:
            writer = csv.writer(temp_file)
            writer.writerow(['Keyword', 'Character Name', 'Number of Chats'])
            writer.writerows(csv_rows)
            file_path = temp_file.name

        status_msg = f"Successfully exported {len(csv_rows)} character keyword entries to CSV."
        logging.info(status_msg)

        return status_msg, file_path

    except Exception as e:
        error_msg = f"Error exporting keywords: {str(e)}"
        logging.error(error_msg)
        return error_msg, ""
|
1051 |
+
|
1052 |
+
#
|
1053 |
+
# End of Character chat keyword functions
|
1054 |
+
######################################################
|
1055 |
+
|
1056 |
+
|
1057 |
+
#
|
1058 |
+
# End of Character_Chat_DB.py
|
1059 |
+
#######################################################################################################################
|
App_Function_Libraries/DB/DB_Backups.py
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# DB_Backups.py
|
2 |
+
#
|
3 |
+
# Imports:
|
4 |
+
import os
|
5 |
+
import shutil
|
6 |
+
import sqlite3
|
7 |
+
from datetime import datetime
|
8 |
+
import logging
|
9 |
+
#
|
10 |
+
# Local Imports:
|
11 |
+
from App_Function_Libraries.DB.Character_Chat_DB import chat_DB_PATH
|
12 |
+
from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_rag_qa_db_path
|
13 |
+
from App_Function_Libraries.Utils.Utils import get_project_relative_path
|
14 |
+
#
|
15 |
+
# End of Imports
|
16 |
+
#######################################################################################################################
|
17 |
+
#
|
18 |
+
# Functions:
|
19 |
+
|
20 |
+
def init_backup_directory(backup_base_dir: str, db_name: str) -> str:
    """Ensure the per-database backup folder exists and return its path.

    Args:
        backup_base_dir: Directory under which per-database folders live.
        db_name: Name of the sub-folder to create for this database.

    Returns:
        The joined path ``backup_base_dir/db_name`` (created if missing).
    """
    target_dir = os.path.join(backup_base_dir, db_name)
    # exist_ok makes repeated initialisation a no-op.
    os.makedirs(target_dir, exist_ok=True)
    return target_dir
|
25 |
+
|
26 |
+
|
27 |
+
def create_backup(db_path: str, backup_dir: str, db_name: str) -> str:
    """Create a full backup of the database.

    The backup is written with SQLite's online backup API to
    ``<backup_dir>/<db_name>/<db_name>_backup_<YYYYmmdd_HHMMSS>.db``.

    Args:
        db_path: Path of the SQLite database to back up.
        backup_dir: Base directory for backups; a ``db_name`` sub-directory
            is created inside it.
        db_name: Logical database name, used for the sub-directory and the
            backup file name.

    Returns:
        ``"Backup created: <path>"`` on success, otherwise a message starting
        with ``"Failed to create backup: "``. This function does not raise.
    """
    try:
        db_path = os.path.abspath(db_path)
        backup_dir = os.path.abspath(backup_dir)

        logging.info("Creating backup:")
        logging.info(f"  DB Path: {db_path}")
        logging.info(f"  Backup Dir: {backup_dir}")
        logging.info(f"  DB Name: {db_name}")

        # sqlite3.connect() would silently create an empty database for a
        # missing path and then "back up" that empty file successfully.
        if not os.path.isfile(db_path):
            error_msg = f"Failed to create backup: database file not found: {db_path}"
            logging.error(error_msg)
            return error_msg

        # Create subdirectory based on db_name
        specific_backup_dir = os.path.join(backup_dir, db_name)
        os.makedirs(specific_backup_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_file = os.path.join(specific_backup_dir, f"{db_name}_backup_{timestamp}.db")
        logging.info(f"  Full backup path: {backup_file}")

        # Create a backup using SQLite's backup API. Connections are closed
        # explicitly: using a connection as a context manager only manages the
        # transaction and leaves the handle (and file lock) open.
        source = sqlite3.connect(db_path)
        try:
            target = sqlite3.connect(backup_file)
            try:
                source.backup(target)
            finally:
                target.close()
        finally:
            source.close()

        logging.info(f"Backup created successfully: {backup_file}")
        return f"Backup created: {backup_file}"
    except Exception as e:
        error_msg = f"Failed to create backup: {str(e)}"
        logging.error(error_msg)
        return error_msg
|
57 |
+
|
58 |
+
|
59 |
+
def create_incremental_backup(db_path: str, backup_dir: str, db_name: str) -> str:
    """Create a compacted snapshot of the database using ``VACUUM INTO``.

    Despite the name, ``VACUUM INTO`` writes a complete, vacuumed copy of the
    database rather than a delta. The snapshot is written to
    ``<backup_dir>/<db_name>_incremental_<YYYYmmdd_HHMMSS>.sqlib``.

    Args:
        db_path: Path of the SQLite database to snapshot.
        backup_dir: Directory that receives the snapshot (created if missing).
        db_name: Logical database name used as the file-name prefix.

    Returns:
        ``"Incremental backup created: <path>"`` on success, otherwise a
        message starting with ``"Failed to create incremental backup: "``.
        This function does not raise.
    """
    try:
        # Connecting to a missing path would create an empty database and
        # snapshot that instead of reporting the problem.
        if not os.path.isfile(db_path):
            error_msg = f"Failed to create incremental backup: database file not found: {db_path}"
            logging.error(error_msg)
            return error_msg

        os.makedirs(backup_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_file = os.path.join(backup_dir,
                                   f"{db_name}_incremental_{timestamp}.sqlib")

        conn = sqlite3.connect(db_path)
        try:
            # Bind the file name as a parameter: interpolating it into the SQL
            # text breaks (or is injectable) when the path contains a quote.
            conn.execute("VACUUM INTO ?", (backup_file,))
        finally:
            conn.close()

        logging.info(f"Incremental backup created: {backup_file}")
        return f"Incremental backup created: {backup_file}"
    except Exception as e:
        error_msg = f"Failed to create incremental backup: {str(e)}"
        logging.error(error_msg)
        return error_msg
|
75 |
+
|
76 |
+
|
77 |
+
def list_backups(backup_dir: str) -> str:
    """Return the backup file names found directly inside ``backup_dir``.

    Only entries whose names end in ``.db`` or ``.sqlib`` are reported.
    Names are sorted in descending lexicographic order and joined with
    newlines; sub-directories are not searched.

    Returns:
        The newline-separated names, ``"No backups found"`` when there are
        none, or a ``"Failed to list backups: ..."`` message on error.
    """
    try:
        matching = sorted(
            (entry for entry in os.listdir(backup_dir)
             if entry.endswith(('.db', '.sqlib'))),
            reverse=True,  # Most recent first
        )
    except Exception as e:
        error_msg = f"Failed to list backups: {str(e)}"
        logging.error(error_msg)
        return error_msg

    if not matching:
        return "No backups found"
    return "\n".join(matching)
|
88 |
+
|
89 |
+
|
90 |
+
def restore_single_db_backup(db_path: str, backup_dir: str, db_name: str, backup_name: str) -> str:
    """Restore database from a backup file.

    Before the live database is overwritten, a safety copy of it is written
    to ``<backup_dir>/<db_name>_pre_restore_<YYYYmmdd_HHMMSS>.db``. If the
    live database does not exist, that step is skipped so a lost database
    can still be restored.

    Args:
        db_path: Path of the live database file to overwrite.
        backup_dir: Directory containing the backup files.
        db_name: Logical database name, used to name the pre-restore copy.
        backup_name: File name of the backup, relative to ``backup_dir``.

    Returns:
        ``"Database restored from <backup_name>"`` on success,
        ``"Backup file not found: <backup_name>"`` if the backup is missing,
        or a ``"Failed to restore backup: ..."`` message on any other error.
        This function does not raise.
    """
    try:
        logging.info(f"Restoring backup: {backup_name}")
        backup_path = os.path.join(backup_dir, backup_name)
        # isfile (not exists): a directory is not a usable backup.
        if not os.path.isfile(backup_path):
            logging.error(f"Backup file not found: {backup_name}")
            return f"Backup file not found: {backup_name}"

        if os.path.isfile(db_path):
            # Create a timestamp for the current db
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            current_backup = os.path.join(backup_dir,
                                          f"{db_name}_pre_restore_{timestamp}.db")

            # Backup current database before restore
            logging.info(f"Creating backup of current database: {current_backup}")
            shutil.copy2(db_path, current_backup)
        else:
            # Nothing to preserve; restoring a missing database must still work.
            logging.info(f"No existing database at {db_path}; skipping pre-restore backup")

        # Restore the backup
        # NOTE(review): this is a plain file copy. If the live database is open
        # or has -wal/-shm sidecar files, those are not handled here — confirm
        # callers close all connections before restoring.
        logging.info(f"Restoring database from {backup_name}")
        shutil.copy2(backup_path, db_path)

        logging.info(f"Database restored from {backup_name}")
        return f"Database restored from {backup_name}"
    except Exception as e:
        error_msg = f"Failed to restore backup: {str(e)}"
        logging.error(error_msg)
        return error_msg
|
118 |
+
|
119 |
+
|
120 |
+
def setup_backup_config():
    """Build the backup settings for the RAG, character-chat and media databases.

    Creates one sub-directory per database (``rag_chat``, ``character_chat``,
    ``media``) under the project-relative ``tldw_DB_Backups`` folder and logs
    each location.

    Returns:
        A 3-tuple ``(rag_db_config, char_db_config, media_db_config)``; each
        element is a dict with the keys ``db_path``, ``backup_dir`` and
        ``db_name``.
    """
    base_dir = get_project_relative_path('tldw_DB_Backups')
    logging.info(f"Base backup directory: {os.path.abspath(base_dir)}")

    # --- RAG QA chat database ---
    rag_source = get_rag_qa_db_path()
    rag_dir = os.path.join(base_dir, 'rag_chat')
    os.makedirs(rag_dir, exist_ok=True)
    logging.info(f"RAG backup directory: {os.path.abspath(rag_dir)}")
    rag_db_config = dict(db_path=rag_source, backup_dir=rag_dir, db_name='rag_qa')

    # --- Character chat database ---
    char_dir = os.path.join(base_dir, 'character_chat')
    os.makedirs(char_dir, exist_ok=True)
    logging.info(f"Character backup directory: {os.path.abspath(char_dir)}")
    char_db_config = dict(db_path=chat_DB_PATH, backup_dir=char_dir, db_name='chatDB')

    # --- Media database ---
    # The media DB path is derived here as 'media_summary.db' in the same
    # directory as the character chat database.
    media_dir = os.path.join(base_dir, 'media')
    os.makedirs(media_dir, exist_ok=True)
    logging.info(f"Media backup directory: {os.path.abspath(media_dir)}")
    media_source = os.path.join(os.path.dirname(chat_DB_PATH), 'media_summary.db')
    media_db_config = dict(db_path=media_source, backup_dir=media_dir, db_name='media')

    return rag_db_config, char_db_config, media_db_config
|
160 |
+
|
App_Function_Libraries/DB/DB_Manager.py
CHANGED
@@ -13,11 +13,14 @@ from elasticsearch import Elasticsearch
|
|
13 |
#
|
14 |
# Import your existing SQLite functions
|
15 |
from App_Function_Libraries.DB.SQLite_DB import DatabaseError
|
|
|
|
|
|
|
|
|
|
|
16 |
from App_Function_Libraries.DB.SQLite_DB import (
|
17 |
update_media_content as sqlite_update_media_content,
|
18 |
-
list_prompts as sqlite_list_prompts,
|
19 |
search_and_display as sqlite_search_and_display,
|
20 |
-
fetch_prompt_details as sqlite_fetch_prompt_details,
|
21 |
keywords_browser_interface as sqlite_keywords_browser_interface,
|
22 |
add_keyword as sqlite_add_keyword,
|
23 |
delete_keyword as sqlite_delete_keyword,
|
@@ -25,31 +28,17 @@ from App_Function_Libraries.DB.SQLite_DB import (
|
|
25 |
ingest_article_to_db as sqlite_ingest_article_to_db,
|
26 |
add_media_to_database as sqlite_add_media_to_database,
|
27 |
import_obsidian_note_to_db as sqlite_import_obsidian_note_to_db,
|
28 |
-
add_prompt as sqlite_add_prompt,
|
29 |
-
delete_chat_message as sqlite_delete_chat_message,
|
30 |
-
update_chat_message as sqlite_update_chat_message,
|
31 |
-
add_chat_message as sqlite_add_chat_message,
|
32 |
-
get_chat_messages as sqlite_get_chat_messages,
|
33 |
-
search_chat_conversations as sqlite_search_chat_conversations,
|
34 |
-
create_chat_conversation as sqlite_create_chat_conversation,
|
35 |
-
save_chat_history_to_database as sqlite_save_chat_history_to_database,
|
36 |
view_database as sqlite_view_database,
|
37 |
get_transcripts as sqlite_get_transcripts,
|
38 |
get_trashed_items as sqlite_get_trashed_items,
|
39 |
user_delete_item as sqlite_user_delete_item,
|
40 |
empty_trash as sqlite_empty_trash,
|
41 |
create_automated_backup as sqlite_create_automated_backup,
|
42 |
-
add_or_update_prompt as sqlite_add_or_update_prompt,
|
43 |
-
load_prompt_details as sqlite_load_prompt_details,
|
44 |
-
load_preset_prompts as sqlite_load_preset_prompts,
|
45 |
-
insert_prompt_to_db as sqlite_insert_prompt_to_db,
|
46 |
-
delete_prompt as sqlite_delete_prompt,
|
47 |
search_and_display_items as sqlite_search_and_display_items,
|
48 |
-
get_conversation_name as sqlite_get_conversation_name,
|
49 |
add_media_with_keywords as sqlite_add_media_with_keywords,
|
50 |
check_media_and_whisper_model as sqlite_check_media_and_whisper_model, \
|
51 |
create_document_version as sqlite_create_document_version,
|
52 |
-
get_document_version as sqlite_get_document_version,
|
53 |
sqlite_update_fts_for_media, get_unprocessed_media as sqlite_get_unprocessed_media, fetch_item_details as sqlite_fetch_item_details, \
|
54 |
search_media_database as sqlite_search_media_database, mark_as_trash as sqlite_mark_as_trash, \
|
55 |
get_media_transcripts as sqlite_get_media_transcripts, get_specific_transcript as sqlite_get_specific_transcript, \
|
@@ -60,23 +49,35 @@ from App_Function_Libraries.DB.SQLite_DB import (
|
|
60 |
delete_specific_prompt as sqlite_delete_specific_prompt,
|
61 |
fetch_keywords_for_media as sqlite_fetch_keywords_for_media, \
|
62 |
update_keywords_for_media as sqlite_update_keywords_for_media, check_media_exists as sqlite_check_media_exists, \
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
get_workflow_chat as sqlite_get_workflow_chat, update_media_content_with_version as sqlite_update_media_content_with_version, \
|
69 |
check_existing_media as sqlite_check_existing_media, get_all_document_versions as sqlite_get_all_document_versions, \
|
70 |
fetch_paginated_data as sqlite_fetch_paginated_data, get_latest_transcription as sqlite_get_latest_transcription, \
|
71 |
mark_media_as_processed as sqlite_mark_media_as_processed,
|
72 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
from App_Function_Libraries.DB.Character_Chat_DB import (
|
74 |
add_character_card as sqlite_add_character_card, get_character_cards as sqlite_get_character_cards, \
|
75 |
get_character_card_by_id as sqlite_get_character_card_by_id, update_character_card as sqlite_update_character_card, \
|
76 |
delete_character_card as sqlite_delete_character_card, add_character_chat as sqlite_add_character_chat, \
|
77 |
get_character_chats as sqlite_get_character_chats, get_character_chat_by_id as sqlite_get_character_chat_by_id, \
|
78 |
-
update_character_chat as sqlite_update_character_chat, delete_character_chat as sqlite_delete_character_chat
|
79 |
-
migrate_chat_to_media_db as sqlite_migrate_chat_to_media_db,
|
80 |
)
|
81 |
#
|
82 |
# Local Imports
|
@@ -214,9 +215,9 @@ print(f"Database path: {db.db_path}")
|
|
214 |
#
|
215 |
# DB Search functions
|
216 |
|
217 |
-
def
|
218 |
if db_type == 'sqlite':
|
219 |
-
return
|
220 |
elif db_type == 'elasticsearch':
|
221 |
# Implement Elasticsearch version when available
|
222 |
raise NotImplementedError("Elasticsearch version of search_db not yet implemented")
|
@@ -500,13 +501,6 @@ def load_prompt_details(*args, **kwargs):
|
|
500 |
# Implement Elasticsearch version
|
501 |
raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
|
502 |
|
503 |
-
def load_preset_prompts(*args, **kwargs):
|
504 |
-
if db_type == 'sqlite':
|
505 |
-
return sqlite_load_preset_prompts()
|
506 |
-
elif db_type == 'elasticsearch':
|
507 |
-
# Implement Elasticsearch version
|
508 |
-
raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
|
509 |
-
|
510 |
def insert_prompt_to_db(*args, **kwargs):
|
511 |
if db_type == 'sqlite':
|
512 |
return sqlite_insert_prompt_to_db(*args, **kwargs)
|
@@ -539,7 +533,6 @@ def mark_as_trash(media_id: int) -> None:
|
|
539 |
else:
|
540 |
raise ValueError(f"Unsupported database type: {db_type}")
|
541 |
|
542 |
-
|
543 |
def get_latest_transcription(*args, **kwargs):
|
544 |
if db_type == 'sqlite':
|
545 |
return sqlite_get_latest_transcription(*args, **kwargs)
|
@@ -721,62 +714,132 @@ def fetch_keywords_for_media(*args, **kwargs):
|
|
721 |
#
|
722 |
# Chat-related Functions
|
723 |
|
724 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
725 |
if db_type == 'sqlite':
|
726 |
-
return
|
727 |
elif db_type == 'elasticsearch':
|
728 |
# Implement Elasticsearch version
|
729 |
raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
|
730 |
|
731 |
-
def
|
732 |
if db_type == 'sqlite':
|
733 |
-
return
|
734 |
elif db_type == 'elasticsearch':
|
735 |
# Implement Elasticsearch version
|
736 |
raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
|
737 |
|
738 |
-
def
|
739 |
if db_type == 'sqlite':
|
740 |
-
return
|
741 |
elif db_type == 'elasticsearch':
|
742 |
# Implement Elasticsearch version
|
743 |
raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
|
744 |
|
745 |
-
def
|
746 |
if db_type == 'sqlite':
|
747 |
-
return
|
748 |
elif db_type == 'elasticsearch':
|
749 |
# Implement Elasticsearch version
|
750 |
raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
|
751 |
|
752 |
-
def
|
753 |
if db_type == 'sqlite':
|
754 |
-
return
|
755 |
elif db_type == 'elasticsearch':
|
756 |
# Implement Elasticsearch version
|
757 |
raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
|
758 |
|
759 |
-
def
|
760 |
if db_type == 'sqlite':
|
761 |
-
return
|
762 |
elif db_type == 'elasticsearch':
|
763 |
# Implement Elasticsearch version
|
764 |
raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
|
765 |
|
766 |
-
def
|
767 |
if db_type == 'sqlite':
|
768 |
-
return
|
769 |
elif db_type == 'elasticsearch':
|
770 |
# Implement Elasticsearch version
|
771 |
raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
|
772 |
|
773 |
-
def
|
774 |
if db_type == 'sqlite':
|
775 |
-
return
|
776 |
elif db_type == 'elasticsearch':
|
777 |
# Implement Elasticsearch version
|
778 |
raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
|
779 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
780 |
#
|
781 |
# End of Chat-related Functions
|
782 |
############################################################################################################
|
@@ -856,12 +919,54 @@ def delete_character_chat(*args, **kwargs):
|
|
856 |
# Implement Elasticsearch version
|
857 |
raise NotImplementedError("Elasticsearch version of delete_character_chat not yet implemented")
|
858 |
|
859 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
860 |
if db_type == 'sqlite':
|
861 |
-
return
|
862 |
elif db_type == 'elasticsearch':
|
863 |
# Implement Elasticsearch version
|
864 |
-
raise NotImplementedError("Elasticsearch version of
|
865 |
|
866 |
#
|
867 |
# End of Character Chat-related Functions
|
|
|
13 |
#
|
14 |
# Import your existing SQLite functions
|
15 |
from App_Function_Libraries.DB.SQLite_DB import DatabaseError
|
16 |
+
from App_Function_Libraries.DB.Prompts_DB import list_prompts as sqlite_list_prompts, \
|
17 |
+
fetch_prompt_details as sqlite_fetch_prompt_details, add_prompt as sqlite_add_prompt, \
|
18 |
+
search_prompts as sqlite_search_prompts, add_or_update_prompt as sqlite_add_or_update_prompt, \
|
19 |
+
load_prompt_details as sqlite_load_prompt_details, insert_prompt_to_db as sqlite_insert_prompt_to_db, \
|
20 |
+
delete_prompt as sqlite_delete_prompt
|
21 |
from App_Function_Libraries.DB.SQLite_DB import (
|
22 |
update_media_content as sqlite_update_media_content,
|
|
|
23 |
search_and_display as sqlite_search_and_display,
|
|
|
24 |
keywords_browser_interface as sqlite_keywords_browser_interface,
|
25 |
add_keyword as sqlite_add_keyword,
|
26 |
delete_keyword as sqlite_delete_keyword,
|
|
|
28 |
ingest_article_to_db as sqlite_ingest_article_to_db,
|
29 |
add_media_to_database as sqlite_add_media_to_database,
|
30 |
import_obsidian_note_to_db as sqlite_import_obsidian_note_to_db,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
view_database as sqlite_view_database,
|
32 |
get_transcripts as sqlite_get_transcripts,
|
33 |
get_trashed_items as sqlite_get_trashed_items,
|
34 |
user_delete_item as sqlite_user_delete_item,
|
35 |
empty_trash as sqlite_empty_trash,
|
36 |
create_automated_backup as sqlite_create_automated_backup,
|
|
|
|
|
|
|
|
|
|
|
37 |
search_and_display_items as sqlite_search_and_display_items,
|
|
|
38 |
add_media_with_keywords as sqlite_add_media_with_keywords,
|
39 |
check_media_and_whisper_model as sqlite_check_media_and_whisper_model, \
|
40 |
create_document_version as sqlite_create_document_version,
|
41 |
+
get_document_version as sqlite_get_document_version, search_media_db as sqlite_search_media_db, add_media_chunk as sqlite_add_media_chunk,
|
42 |
sqlite_update_fts_for_media, get_unprocessed_media as sqlite_get_unprocessed_media, fetch_item_details as sqlite_fetch_item_details, \
|
43 |
search_media_database as sqlite_search_media_database, mark_as_trash as sqlite_mark_as_trash, \
|
44 |
get_media_transcripts as sqlite_get_media_transcripts, get_specific_transcript as sqlite_get_specific_transcript, \
|
|
|
49 |
delete_specific_prompt as sqlite_delete_specific_prompt,
|
50 |
fetch_keywords_for_media as sqlite_fetch_keywords_for_media, \
|
51 |
update_keywords_for_media as sqlite_update_keywords_for_media, check_media_exists as sqlite_check_media_exists, \
|
52 |
+
get_media_content as sqlite_get_media_content, get_paginated_files as sqlite_get_paginated_files, \
|
53 |
+
get_media_title as sqlite_get_media_title, get_all_content_from_database as sqlite_get_all_content_from_database, \
|
54 |
+
get_next_media_id as sqlite_get_next_media_id, batch_insert_chunks as sqlite_batch_insert_chunks, Database, \
|
55 |
+
save_workflow_chat_to_db as sqlite_save_workflow_chat_to_db, get_workflow_chat as sqlite_get_workflow_chat, \
|
56 |
+
update_media_content_with_version as sqlite_update_media_content_with_version, \
|
|
|
57 |
check_existing_media as sqlite_check_existing_media, get_all_document_versions as sqlite_get_all_document_versions, \
|
58 |
fetch_paginated_data as sqlite_fetch_paginated_data, get_latest_transcription as sqlite_get_latest_transcription, \
|
59 |
mark_media_as_processed as sqlite_mark_media_as_processed,
|
60 |
)
|
61 |
+
from App_Function_Libraries.DB.RAG_QA_Chat_DB import start_new_conversation as sqlite_start_new_conversation, \
|
62 |
+
save_message as sqlite_save_message, load_chat_history as sqlite_load_chat_history, \
|
63 |
+
get_all_conversations as sqlite_get_all_conversations, get_notes_by_keywords as sqlite_get_notes_by_keywords, \
|
64 |
+
get_note_by_id as sqlite_get_note_by_id, update_note as sqlite_update_note, save_notes as sqlite_save_notes, \
|
65 |
+
clear_keywords_from_note as sqlite_clear_keywords_from_note, add_keywords_to_note as sqlite_add_keywords_to_note, \
|
66 |
+
add_keywords_to_conversation as sqlite_add_keywords_to_conversation, \
|
67 |
+
get_keywords_for_note as sqlite_get_keywords_for_note, delete_note as sqlite_delete_note, \
|
68 |
+
search_conversations_by_keywords as sqlite_search_conversations_by_keywords, \
|
69 |
+
delete_conversation as sqlite_delete_conversation, get_conversation_title as sqlite_get_conversation_title, \
|
70 |
+
update_conversation_title as sqlite_update_conversation_title, \
|
71 |
+
fetch_all_conversations as sqlite_fetch_all_conversations, fetch_all_notes as sqlite_fetch_all_notes, \
|
72 |
+
fetch_conversations_by_ids as sqlite_fetch_conversations_by_ids, fetch_notes_by_ids as sqlite_fetch_notes_by_ids, \
|
73 |
+
delete_messages_in_conversation as sqlite_delete_messages_in_conversation, \
|
74 |
+
get_conversation_text as sqlite_get_conversation_text, search_notes_titles as sqlite_search_notes_titles
|
75 |
from App_Function_Libraries.DB.Character_Chat_DB import (
|
76 |
add_character_card as sqlite_add_character_card, get_character_cards as sqlite_get_character_cards, \
|
77 |
get_character_card_by_id as sqlite_get_character_card_by_id, update_character_card as sqlite_update_character_card, \
|
78 |
delete_character_card as sqlite_delete_character_card, add_character_chat as sqlite_add_character_chat, \
|
79 |
get_character_chats as sqlite_get_character_chats, get_character_chat_by_id as sqlite_get_character_chat_by_id, \
|
80 |
+
update_character_chat as sqlite_update_character_chat, delete_character_chat as sqlite_delete_character_chat
|
|
|
81 |
)
|
82 |
#
|
83 |
# Local Imports
|
|
|
215 |
#
|
216 |
# DB Search functions
|
217 |
|
218 |
+
def search_media_db(search_query: str, search_fields: List[str], keywords: str, page: int = 1, results_per_page: int = 10):
|
219 |
if db_type == 'sqlite':
|
220 |
+
return sqlite_search_media_db(search_query, search_fields, keywords, page, results_per_page)
|
221 |
elif db_type == 'elasticsearch':
|
222 |
# Implement Elasticsearch version when available
|
223 |
raise NotImplementedError("Elasticsearch version of search_db not yet implemented")
|
|
|
501 |
# Implement Elasticsearch version
|
502 |
raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
|
503 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
504 |
def insert_prompt_to_db(*args, **kwargs):
|
505 |
if db_type == 'sqlite':
|
506 |
return sqlite_insert_prompt_to_db(*args, **kwargs)
|
|
|
533 |
else:
|
534 |
raise ValueError(f"Unsupported database type: {db_type}")
|
535 |
|
|
|
536 |
def get_latest_transcription(*args, **kwargs):
|
537 |
if db_type == 'sqlite':
|
538 |
return sqlite_get_latest_transcription(*args, **kwargs)
|
|
|
714 |
#
|
715 |
# Chat-related Functions
|
716 |
|
717 |
+
def search_notes_titles(*args, **kwargs):
    """Dispatch ``search_notes_titles`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to ``sqlite_search_notes_titles``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_search_notes_titles(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of search_notes_titles not yet implemented")
|
723 |
+
|
724 |
+
def save_message(*args, **kwargs):
    """Dispatch ``save_message`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to ``sqlite_save_message``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_save_message(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of save_message not yet implemented")
|
730 |
+
|
731 |
+
def load_chat_history(*args, **kwargs):
    """Dispatch ``load_chat_history`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to ``sqlite_load_chat_history``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_load_chat_history(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of load_chat_history not yet implemented")
|
737 |
+
|
738 |
+
def start_new_conversation(*args, **kwargs):
    """Dispatch ``start_new_conversation`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to ``sqlite_start_new_conversation``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_start_new_conversation(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of start_new_conversation not yet implemented")
|
744 |
+
|
745 |
+
def get_all_conversations(*args, **kwargs):
    """Dispatch ``get_all_conversations`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to ``sqlite_get_all_conversations``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_get_all_conversations(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of get_all_conversations not yet implemented")
|
751 |
+
|
752 |
+
def get_notes_by_keywords(*args, **kwargs):
    """Dispatch ``get_notes_by_keywords`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to ``sqlite_get_notes_by_keywords``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_get_notes_by_keywords(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of get_notes_by_keywords not yet implemented")
|
758 |
+
|
759 |
+
def get_note_by_id(*args, **kwargs):
    """Dispatch ``get_note_by_id`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to ``sqlite_get_note_by_id``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_get_note_by_id(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of get_note_by_id not yet implemented")
|
765 |
+
|
766 |
+
def add_keywords_to_conversation(*args, **kwargs):
    """Dispatch ``add_keywords_to_conversation`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to
    ``sqlite_add_keywords_to_conversation``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_add_keywords_to_conversation(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of add_keywords_to_conversation not yet implemented")
|
772 |
+
|
773 |
+
def get_keywords_for_note(*args, **kwargs):
    """Dispatch ``get_keywords_for_note`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to ``sqlite_get_keywords_for_note``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_get_keywords_for_note(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of get_keywords_for_note not yet implemented")
|
779 |
|
780 |
+
def delete_note(*args, **kwargs):
    """Dispatch ``delete_note`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to ``sqlite_delete_note``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_delete_note(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of delete_note not yet implemented")
|
786 |
|
787 |
+
def search_conversations_by_keywords(*args, **kwargs):
    """Dispatch ``search_conversations_by_keywords`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to
    ``sqlite_search_conversations_by_keywords``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_search_conversations_by_keywords(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of search_conversations_by_keywords not yet implemented")
|
793 |
|
794 |
+
def delete_conversation(*args, **kwargs):
    """Dispatch ``delete_conversation`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to ``sqlite_delete_conversation``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_delete_conversation(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of delete_conversation not yet implemented")
|
800 |
|
801 |
+
def get_conversation_title(*args, **kwargs):
    """Dispatch ``get_conversation_title`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to ``sqlite_get_conversation_title``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_get_conversation_title(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of get_conversation_title not yet implemented")
|
807 |
|
808 |
+
def update_conversation_title(*args, **kwargs):
    """Dispatch ``update_conversation_title`` to the backend selected by ``db_type``.

    All arguments are forwarded unchanged to
    ``sqlite_update_conversation_title``.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_update_conversation_title(*args, **kwargs)
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of update_conversation_title not yet implemented")
|
814 |
|
815 |
+
def fetch_all_conversations(*args, **kwargs):
    """Dispatch ``fetch_all_conversations`` to the backend selected by ``db_type``.

    Arguments are accepted but NOT forwarded: the SQLite implementation is
    called with no arguments.
    NOTE(review): confirm that dropping ``*args``/``**kwargs`` is intended.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_fetch_all_conversations()
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of fetch_all_conversations not yet implemented")
|
821 |
|
822 |
+
def fetch_all_notes(*args, **kwargs):
    """Dispatch ``fetch_all_notes`` to the backend selected by ``db_type``.

    Arguments are accepted but NOT forwarded: the SQLite implementation is
    called with no arguments.
    NOTE(review): confirm that dropping ``*args``/``**kwargs`` is intended.

    Returns:
        The SQLite implementation's result; ``None`` if ``db_type`` is
        neither 'sqlite' nor 'elasticsearch'.

    Raises:
        NotImplementedError: If ``db_type`` is 'elasticsearch'.
    """
    if db_type == 'sqlite':
        return sqlite_fetch_all_notes()
    elif db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of fetch_all_notes not yet implemented")
|
828 |
|
829 |
+
def delete_messages_in_conversation(*args, **kwargs):
    """Route ``delete_messages_in_conversation`` to the configured backend.

    With ``db_type == 'sqlite'`` every argument is passed straight through to
    ``sqlite_delete_messages_in_conversation`` and its result is returned.
    With ``db_type == 'elasticsearch'`` a NotImplementedError is raised.
    Any other ``db_type`` yields ``None``.
    """
    if db_type == 'elasticsearch':
        # Elasticsearch backend has not been written yet.
        raise NotImplementedError("Elasticsearch version of delete_messages_in_conversation not yet implemented")
    if db_type == 'sqlite':
        return sqlite_delete_messages_in_conversation(*args, **kwargs)
    return None
|
835 |
+
|
836 |
+
def get_conversation_text(*args, **kwargs):
    """Route ``get_conversation_text`` to the configured backend.

    With ``db_type == 'sqlite'`` every argument is passed straight through to
    ``sqlite_get_conversation_text`` and its result is returned.
    With ``db_type == 'elasticsearch'`` a NotImplementedError is raised.
    Any other ``db_type`` yields ``None``.
    """
    if db_type == 'elasticsearch':
        # Elasticsearch backend has not been written yet.
        raise NotImplementedError("Elasticsearch version of get_conversation_text not yet implemented")
    if db_type == 'sqlite':
        return sqlite_get_conversation_text(*args, **kwargs)
    return None
|
842 |
+
|
843 |
#
|
844 |
# End of Chat-related Functions
|
845 |
############################################################################################################
|
|
|
919 |
# Implement Elasticsearch version
|
920 |
raise NotImplementedError("Elasticsearch version of delete_character_chat not yet implemented")
|
921 |
|
922 |
+
def update_note(*args, **kwargs):
    """Dispatch ``update_note`` to the configured backend.

    With ``db_type == 'sqlite'`` every positional and keyword argument is
    passed straight through to ``sqlite_update_note`` and its result is
    returned. Any other value except ``'elasticsearch'`` falls through and
    returns ``None``.

    Raises:
        NotImplementedError: When ``db_type`` is ``'elasticsearch'``.
    """
    if db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of update_note not yet implemented")
    if db_type == 'sqlite':
        return sqlite_update_note(*args, **kwargs)
|
928 |
+
|
929 |
+
def save_notes(*args, **kwargs):
    """Dispatch ``save_notes`` to the configured backend.

    With ``db_type == 'sqlite'`` every positional and keyword argument is
    passed straight through to ``sqlite_save_notes`` and its result is
    returned. Any other value except ``'elasticsearch'`` falls through and
    returns ``None``.

    Raises:
        NotImplementedError: When ``db_type`` is ``'elasticsearch'``.
    """
    if db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of save_notes not yet implemented")
    if db_type == 'sqlite':
        return sqlite_save_notes(*args, **kwargs)
|
935 |
+
|
936 |
+
def clear_keywords(*args, **kwargs):
    """Dispatch ``clear_keywords`` to the configured backend.

    With ``db_type == 'sqlite'`` every positional and keyword argument is
    passed straight through to ``sqlite_clear_keywords_from_note`` and its
    result is returned. Any other value except ``'elasticsearch'`` falls
    through and returns ``None``.

    Raises:
        NotImplementedError: When ``db_type`` is ``'elasticsearch'``.
    """
    if db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of clear_keywords not yet implemented")
    if db_type == 'sqlite':
        return sqlite_clear_keywords_from_note(*args, **kwargs)
|
942 |
+
|
943 |
+
def clear_keywords_from_note(*args, **kwargs):
    """Dispatch ``clear_keywords_from_note`` to the configured backend.

    With ``db_type == 'sqlite'`` every positional and keyword argument is
    passed straight through to ``sqlite_clear_keywords_from_note`` and its
    result is returned. Any other value except ``'elasticsearch'`` falls
    through and returns ``None``.

    Raises:
        NotImplementedError: When ``db_type`` is ``'elasticsearch'``.
    """
    if db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of clear_keywords_from_note not yet implemented")
    if db_type == 'sqlite':
        return sqlite_clear_keywords_from_note(*args, **kwargs)
|
949 |
+
|
950 |
+
def add_keywords_to_note(*args, **kwargs):
    """Dispatch ``add_keywords_to_note`` to the configured backend.

    With ``db_type == 'sqlite'`` every positional and keyword argument is
    passed straight through to ``sqlite_add_keywords_to_note`` and its
    result is returned. Any other value except ``'elasticsearch'`` falls
    through and returns ``None``.

    Raises:
        NotImplementedError: When ``db_type`` is ``'elasticsearch'``.
    """
    if db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of add_keywords_to_note not yet implemented")
    if db_type == 'sqlite':
        return sqlite_add_keywords_to_note(*args, **kwargs)
|
956 |
+
|
957 |
+
def fetch_conversations_by_ids(*args, **kwargs):
    """Dispatch ``fetch_conversations_by_ids`` to the configured backend.

    With ``db_type == 'sqlite'`` every positional and keyword argument is
    passed straight through to ``sqlite_fetch_conversations_by_ids`` and
    its result is returned. Any other value except ``'elasticsearch'``
    falls through and returns ``None``.

    Raises:
        NotImplementedError: When ``db_type`` is ``'elasticsearch'``.
    """
    if db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of fetch_conversations_by_ids not yet implemented")
    if db_type == 'sqlite':
        return sqlite_fetch_conversations_by_ids(*args, **kwargs)
|
963 |
+
|
964 |
+
def fetch_notes_by_ids(*args, **kwargs):
    """Dispatch ``fetch_notes_by_ids`` to the configured backend.

    With ``db_type == 'sqlite'`` every positional and keyword argument is
    passed straight through to ``sqlite_fetch_notes_by_ids`` and its result
    is returned. Any other value except ``'elasticsearch'`` falls through
    and returns ``None``.

    Raises:
        NotImplementedError: When ``db_type`` is ``'elasticsearch'``.
    """
    if db_type == 'elasticsearch':
        # Implement Elasticsearch version
        raise NotImplementedError("Elasticsearch version of fetch_notes_by_ids not yet implemented")
    if db_type == 'sqlite':
        return sqlite_fetch_notes_by_ids(*args, **kwargs)
|
970 |
|
971 |
#
|
972 |
# End of Character Chat-related Functions
|
App_Function_Libraries/DB/Prompts_DB.py
ADDED
@@ -0,0 +1,626 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Prompts_DB.py
|
2 |
+
# Description: Functions to manage the prompts database.
|
3 |
+
#
|
4 |
+
# Imports
|
5 |
+
import sqlite3
|
6 |
+
import logging
|
7 |
+
#
|
8 |
+
# External Imports
|
9 |
+
import re
|
10 |
+
from typing import Tuple
|
11 |
+
#
|
12 |
+
# Local Imports
|
13 |
+
from App_Function_Libraries.Utils.Utils import get_database_path
|
14 |
+
#
|
15 |
+
#######################################################################################################################
|
16 |
+
#
|
17 |
+
# Functions to manage prompts DB
|
18 |
+
|
19 |
+
def create_prompts_db():
    """Create the prompts database schema if it is not already present.

    Opens ``prompts.db`` (path resolved through ``get_database_path``) and
    runs one script that creates the ``Prompts``, ``Keywords`` and
    ``PromptKeywords`` tables together with their lookup indexes. Every
    statement uses ``IF NOT EXISTS``, so an existing schema is left as is.
    """
    logging.debug("create_prompts_db: Creating prompts database.")
    schema_script = '''
        CREATE TABLE IF NOT EXISTS Prompts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT NOT NULL UNIQUE,
            author TEXT,
            details TEXT,
            system TEXT,
            user TEXT
        );
        CREATE TABLE IF NOT EXISTS Keywords (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            keyword TEXT NOT NULL UNIQUE COLLATE NOCASE
        );
        CREATE TABLE IF NOT EXISTS PromptKeywords (
            prompt_id INTEGER,
            keyword_id INTEGER,
            FOREIGN KEY (prompt_id) REFERENCES Prompts (id),
            FOREIGN KEY (keyword_id) REFERENCES Keywords (id),
            PRIMARY KEY (prompt_id, keyword_id)
        );
        CREATE INDEX IF NOT EXISTS idx_keywords_keyword ON Keywords(keyword);
        CREATE INDEX IF NOT EXISTS idx_promptkeywords_prompt_id ON PromptKeywords(prompt_id);
        CREATE INDEX IF NOT EXISTS idx_promptkeywords_keyword_id ON PromptKeywords(keyword_id);
    '''
    db_path = get_database_path('prompts.db')
    with sqlite3.connect(db_path) as db:
        db.cursor().executescript(schema_script)
|
47 |
+
|
48 |
+
# FIXME - dirty hack that should be removed later...
|
49 |
+
# Migration function to add the 'author' column to the Prompts table
|
50 |
+
def add_author_column_to_prompts():
    """Add the ``author`` column to an existing ``Prompts`` table if missing.

    Inspects the table through ``PRAGMA table_info`` and issues an
    ``ALTER TABLE ... ADD COLUMN`` only when the table exists and has no
    ``author`` column yet.

    When the ``Prompts`` table does not exist (for example a brand-new
    database file), nothing is altered. ``PRAGMA table_info`` returns no
    rows in that case, and running ``ALTER TABLE`` against a missing table
    would raise ``sqlite3.OperationalError``.
    """
    with sqlite3.connect(get_database_path('prompts.db')) as conn:
        cursor = conn.cursor()
        # Check if 'author' column already exists
        cursor.execute("PRAGMA table_info(Prompts)")
        columns = [col[1] for col in cursor.fetchall()]

        if not columns:
            # No rows means there is no Prompts table to migrate.
            logging.debug("add_author_column_to_prompts: Prompts table does not exist yet; skipping migration.")
            return

        if 'author' not in columns:
            # Add the 'author' column
            cursor.execute('ALTER TABLE Prompts ADD COLUMN author TEXT')
            print("Author column added to Prompts table.")
        else:
            print("Author column already exists in Prompts table.")
|
63 |
+
|
64 |
+
add_author_column_to_prompts()
|
65 |
+
|
66 |
+
def normalize_keyword(keyword):
    """Return *keyword* trimmed, lower-cased and with whitespace runs collapsed.

    Leading and trailing whitespace is stripped, the text is lower-cased,
    and every run of one or more whitespace characters becomes a single
    space.
    """
    trimmed_lower = keyword.strip().lower()
    return re.sub(r'\s+', ' ', trimmed_lower)
|
68 |
+
|
69 |
+
|
70 |
+
# FIXME - update calls to this function to use the new args
|
71 |
+
def add_prompt(name, author, details, system=None, user=None, keywords=None):
    """Insert a new prompt and link it to its keywords.

    Args:
        name: Prompt name; must be truthy.
        author: Value stored in the ``author`` column.
        details: Value stored in the ``details`` column.
        system: Value stored in the ``system`` column.
        user: Value stored in the ``user`` column.
        keywords: Optional iterable of keyword strings. Entries that are
            blank after stripping are skipped; the rest are normalised with
            ``normalize_keyword`` and de-duplicated before being linked.

    Returns:
        A status message string: success, "A name is required.", a
        duplicate-name message on ``sqlite3.IntegrityError``, or
        "Database error: ..." for any other ``sqlite3.Error``.
    """
    logging.debug(f"add_prompt: Adding prompt with name: {name}, author: {author}, system: {system}, user: {user}, keywords: {keywords}")
    if not name:
        logging.error("add_prompt: A name is required.")
        return "A name is required."

    insert_prompt_sql = '''
        INSERT INTO Prompts (name, author, details, system, user)
        VALUES (?, ?, ?, ?, ?)
    '''
    insert_keyword_sql = '''
        INSERT OR IGNORE INTO Keywords (keyword) VALUES (?)
    '''
    link_keyword_sql = '''
        INSERT OR IGNORE INTO PromptKeywords (prompt_id, keyword_id) VALUES (?, ?)
    '''

    try:
        with sqlite3.connect(get_database_path('prompts.db')) as db:
            cur = db.cursor()
            cur.execute(insert_prompt_sql, (name, author, details, system, user))
            new_prompt_id = cur.lastrowid

            # A set drops duplicates that only differ before normalisation.
            unique_keywords = (
                {normalize_keyword(raw) for raw in keywords if raw.strip()}
                if keywords else set()
            )
            for kw in unique_keywords:
                cur.execute(insert_keyword_sql, (kw,))
                # Look the id up explicitly: the insert is skipped when the keyword already exists.
                cur.execute('SELECT id FROM Keywords WHERE keyword = ?', (kw,))
                kw_id = cur.fetchone()[0]
                cur.execute(link_keyword_sql, (new_prompt_id, kw_id))
        return "Prompt added successfully."
    except sqlite3.IntegrityError:
        return "Prompt with this name already exists."
    except sqlite3.Error as e:
        return f"Database error: {e}"
|
102 |
+
|
103 |
+
|
104 |
+
def fetch_prompt_details(name):
    """Fetch one prompt row, with its keywords joined into a single string.

    Args:
        name: Exact prompt name to look up.

    Returns:
        A ``(name, author, details, system, user, keywords)`` tuple, where
        ``keywords`` is the ``', '``-joined keyword list produced by
        ``GROUP_CONCAT``, or ``None`` when no prompt has that name.
    """
    logging.debug(f"fetch_prompt_details: Fetching details for prompt: {name}")
    details_sql = '''
        SELECT p.name, p.author, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
        FROM Prompts p
        LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
        LEFT JOIN Keywords k ON pk.keyword_id = k.id
        WHERE p.name = ?
        GROUP BY p.id
    '''
    with sqlite3.connect(get_database_path('prompts.db')) as db:
        return db.cursor().execute(details_sql, (name,)).fetchone()
|
117 |
+
|
118 |
+
|
119 |
+
def list_prompts(page=1, per_page=10):
    """Return one page of prompt names plus paging information.

    Args:
        page: 1-based page number.
        per_page: Number of prompt names per page.

    Returns:
        ``(prompts, total_pages, page)``: the names on the requested page,
        the page count derived from the total row count, and the ``page``
        argument echoed back.
    """
    logging.debug(f"list_prompts: Listing prompts for page {page} with {per_page} prompts per page.")
    skip = (page - 1) * per_page
    with sqlite3.connect(get_database_path('prompts.db')) as db:
        cur = db.cursor()
        page_rows = cur.execute(
            'SELECT name FROM Prompts LIMIT ? OFFSET ?', (per_page, skip)
        ).fetchall()
        names = [record[0] for record in page_rows]

        # Get total count of prompts
        total_count = cur.execute('SELECT COUNT(*) FROM Prompts').fetchone()[0]

    # Round up so that a partially filled last page is counted.
    page_count = (total_count + per_page - 1) // per_page
    return names, page_count, page
|
133 |
+
|
134 |
+
|
135 |
+
def insert_prompt_to_db(title, author, description, system_prompt, user_prompt, keywords=None):
    """Insert a prompt by delegating to ``add_prompt``.

    Maps this function's parameter names onto ``add_prompt``'s
    (``title`` -> ``name``, ``description`` -> ``details``,
    ``system_prompt`` -> ``system``, ``user_prompt`` -> ``user``) and
    returns its status message unchanged.
    """
    return add_prompt(
        name=title,
        author=author,
        details=description,
        system=system_prompt,
        user=user_prompt,
        keywords=keywords,
    )
|
137 |
+
|
138 |
+
|
139 |
+
def get_prompt_db_connection():
    """Open and return a new ``sqlite3`` connection to ``prompts.db``.

    The file location is resolved through ``get_database_path``.
    """
    return sqlite3.connect(get_database_path('prompts.db'))
|
142 |
+
|
143 |
+
|
144 |
+
def search_prompts(query):
    """Search prompts by substring across name, details, prompts and keywords.

    Args:
        query: Text wrapped in ``%...%`` and matched with ``LIKE`` against
            the name, details, system prompt, user prompt and keyword
            columns.

    Returns:
        A list of ``(name, details, system, user, keywords)`` rows ordered
        by name, or an empty list if a ``sqlite3.Error`` occurs (the error
        is logged).
    """
    logging.debug(f"search_prompts: Searching prompts with query: {query}")
    search_sql = """
        SELECT p.name, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
        FROM Prompts p
        LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
        LEFT JOIN Keywords k ON pk.keyword_id = k.id
        WHERE p.name LIKE ? OR p.details LIKE ? OR p.system LIKE ? OR p.user LIKE ? OR k.keyword LIKE ?
        GROUP BY p.id
        ORDER BY p.name
    """
    like_pattern = f'%{query}%'
    try:
        with get_prompt_db_connection() as db:
            # The same pattern is bound to each of the five LIKE placeholders.
            return db.cursor().execute(search_sql, (like_pattern,) * 5).fetchall()
    except sqlite3.Error as e:
        logging.error(f"Error searching prompts: {e}")
        return []
|
162 |
+
|
163 |
+
|
164 |
+
def search_prompts_by_keyword(keyword, page=1, per_page=10):
    """Return one page of prompt names whose keywords contain *keyword*.

    Args:
        keyword: Text normalised with ``normalize_keyword`` and matched as
            a ``LIKE`` substring against stored keywords.
        page: 1-based page number.
        per_page: Number of prompt names per page.

    Returns:
        ``(prompts, total_pages, page)``: the matching names on the
        requested page, the page count derived from the number of distinct
        matching prompts, and the ``page`` argument echoed back.
    """
    logging.debug(f"search_prompts_by_keyword: Searching prompts by keyword: {keyword}")
    like_pattern = '%' + normalize_keyword(keyword) + '%'
    skip = (page - 1) * per_page

    page_sql = '''
        SELECT DISTINCT p.name
        FROM Prompts p
        JOIN PromptKeywords pk ON p.id = pk.prompt_id
        JOIN Keywords k ON pk.keyword_id = k.id
        WHERE k.keyword LIKE ?
        LIMIT ? OFFSET ?
    '''
    count_sql = '''
        SELECT COUNT(DISTINCT p.id)
        FROM Prompts p
        JOIN PromptKeywords pk ON p.id = pk.prompt_id
        JOIN Keywords k ON pk.keyword_id = k.id
        WHERE k.keyword LIKE ?
    '''

    with sqlite3.connect(get_database_path('prompts.db')) as db:
        cur = db.cursor()
        page_rows = cur.execute(page_sql, (like_pattern, per_page, skip)).fetchall()
        names = [record[0] for record in page_rows]

        # Get total count of matching prompts
        total_count = cur.execute(count_sql, (like_pattern,)).fetchone()[0]

    # Round up so that a partially filled last page is counted.
    page_count = (total_count + per_page - 1) // per_page
    return names, page_count, page
|
192 |
+
|
193 |
+
|
194 |
+
def update_prompt_keywords(prompt_name, new_keywords):
    """Replace the keyword set attached to a prompt.

    Existing links for the prompt are removed, the new keywords (normalised,
    blank entries skipped, duplicates dropped) are linked, and keywords no
    longer referenced by any prompt are deleted.

    Args:
        prompt_name: Exact name of the prompt to update.
        new_keywords: Iterable of keyword strings.

    Returns:
        A status message string: success, "Prompt not found.", or
        "Database error: ..." when a ``sqlite3.Error`` is raised.
    """
    logging.debug(f"update_prompt_keywords: Updating keywords for prompt: {prompt_name}")
    try:
        with sqlite3.connect(get_database_path('prompts.db')) as db:
            cur = db.cursor()

            id_row = cur.execute('SELECT id FROM Prompts WHERE name = ?', (prompt_name,)).fetchone()
            if not id_row:
                return "Prompt not found."
            target_id = id_row[0]

            # Start from a clean slate, then re-link the requested keywords.
            cur.execute('DELETE FROM PromptKeywords WHERE prompt_id = ?', (target_id,))

            unique_keywords = {normalize_keyword(raw) for raw in new_keywords if raw.strip()}
            for kw in unique_keywords:
                cur.execute('INSERT OR IGNORE INTO Keywords (keyword) VALUES (?)', (kw,))
                cur.execute('SELECT id FROM Keywords WHERE keyword = ?', (kw,))
                kw_id = cur.fetchone()[0]
                cur.execute('INSERT INTO PromptKeywords (prompt_id, keyword_id) VALUES (?, ?)',
                            (target_id, kw_id))

            # Remove unused keywords
            cur.execute('''
                DELETE FROM Keywords
                WHERE id NOT IN (SELECT DISTINCT keyword_id FROM PromptKeywords)
            ''')
        return "Keywords updated successfully."
    except sqlite3.Error as e:
        return f"Database error: {e}"
|
224 |
+
|
225 |
+
|
226 |
+
def add_or_update_prompt(title, author, description, system_prompt, user_prompt, keywords=None):
    """Insert a prompt, or update it when one with the same title exists.

    An existing prompt is updated through ``update_prompt_in_db``; if that
    reports success its keywords are replaced through
    ``update_prompt_keywords`` and both messages are joined with a space.
    Otherwise the prompt is inserted through ``insert_prompt_to_db``.

    Returns:
        The resulting status message, or "Error: Title is required." when
        ``title`` is falsy.
    """
    logging.debug(f"add_or_update_prompt: Adding or updating prompt: {title}")
    if not title:
        return "Error: Title is required."

    if not fetch_prompt_details(title):
        # Insert new prompt
        return insert_prompt_to_db(title, author, description, system_prompt, user_prompt, keywords)

    # Update existing prompt
    outcome = update_prompt_in_db(title, author, description, system_prompt, user_prompt)
    if "successfully" in outcome:
        # Update keywords if the prompt update was successful
        keyword_outcome = update_prompt_keywords(title, keywords or [])
        outcome = f"{outcome} {keyword_outcome}"
    return outcome
|
244 |
+
|
245 |
+
|
246 |
+
def load_prompt_details(selected_prompt):
    """Return the six detail fields of a prompt, or six empty strings.

    Args:
        selected_prompt: Prompt name; a falsy value skips the lookup.

    Returns:
        The first six values of the row returned by
        ``fetch_prompt_details`` as a tuple, or a tuple of six empty
        strings when no name was given or no row was found.
    """
    logging.debug(f"load_prompt_details: Loading prompt details for {selected_prompt}")
    blank_fields = ("", "", "", "", "", "")
    if not selected_prompt:
        return blank_fields

    row = fetch_prompt_details(selected_prompt)
    if not row:
        return blank_fields
    return tuple(row[position] for position in range(6))
|
253 |
+
|
254 |
+
|
255 |
+
def update_prompt_in_db(title, author, description, system_prompt, user_prompt):
    """Overwrite the stored fields of the prompt named *title*.

    Args:
        title: Name of the prompt row to update.
        author: New ``author`` value.
        description: New ``details`` value.
        system_prompt: New ``system`` value.
        user_prompt: New ``user`` value.

    Returns:
        A status message string: success, "No prompt found with the given
        title." when no row matched, or "Error updating prompt: ..." when a
        ``sqlite3.Error`` is raised.
    """
    logging.debug(f"update_prompt_in_db: Updating prompt: {title}")
    update_sql = "UPDATE Prompts SET author = ?, details = ?, system = ?, user = ? WHERE name = ?"
    new_values = (author, description, system_prompt, user_prompt, title)
    try:
        with sqlite3.connect(get_database_path('prompts.db')) as db:
            cur = db.cursor()
            cur.execute(update_sql, new_values)
            matched_rows = cur.rowcount
        if matched_rows == 0:
            return "No prompt found with the given title."
        return "Prompt updated successfully!"
    except sqlite3.Error as e:
        return f"Error updating prompt: {e}"
|
269 |
+
|
270 |
+
|
271 |
+
def delete_prompt(prompt_id):
    """Delete a prompt and its keyword links by numeric id.

    Args:
        prompt_id: ``Prompts.id`` of the row to delete.

    Returns:
        A status message string: a success message, "No prompt found with
        ID ..." when no prompt row was deleted, or "An error occurred: ..."
        when a ``sqlite3.Error`` is raised.
    """
    logging.debug(f"delete_prompt: Deleting prompt with ID: {prompt_id}")
    try:
        with sqlite3.connect(get_database_path('prompts.db')) as db:
            cur = db.cursor()

            # Delete associated keywords
            cur.execute("DELETE FROM PromptKeywords WHERE prompt_id = ?", (prompt_id,))

            # Delete the prompt
            cur.execute("DELETE FROM Prompts WHERE id = ?", (prompt_id,))

            prompt_was_deleted = cur.rowcount != 0
            if prompt_was_deleted:
                db.commit()
                return f"Prompt with ID {prompt_id} has been successfully deleted"
            return f"No prompt found with ID {prompt_id}"
    except sqlite3.Error as e:
        return f"An error occurred: {e}"
|
290 |
+
|
291 |
+
|
292 |
+
def delete_prompt_keyword(keyword: str) -> str:
    """
    Delete a keyword and its associations from the prompts database.

    The keyword is normalised with ``normalize_keyword`` before lookup.
    The prompt count in the success message is the number of
    ``PromptKeywords`` rows removed for this keyword (one row per linked
    prompt, as that table's primary key is ``(prompt_id, keyword_id)``).

    Args:
        keyword (str): The keyword to delete

    Returns:
        str: Success/failure message
    """
    logging.debug(f"delete_prompt_keyword: Deleting keyword: {keyword}")
    try:
        with sqlite3.connect(get_database_path('prompts.db')) as conn:
            cursor = conn.cursor()

            # First normalize the keyword
            normalized_keyword = normalize_keyword(keyword)

            # Get the keyword ID
            cursor.execute("SELECT id FROM Keywords WHERE keyword = ?", (normalized_keyword,))
            result = cursor.fetchone()

            if not result:
                return f"Keyword '{keyword}' not found."

            keyword_id = result[0]

            # Delete keyword associations from PromptKeywords
            cursor.execute("DELETE FROM PromptKeywords WHERE keyword_id = ?", (keyword_id,))

            # Read the count now: rowcount reflects only the most recent
            # statement, and the next DELETE always touches exactly one
            # Keywords row.
            affected_prompts = cursor.rowcount

            # Delete the keyword itself
            cursor.execute("DELETE FROM Keywords WHERE id = ?", (keyword_id,))

            conn.commit()

            logging.info(f"Keyword '{keyword}' deleted successfully")
            return f"Successfully deleted keyword '{keyword}' and removed it from {affected_prompts} prompts."

    except sqlite3.Error as e:
        error_msg = f"Database error deleting keyword: {str(e)}"
        logging.error(error_msg)
        return error_msg
    except Exception as e:
        error_msg = f"Error deleting keyword: {str(e)}"
        logging.error(error_msg)
        return error_msg
|
341 |
+
|
342 |
+
|
343 |
+
def export_prompt_keywords_to_csv() -> Tuple[str, str]:
    """
    Export all prompt keywords to a CSV file with associated metadata.

    One CSV row is written per keyword with the columns ``Keyword``,
    ``Associated Prompts`` (prompt names joined by ``' | '``),
    ``Number of Prompts`` and ``Authors`` (distinct non-NULL authors joined
    by ``' | '``). Keywords with no linked prompt are still exported, with
    empty name/author cells and a count of 0.

    The per-keyword aggregation is done in Python. The previous query used
    ``GROUP_CONCAT(DISTINCT p.author, ' | ')``, and SQLite rejects DISTINCT
    on an aggregate that takes more than one argument.

    Returns:
        Tuple[str, str]: (status_message, file_path); ``file_path`` is the
        string ``"None"`` when the export fails.
    """
    import csv
    import tempfile
    import os
    from datetime import datetime

    logging.debug("export_prompt_keywords_to_csv: Starting export")
    try:
        # Create a temporary file with a specific name in the system's temp directory
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        temp_dir = tempfile.gettempdir()
        file_path = os.path.join(temp_dir, f'prompt_keywords_export_{timestamp}.csv')

        with sqlite3.connect(get_database_path('prompts.db')) as conn:
            cursor = conn.cursor()

            # One row per (keyword, linked prompt); keywords without prompts
            # yield a single row with NULL prompt columns.
            query = '''
                SELECT
                    k.id,
                    k.keyword,
                    p.id,
                    p.name,
                    p.author
                FROM Keywords k
                LEFT JOIN PromptKeywords pk ON k.id = pk.keyword_id
                LEFT JOIN Prompts p ON pk.prompt_id = p.id
                ORDER BY k.keyword, k.id, p.name
            '''

            cursor.execute(query)
            joined_rows = cursor.fetchall()

        # Group by keyword id; dicts keep insertion order, so the SQL
        # ordering by keyword carries through to the CSV.
        grouped = {}
        for keyword_id, keyword, prompt_id, prompt_name, author in joined_rows:
            entry = grouped.setdefault(
                keyword_id,
                {'keyword': keyword, 'prompt_ids': set(), 'names': [], 'authors': []},
            )
            if prompt_id is None:
                continue
            if prompt_id not in entry['prompt_ids']:
                entry['prompt_ids'].add(prompt_id)
                entry['names'].append(prompt_name)
            if author is not None and author not in entry['authors']:
                entry['authors'].append(author)

        results = [
            (
                entry['keyword'],
                ' | '.join(entry['names']),
                len(entry['prompt_ids']),
                ' | '.join(entry['authors']),
            )
            for entry in grouped.values()
        ]

        # Write to CSV
        with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                'Keyword',
                'Associated Prompts',
                'Number of Prompts',
                'Authors'
            ])
            writer.writerows(results)

        status_msg = f"Successfully exported {len(results)} prompt keywords to CSV."
        logging.info(status_msg)

        return status_msg, file_path

    except sqlite3.Error as e:
        error_msg = f"Database error exporting keywords: {str(e)}"
        logging.error(error_msg)
        return error_msg, "None"
    except Exception as e:
        error_msg = f"Error exporting keywords: {str(e)}"
        logging.error(error_msg)
        return error_msg, "None"
|
413 |
+
|
414 |
+
|
415 |
+
def view_prompt_keywords() -> str:
    """
    List every keyword in the prompts database with its prompt count.

    Returns:
        str: A Markdown heading followed by one ``- keyword (N prompts)``
        bullet per keyword, "No keywords found." when there are none, or
        an error message if an exception is raised (the error is logged).
    """
    logging.debug("view_prompt_keywords: Retrieving all keywords")
    counts_sql = """
        SELECT k.keyword, COUNT(DISTINCT pk.prompt_id) as prompt_count
        FROM Keywords k
        LEFT JOIN PromptKeywords pk ON k.id = pk.keyword_id
        GROUP BY k.id, k.keyword
        ORDER BY k.keyword
    """
    try:
        with sqlite3.connect(get_database_path('prompts.db')) as db:
            keyword_rows = db.cursor().execute(counts_sql).fetchall()

        if not keyword_rows:
            return "No keywords found."

        bullet_lines = "\n".join(
            f"- {record[0]} ({record[1]} prompts)" for record in keyword_rows
        )
        return "### Current Prompt Keywords:\n" + bullet_lines

    except Exception as e:
        error_msg = f"Error retrieving keywords: {str(e)}"
        logging.error(error_msg)
        return error_msg
|
444 |
+
|
445 |
+
|
446 |
+
def export_prompts(
        export_format='csv',
        filter_keywords=None,
        include_system=True,
        include_user=True,
        include_details=True,
        include_author=True,
        include_keywords=True,
        markdown_template=None
) -> Tuple[str, str]:
    """
    Export prompts to CSV or Markdown with configurable options.

    CSV export writes one file into the system temp directory. Any other
    ``export_format`` value produces a ZIP archive there containing one
    Markdown file per prompt.

    Selected columns are mapped by name, so switching any ``include_*``
    flag off cannot shift another field into the wrong Markdown section.
    Markdown files are written straight into the archive (no intermediate
    temp directory), NULL column values render as empty text, and file
    names that collide after sanitising get a numeric suffix.

    Args:
        export_format (str): 'csv' or 'markdown'
        filter_keywords (List[str], optional): Keywords to filter prompts by
        include_system (bool): Include system prompts in export
        include_user (bool): Include user prompts in export
        include_details (bool): Include prompt details/descriptions
        include_author (bool): Include author information
        include_keywords (bool): Include associated keywords
        markdown_template (str, optional): Name of a built-in template
            ("Basic Template" / "Detailed Template") or a custom
            ``str.format`` template using the ``{title}``,
            ``{author_section}``, ``{details_section}``,
            ``{system_section}``, ``{user_section}`` and
            ``{keywords_section}`` placeholders

    Returns:
        Tuple[str, str]: (status_message, file_path); ``file_path`` is the
        string ``"None"`` when the export fails.
    """
    import csv
    import tempfile
    import os
    import zipfile
    from datetime import datetime

    try:
        # (enabled, SQL column, CSV header, record key) for each optional field,
        # in the order the columns appear in the SELECT list and CSV.
        optional_fields = [
            (include_author, 'p.author', 'Author', 'author'),
            (include_details, 'p.details', 'Details', 'details'),
            (include_system, 'p.system', 'System Prompt', 'system'),
            (include_user, 'p.user', 'User Prompt', 'user'),
        ]
        enabled_fields = [field for field in optional_fields if field[0]]
        select_fields = ['p.name'] + [field[1] for field in enabled_fields]
        field_keys = ['name'] + [field[3] for field in enabled_fields]

        filter_values = list(filter_keywords) if filter_keywords else []
        prompt_keywords = {}

        # Get prompts data
        with get_prompt_db_connection() as conn:
            cursor = conn.cursor()

            # Build query based on included fields (column names are fixed
            # strings from the table above, never caller input).
            query = f"""
                SELECT DISTINCT {', '.join(select_fields)}
                FROM Prompts p
            """

            # Add keyword filtering if specified
            if filter_values:
                placeholders = ','.join('?' for _ in filter_values)
                query += f"""
                JOIN PromptKeywords pk ON p.id = pk.prompt_id
                JOIN Keywords k ON pk.keyword_id = k.id
                WHERE k.keyword IN ({placeholders})
                """

            cursor.execute(query, filter_values)
            prompts = cursor.fetchall()

            # Get keywords for each prompt if needed
            if include_keywords:
                for prompt in prompts:
                    cursor.execute("""
                        SELECT k.keyword
                        FROM Keywords k
                        JOIN PromptKeywords pk ON k.id = pk.keyword_id
                        JOIN Prompts p ON pk.prompt_id = p.id
                        WHERE p.name = ?
                    """, (prompt[0],))
                    prompt_keywords[prompt[0]] = [row[0] for row in cursor.fetchall()]

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        if export_format == 'csv':
            # Export as CSV
            temp_file = os.path.join(tempfile.gettempdir(), f'prompts_export_{timestamp}.csv')
            with open(temp_file, 'w', newline='', encoding='utf-8') as csvfile:
                writer = csv.writer(csvfile)

                # Write header
                header = ['Name'] + [field[2] for field in enabled_fields]
                if include_keywords:
                    header.append('Keywords')
                writer.writerow(header)

                # Write data
                for prompt in prompts:
                    row = list(prompt)
                    if include_keywords:
                        row.append(', '.join(prompt_keywords.get(prompt[0], [])))
                    writer.writerow(row)

            return f"Successfully exported {len(prompts)} prompts to CSV.", temp_file

        else:
            # Export as Markdown files in ZIP
            zip_path = os.path.join(tempfile.gettempdir(), f'prompts_export_{timestamp}.zip')

            # Define markdown templates
            templates = {
                "Basic Template": """# {title}
{author_section}
{details_section}
{system_section}
{user_section}
{keywords_section}
""",
                "Detailed Template": """# {title}

## Author
{author_section}

## Description
{details_section}

## System Prompt
{system_section}

## User Prompt
{user_section}

## Keywords
{keywords_section}
"""
            }

            template = templates.get(markdown_template, markdown_template or templates["Basic Template"])

            used_filenames = set()
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for prompt in prompts:
                    # Map values by column name; absent or NULL fields become "".
                    record = dict(zip(field_keys, prompt))

                    def section(key, record=record):
                        value = record.get(key)
                        return "" if value is None else str(value)

                    author_text = section('author')

                    # Create markdown content
                    md_content = template.format(
                        title=record['name'],
                        author_section=f"Author: {author_text}" if author_text else "",
                        details_section=section('details'),
                        system_section=section('system'),
                        user_section=section('user'),
                        keywords_section=', '.join(prompt_keywords.get(record['name'], [])) if include_keywords else ""
                    )

                    # Create safe filename
                    safe_filename = re.sub(r'[^\w\-_\. ]', '_', record['name'])

                    # Distinct prompt names can sanitise to the same file
                    # name; suffix later ones so no archive entry is shadowed.
                    archive_name = f"{safe_filename}.md"
                    suffix = 2
                    while archive_name in used_filenames:
                        archive_name = f"{safe_filename}_{suffix}.md"
                        suffix += 1
                    used_filenames.add(archive_name)

                    # Add to ZIP directly from memory
                    zipf.writestr(archive_name, md_content)

            return f"Successfully exported {len(prompts)} prompts to Markdown files.", zip_path

    except Exception as e:
        error_msg = f"Error exporting prompts: {str(e)}"
        logging.error(error_msg)
        return error_msg, "None"
|
619 |
+
|
620 |
+
|
621 |
+
create_prompts_db()
|
622 |
+
|
623 |
+
#
|
624 |
+
# End of Prompts_DB.py
|
625 |
+
#######################################################################################################################
|
626 |
+
|
App_Function_Libraries/DB/RAG_QA_Chat_DB.py
CHANGED
@@ -4,39 +4,37 @@
|
|
4 |
# Imports
|
5 |
import configparser
|
6 |
import logging
|
|
|
7 |
import re
|
8 |
import sqlite3
|
9 |
import uuid
|
10 |
from contextlib import contextmanager
|
11 |
from datetime import datetime
|
12 |
-
|
13 |
-
from
|
14 |
-
|
15 |
#
|
16 |
# External Imports
|
17 |
# (No external imports)
|
18 |
#
|
19 |
# Local Imports
|
20 |
-
|
|
|
21 |
#
|
22 |
########################################################################################################################
|
23 |
#
|
24 |
# Functions:
|
25 |
|
26 |
-
|
27 |
-
config_path =
|
28 |
-
|
29 |
-
|
30 |
-
config
|
31 |
-
config.
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
rag_qa_db_path = get_database_path('RAG_QA_Chat.db')
|
38 |
-
|
39 |
-
print(f"RAG QA Chat Database path: {rag_qa_db_path}")
|
40 |
|
41 |
# Set up logging
|
42 |
logging.basicConfig(level=logging.INFO)
|
@@ -58,7 +56,9 @@ CREATE TABLE IF NOT EXISTS conversation_metadata (
|
|
58 |
conversation_id TEXT PRIMARY KEY,
|
59 |
created_at DATETIME NOT NULL,
|
60 |
last_updated DATETIME NOT NULL,
|
61 |
-
title TEXT NOT NULL
|
|
|
|
|
62 |
);
|
63 |
|
64 |
-- Table for storing keywords
|
@@ -122,19 +122,137 @@ CREATE INDEX IF NOT EXISTS idx_rag_qa_keyword_collections_parent_id ON rag_qa_ke
|
|
122 |
CREATE INDEX IF NOT EXISTS idx_rag_qa_collection_keywords_collection_id ON rag_qa_collection_keywords(collection_id);
|
123 |
CREATE INDEX IF NOT EXISTS idx_rag_qa_collection_keywords_keyword_id ON rag_qa_collection_keywords(keyword_id);
|
124 |
|
125 |
-
-- Full-text search virtual
|
126 |
-
CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_chats_fts USING fts5(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
-
--
|
|
|
129 |
CREATE TRIGGER IF NOT EXISTS rag_qa_chats_ai AFTER INSERT ON rag_qa_chats BEGIN
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
END;
|
132 |
'''
|
133 |
|
134 |
# Database connection management
|
135 |
@contextmanager
|
136 |
def get_db_connection():
|
137 |
-
|
|
|
138 |
try:
|
139 |
yield conn
|
140 |
finally:
|
@@ -168,10 +286,43 @@ def execute_query(query, params=None, conn=None):
|
|
168 |
conn.commit()
|
169 |
return cursor.fetchall()
|
170 |
|
|
|
171 |
def create_tables():
|
|
|
172 |
with get_db_connection() as conn:
|
173 |
-
conn.
|
174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
|
176 |
# Initialize the database
|
177 |
create_tables()
|
@@ -197,6 +348,7 @@ def validate_keyword(keyword):
|
|
197 |
raise ValueError("Keyword contains invalid characters")
|
198 |
return keyword.strip()
|
199 |
|
|
|
200 |
def validate_collection_name(name):
|
201 |
if not isinstance(name, str):
|
202 |
raise ValueError("Collection name must be a string")
|
@@ -208,6 +360,7 @@ def validate_collection_name(name):
|
|
208 |
raise ValueError("Collection name contains invalid characters")
|
209 |
return name.strip()
|
210 |
|
|
|
211 |
# Core functions
|
212 |
def add_keyword(keyword, conn=None):
|
213 |
try:
|
@@ -222,6 +375,7 @@ def add_keyword(keyword, conn=None):
|
|
222 |
logger.error(f"Error adding keyword '{keyword}': {e}")
|
223 |
raise
|
224 |
|
|
|
225 |
def create_keyword_collection(name, parent_id=None):
|
226 |
try:
|
227 |
validated_name = validate_collection_name(name)
|
@@ -235,6 +389,7 @@ def create_keyword_collection(name, parent_id=None):
|
|
235 |
logger.error(f"Error creating keyword collection '{name}': {e}")
|
236 |
raise
|
237 |
|
|
|
238 |
def add_keyword_to_collection(collection_name, keyword):
|
239 |
try:
|
240 |
validated_collection_name = validate_collection_name(collection_name)
|
@@ -259,6 +414,7 @@ def add_keyword_to_collection(collection_name, keyword):
|
|
259 |
logger.error(f"Error adding keyword '{keyword}' to collection '{collection_name}': {e}")
|
260 |
raise
|
261 |
|
|
|
262 |
def add_keywords_to_conversation(conversation_id, keywords):
|
263 |
if not isinstance(keywords, (list, tuple)):
|
264 |
raise ValueError("Keywords must be a list or tuple")
|
@@ -282,6 +438,23 @@ def add_keywords_to_conversation(conversation_id, keywords):
|
|
282 |
logger.error(f"Error adding keywords to conversation '{conversation_id}': {e}")
|
283 |
raise
|
284 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
285 |
def get_keywords_for_conversation(conversation_id):
|
286 |
try:
|
287 |
query = '''
|
@@ -298,6 +471,7 @@ def get_keywords_for_conversation(conversation_id):
|
|
298 |
logger.error(f"Error getting keywords for conversation '{conversation_id}': {e}")
|
299 |
raise
|
300 |
|
|
|
301 |
def get_keywords_for_collection(collection_name):
|
302 |
try:
|
303 |
query = '''
|
@@ -315,6 +489,116 @@ def get_keywords_for_collection(collection_name):
|
|
315 |
logger.error(f"Error getting keywords for collection '{collection_name}': {e}")
|
316 |
raise
|
317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
#
|
319 |
# End of Keyword-related functions
|
320 |
###################################################
|
@@ -339,6 +623,7 @@ def save_notes(conversation_id, title, content):
|
|
339 |
logger.error(f"Error saving notes for conversation '{conversation_id}': {e}")
|
340 |
raise
|
341 |
|
|
|
342 |
def update_note(note_id, title, content):
|
343 |
try:
|
344 |
query = "UPDATE rag_qa_notes SET title = ?, content = ?, timestamp = ? WHERE id = ?"
|
@@ -349,6 +634,121 @@ def update_note(note_id, title, content):
|
|
349 |
logger.error(f"Error updating note ID '{note_id}': {e}")
|
350 |
raise
|
351 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
352 |
def get_notes(conversation_id):
|
353 |
"""Retrieve notes for a given conversation."""
|
354 |
try:
|
@@ -361,6 +761,7 @@ def get_notes(conversation_id):
|
|
361 |
logger.error(f"Error getting notes for conversation '{conversation_id}': {e}")
|
362 |
raise
|
363 |
|
|
|
364 |
def get_note_by_id(note_id):
|
365 |
try:
|
366 |
query = "SELECT id, title, content FROM rag_qa_notes WHERE id = ?"
|
@@ -370,9 +771,21 @@ def get_note_by_id(note_id):
|
|
370 |
logger.error(f"Error getting note by ID '{note_id}': {e}")
|
371 |
raise
|
372 |
|
|
|
373 |
def get_notes_by_keywords(keywords, page=1, page_size=20):
|
374 |
try:
|
375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
query = f'''
|
377 |
SELECT n.id, n.title, n.content, n.timestamp
|
378 |
FROM rag_qa_notes n
|
@@ -381,14 +794,15 @@ def get_notes_by_keywords(keywords, page=1, page_size=20):
|
|
381 |
WHERE k.keyword IN ({placeholders})
|
382 |
ORDER BY n.timestamp DESC
|
383 |
'''
|
384 |
-
results, total_pages, total_count = get_paginated_results(query, tuple(
|
385 |
-
logger.info(f"Retrieved {len(results)} notes matching keywords: {', '.join(
|
386 |
notes = [(row[0], row[1], row[2], row[3]) for row in results]
|
387 |
return notes, total_pages, total_count
|
388 |
except Exception as e:
|
389 |
logger.error(f"Error getting notes by keywords: {e}")
|
390 |
raise
|
391 |
|
|
|
392 |
def get_notes_by_keyword_collection(collection_name, page=1, page_size=20):
|
393 |
try:
|
394 |
query = '''
|
@@ -501,9 +915,10 @@ def delete_note(note_id):
|
|
501 |
#
|
502 |
# Chat-related functions
|
503 |
|
504 |
-
def save_message(conversation_id, role, content):
|
505 |
try:
|
506 |
-
timestamp
|
|
|
507 |
query = "INSERT INTO rag_qa_chats (conversation_id, timestamp, role, content) VALUES (?, ?, ?, ?)"
|
508 |
execute_query(query, (conversation_id, timestamp, role, content))
|
509 |
|
@@ -516,29 +931,103 @@ def save_message(conversation_id, role, content):
|
|
516 |
logger.error(f"Error saving message for conversation '{conversation_id}': {e}")
|
517 |
raise
|
518 |
|
519 |
-
|
|
|
520 |
try:
|
521 |
conversation_id = str(uuid.uuid4())
|
522 |
-
query = "
|
|
|
|
|
|
|
|
|
523 |
now = datetime.now().isoformat()
|
524 |
-
|
525 |
-
|
|
|
526 |
return conversation_id
|
527 |
except Exception as e:
|
528 |
logger.error(f"Error starting new conversation: {e}")
|
529 |
raise
|
530 |
|
|
|
531 |
def get_all_conversations(page=1, page_size=20):
|
532 |
try:
|
533 |
-
query = "
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
538 |
except Exception as e:
|
539 |
-
|
540 |
raise
|
541 |
|
|
|
542 |
# Pagination helper function
|
543 |
def get_paginated_results(query, params=None, page=1, page_size=20):
|
544 |
try:
|
@@ -564,6 +1053,7 @@ def get_paginated_results(query, params=None, page=1, page_size=20):
|
|
564 |
logger.error(f"Error retrieving paginated results: {e}")
|
565 |
raise
|
566 |
|
|
|
567 |
def get_all_collections(page=1, page_size=20):
|
568 |
try:
|
569 |
query = "SELECT name FROM rag_qa_keyword_collections"
|
@@ -575,24 +1065,79 @@ def get_all_collections(page=1, page_size=20):
|
|
575 |
logger.error(f"Error getting collections: {e}")
|
576 |
raise
|
577 |
|
578 |
-
|
|
|
579 |
try:
|
580 |
-
|
581 |
-
query =
|
582 |
-
SELECT DISTINCT cm.conversation_id, cm.title
|
583 |
FROM conversation_metadata cm
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
-
|
591 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
592 |
except Exception as e:
|
593 |
-
logger.error(f"Error searching conversations
|
594 |
raise
|
595 |
|
|
|
596 |
def load_chat_history(conversation_id, page=1, page_size=50):
|
597 |
try:
|
598 |
query = "SELECT role, content FROM rag_qa_chats WHERE conversation_id = ? ORDER BY timestamp"
|
@@ -604,6 +1149,7 @@ def load_chat_history(conversation_id, page=1, page_size=50):
|
|
604 |
logger.error(f"Error loading chat history for conversation '{conversation_id}': {e}")
|
605 |
raise
|
606 |
|
|
|
607 |
def update_conversation_title(conversation_id, new_title):
|
608 |
"""Update the title of a conversation."""
|
609 |
try:
|
@@ -614,6 +1160,59 @@ def update_conversation_title(conversation_id, new_title):
|
|
614 |
logger.error(f"Error updating conversation title: {e}")
|
615 |
raise
|
616 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
617 |
def delete_conversation(conversation_id):
|
618 |
"""Delete a conversation and its associated messages and notes."""
|
619 |
try:
|
@@ -633,11 +1232,203 @@ def delete_conversation(conversation_id):
|
|
633 |
logger.error(f"Error deleting conversation '{conversation_id}': {e}")
|
634 |
raise
|
635 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
636 |
#
|
637 |
# End of Chat-related functions
|
638 |
###################################################
|
639 |
|
640 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
641 |
###################################################
|
642 |
#
|
643 |
# Functions to export DB data
|
|
|
4 |
# Imports
|
5 |
import configparser
|
6 |
import logging
|
7 |
+
import os
|
8 |
import re
|
9 |
import sqlite3
|
10 |
import uuid
|
11 |
from contextlib import contextmanager
|
12 |
from datetime import datetime
|
13 |
+
from pathlib import Path
|
14 |
+
from typing import List, Dict, Any, Tuple, Optional
|
|
|
15 |
#
|
16 |
# External Imports
|
17 |
# (No external imports)
|
18 |
#
|
19 |
# Local Imports
|
20 |
+
from App_Function_Libraries.Utils.Utils import get_project_relative_path, get_project_root
|
21 |
+
|
22 |
#
|
23 |
########################################################################################################################
|
24 |
#
|
25 |
# Functions:
|
26 |
|
27 |
+
def get_rag_qa_db_path():
    """Resolve the RAG QA chat database path from ``Config_Files/config.txt``.

    Reads the ``rag_qa_db_path`` option of the ``[Database]`` section. A
    relative value is passed through ``get_project_relative_path``; an
    absolute value is returned unchanged.

    Raises:
        ValueError: If the section or the option is missing from the config.
    """
    parser = configparser.ConfigParser()
    parser.read(os.path.join(get_project_root(), 'Config_Files', 'config.txt'))

    # Guard clause: stop early when the setting is not configured.
    configured = parser.has_section('Database') and parser.has_option('Database', 'rag_qa_db_path')
    if not configured:
        raise ValueError("Database path not found in config file")

    db_path = parser.get('Database', 'rag_qa_db_path')
    if os.path.isabs(db_path):
        return db_path
    return get_project_relative_path(db_path)
|
|
|
|
|
|
|
38 |
|
39 |
# Set up logging
|
40 |
logging.basicConfig(level=logging.INFO)
|
|
|
56 |
conversation_id TEXT PRIMARY KEY,
|
57 |
created_at DATETIME NOT NULL,
|
58 |
last_updated DATETIME NOT NULL,
|
59 |
+
title TEXT NOT NULL,
|
60 |
+
media_id INTEGER,
|
61 |
+
rating INTEGER CHECK(rating BETWEEN 1 AND 3)
|
62 |
);
|
63 |
|
64 |
-- Table for storing keywords
|
|
|
122 |
CREATE INDEX IF NOT EXISTS idx_rag_qa_collection_keywords_collection_id ON rag_qa_collection_keywords(collection_id);
|
123 |
CREATE INDEX IF NOT EXISTS idx_rag_qa_collection_keywords_keyword_id ON rag_qa_collection_keywords(keyword_id);
|
124 |
|
125 |
+
-- Full-text search virtual tables
|
126 |
+
CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_chats_fts USING fts5(
|
127 |
+
content,
|
128 |
+
content='rag_qa_chats',
|
129 |
+
content_rowid='id'
|
130 |
+
);
|
131 |
+
|
132 |
+
-- FTS table for conversation metadata
|
133 |
+
CREATE VIRTUAL TABLE IF NOT EXISTS conversation_metadata_fts USING fts5(
|
134 |
+
title,
|
135 |
+
content='conversation_metadata',
|
136 |
+
content_rowid='rowid'
|
137 |
+
);
|
138 |
+
|
139 |
+
-- FTS table for keywords
|
140 |
+
CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_keywords_fts USING fts5(
|
141 |
+
keyword,
|
142 |
+
content='rag_qa_keywords',
|
143 |
+
content_rowid='id'
|
144 |
+
);
|
145 |
+
|
146 |
+
-- FTS table for keyword collections
|
147 |
+
CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_keyword_collections_fts USING fts5(
|
148 |
+
name,
|
149 |
+
content='rag_qa_keyword_collections',
|
150 |
+
content_rowid='id'
|
151 |
+
);
|
152 |
+
|
153 |
+
-- FTS table for notes
|
154 |
+
CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_notes_fts USING fts5(
|
155 |
+
title,
|
156 |
+
content,
|
157 |
+
content='rag_qa_notes',
|
158 |
+
content_rowid='id'
|
159 |
+
);
|
160 |
+
-- FTS table for notes (modified to include both title and content)
|
161 |
+
CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_notes_fts USING fts5(
|
162 |
+
title,
|
163 |
+
content,
|
164 |
+
content='rag_qa_notes',
|
165 |
+
content_rowid='id'
|
166 |
+
);
|
167 |
|
168 |
+
-- Triggers for maintaining FTS indexes
|
169 |
+
-- Triggers for rag_qa_chats
|
170 |
CREATE TRIGGER IF NOT EXISTS rag_qa_chats_ai AFTER INSERT ON rag_qa_chats BEGIN
|
171 |
+
INSERT INTO rag_qa_chats_fts(rowid, content)
|
172 |
+
VALUES (new.id, new.content);
|
173 |
+
END;
|
174 |
+
|
175 |
+
CREATE TRIGGER IF NOT EXISTS rag_qa_chats_au AFTER UPDATE ON rag_qa_chats BEGIN
|
176 |
+
UPDATE rag_qa_chats_fts
|
177 |
+
SET content = new.content
|
178 |
+
WHERE rowid = old.id;
|
179 |
+
END;
|
180 |
+
|
181 |
+
CREATE TRIGGER IF NOT EXISTS rag_qa_chats_ad AFTER DELETE ON rag_qa_chats BEGIN
|
182 |
+
DELETE FROM rag_qa_chats_fts WHERE rowid = old.id;
|
183 |
+
END;
|
184 |
+
|
185 |
+
-- Triggers for conversation_metadata
|
186 |
+
CREATE TRIGGER IF NOT EXISTS conversation_metadata_ai AFTER INSERT ON conversation_metadata BEGIN
|
187 |
+
INSERT INTO conversation_metadata_fts(rowid, title)
|
188 |
+
VALUES (new.rowid, new.title);
|
189 |
+
END;
|
190 |
+
|
191 |
+
CREATE TRIGGER IF NOT EXISTS conversation_metadata_au AFTER UPDATE ON conversation_metadata BEGIN
|
192 |
+
UPDATE conversation_metadata_fts
|
193 |
+
SET title = new.title
|
194 |
+
WHERE rowid = old.rowid;
|
195 |
+
END;
|
196 |
+
|
197 |
+
CREATE TRIGGER IF NOT EXISTS conversation_metadata_ad AFTER DELETE ON conversation_metadata BEGIN
|
198 |
+
DELETE FROM conversation_metadata_fts WHERE rowid = old.rowid;
|
199 |
+
END;
|
200 |
+
|
201 |
+
-- Triggers for rag_qa_keywords
|
202 |
+
CREATE TRIGGER IF NOT EXISTS rag_qa_keywords_ai AFTER INSERT ON rag_qa_keywords BEGIN
|
203 |
+
INSERT INTO rag_qa_keywords_fts(rowid, keyword)
|
204 |
+
VALUES (new.id, new.keyword);
|
205 |
+
END;
|
206 |
+
|
207 |
+
CREATE TRIGGER IF NOT EXISTS rag_qa_keywords_au AFTER UPDATE ON rag_qa_keywords BEGIN
|
208 |
+
UPDATE rag_qa_keywords_fts
|
209 |
+
SET keyword = new.keyword
|
210 |
+
WHERE rowid = old.id;
|
211 |
+
END;
|
212 |
+
|
213 |
+
CREATE TRIGGER IF NOT EXISTS rag_qa_keywords_ad AFTER DELETE ON rag_qa_keywords BEGIN
|
214 |
+
DELETE FROM rag_qa_keywords_fts WHERE rowid = old.id;
|
215 |
+
END;
|
216 |
+
|
217 |
+
-- Triggers for rag_qa_keyword_collections
|
218 |
+
CREATE TRIGGER IF NOT EXISTS rag_qa_keyword_collections_ai AFTER INSERT ON rag_qa_keyword_collections BEGIN
|
219 |
+
INSERT INTO rag_qa_keyword_collections_fts(rowid, name)
|
220 |
+
VALUES (new.id, new.name);
|
221 |
+
END;
|
222 |
+
|
223 |
+
CREATE TRIGGER IF NOT EXISTS rag_qa_keyword_collections_au AFTER UPDATE ON rag_qa_keyword_collections BEGIN
|
224 |
+
UPDATE rag_qa_keyword_collections_fts
|
225 |
+
SET name = new.name
|
226 |
+
WHERE rowid = old.id;
|
227 |
+
END;
|
228 |
+
|
229 |
+
CREATE TRIGGER IF NOT EXISTS rag_qa_keyword_collections_ad AFTER DELETE ON rag_qa_keyword_collections BEGIN
|
230 |
+
DELETE FROM rag_qa_keyword_collections_fts WHERE rowid = old.id;
|
231 |
+
END;
|
232 |
+
|
233 |
+
-- Triggers for rag_qa_notes
|
234 |
+
CREATE TRIGGER IF NOT EXISTS rag_qa_notes_ai AFTER INSERT ON rag_qa_notes BEGIN
|
235 |
+
INSERT INTO rag_qa_notes_fts(rowid, title, content)
|
236 |
+
VALUES (new.id, new.title, new.content);
|
237 |
+
END;
|
238 |
+
|
239 |
+
CREATE TRIGGER IF NOT EXISTS rag_qa_notes_au AFTER UPDATE ON rag_qa_notes BEGIN
|
240 |
+
UPDATE rag_qa_notes_fts
|
241 |
+
SET title = new.title,
|
242 |
+
content = new.content
|
243 |
+
WHERE rowid = old.id;
|
244 |
+
END;
|
245 |
+
|
246 |
+
CREATE TRIGGER IF NOT EXISTS rag_qa_notes_ad AFTER DELETE ON rag_qa_notes BEGIN
|
247 |
+
DELETE FROM rag_qa_notes_fts WHERE rowid = old.id;
|
248 |
END;
|
249 |
'''
|
250 |
|
251 |
# Database connection management
|
252 |
@contextmanager
|
253 |
def get_db_connection():
|
254 |
+
db_path = get_rag_qa_db_path()
|
255 |
+
conn = sqlite3.connect(db_path)
|
256 |
try:
|
257 |
yield conn
|
258 |
finally:
|
|
|
286 |
conn.commit()
|
287 |
return cursor.fetchall()
|
288 |
|
289 |
+
|
290 |
def create_tables():
    """Create database tables and initialize FTS indexes.

    Runs ``SCHEMA_SQL`` and then, for every full-text table, compares its row
    count with that of its source table. On a mismatch the FTS table is
    emptied and refilled from the source. The refill is committed explicitly
    before the connection is released.
    """
    # (fts_table, source_table, rowid column in the source, indexed columns).
    # conversation_metadata has a TEXT primary key, so its implicit rowid is used.
    fts_tables = [
        ('rag_qa_notes_fts', 'rag_qa_notes', 'id', ['title', 'content']),
        ('rag_qa_chats_fts', 'rag_qa_chats', 'id', ['content']),
        ('conversation_metadata_fts', 'conversation_metadata', 'rowid', ['title']),
        ('rag_qa_keywords_fts', 'rag_qa_keywords', 'id', ['keyword']),
        ('rag_qa_keyword_collections_fts', 'rag_qa_keyword_collections', 'id', ['name']),
    ]

    with get_db_connection() as conn:
        cursor = conn.cursor()
        # Execute the SCHEMA_SQL to create tables and triggers
        cursor.executescript(SCHEMA_SQL)

        # NOTE(review): for FTS5 tables declared with content='...', a plain
        # COUNT(*) may be answered from the content table itself, in which case
        # this check cannot detect a stale index - confirm against the FTS5 docs.
        for fts_table, source_table, rowid_column, columns in fts_tables:
            cursor.execute(f"SELECT COUNT(*) FROM {fts_table}")
            fts_count = cursor.fetchone()[0]
            cursor.execute(f"SELECT COUNT(*) FROM {source_table}")
            source_count = cursor.fetchone()[0]

            if fts_count == source_count:
                continue

            # Repopulate FTS table
            logger.info(f"Repopulating {fts_table}")
            cursor.execute(f"DELETE FROM {fts_table}")
            columns_str = ', '.join(columns)
            source_columns = ', '.join([rowid_column] + columns)
            cursor.execute(f"""
                INSERT INTO {fts_table}(rowid, {columns_str})
                SELECT {source_columns} FROM {source_table}
            """)

        # The DELETE/INSERT statements above run inside an implicit transaction.
        # Whether get_db_connection commits on exit is not visible from here, so
        # commit explicitly to make sure the refill is not rolled back on close.
        conn.commit()

    logger.info("All RAG QA Chat tables and triggers created successfully")
|
325 |
+
|
326 |
|
327 |
# Initialize the database
|
328 |
create_tables()
|
|
|
348 |
raise ValueError("Keyword contains invalid characters")
|
349 |
return keyword.strip()
|
350 |
|
351 |
+
|
352 |
def validate_collection_name(name):
|
353 |
if not isinstance(name, str):
|
354 |
raise ValueError("Collection name must be a string")
|
|
|
360 |
raise ValueError("Collection name contains invalid characters")
|
361 |
return name.strip()
|
362 |
|
363 |
+
|
364 |
# Core functions
|
365 |
def add_keyword(keyword, conn=None):
|
366 |
try:
|
|
|
375 |
logger.error(f"Error adding keyword '{keyword}': {e}")
|
376 |
raise
|
377 |
|
378 |
+
|
379 |
def create_keyword_collection(name, parent_id=None):
|
380 |
try:
|
381 |
validated_name = validate_collection_name(name)
|
|
|
389 |
logger.error(f"Error creating keyword collection '{name}': {e}")
|
390 |
raise
|
391 |
|
392 |
+
|
393 |
def add_keyword_to_collection(collection_name, keyword):
|
394 |
try:
|
395 |
validated_collection_name = validate_collection_name(collection_name)
|
|
|
414 |
logger.error(f"Error adding keyword '{keyword}' to collection '{collection_name}': {e}")
|
415 |
raise
|
416 |
|
417 |
+
|
418 |
def add_keywords_to_conversation(conversation_id, keywords):
|
419 |
if not isinstance(keywords, (list, tuple)):
|
420 |
raise ValueError("Keywords must be a list or tuple")
|
|
|
438 |
logger.error(f"Error adding keywords to conversation '{conversation_id}': {e}")
|
439 |
raise
|
440 |
|
441 |
+
|
442 |
+
def view_rag_keywords():
    """Return all RAG QA keywords as a Markdown bullet list.

    Keywords are read from ``rag_qa_keywords`` in alphabetical order.

    Returns:
        str: A Markdown list under a ``### Current RAG QA Keywords:`` heading,
        ``"No keywords found."`` when the table is empty, or an error message
        when the lookup fails (the error is also logged, not raised).
    """
    # Local import: only `contextmanager` is imported from contextlib at file level.
    from contextlib import closing

    try:
        rag_db_path = get_rag_qa_db_path()
        # `with sqlite3.connect(...)` alone only manages the transaction and
        # leaves the connection open; closing() guarantees it is released.
        with closing(sqlite3.connect(rag_db_path)) as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT keyword FROM rag_qa_keywords ORDER BY keyword")
            keywords = cursor.fetchall()
    except Exception as e:
        logger.error(f"Error retrieving keywords: {e}")
        return f"Error retrieving keywords: {str(e)}"

    if not keywords:
        return "No keywords found."
    return "### Current RAG QA Keywords:\n" + "\n".join(f"- {row[0]}" for row in keywords)
|
456 |
+
|
457 |
+
|
458 |
def get_keywords_for_conversation(conversation_id):
|
459 |
try:
|
460 |
query = '''
|
|
|
471 |
logger.error(f"Error getting keywords for conversation '{conversation_id}': {e}")
|
472 |
raise
|
473 |
|
474 |
+
|
475 |
def get_keywords_for_collection(collection_name):
|
476 |
try:
|
477 |
query = '''
|
|
|
489 |
logger.error(f"Error getting keywords for collection '{collection_name}': {e}")
|
490 |
raise
|
491 |
|
492 |
+
|
493 |
+
def delete_rag_keyword(keyword: str) -> str:
    """Remove a keyword and every association to it from the RAG QA database.

    Args:
        keyword (str): The keyword to delete; it is checked with
            ``validate_keyword`` before any query runs.

    Returns:
        str: A status message. Failures are logged and reported through the
        returned message instead of being raised.
    """
    try:
        clean_keyword = validate_keyword(keyword)

        with transaction() as conn:
            cur = conn.cursor()
            cur.execute("SELECT id FROM rag_qa_keywords WHERE keyword = ?", (clean_keyword,))
            row = cur.fetchone()
            if not row:
                return f"Keyword '{clean_keyword}' not found."

            keyword_id = row[0]

            # Association rows are removed first, the keyword row itself last.
            delete_statements = (
                "DELETE FROM rag_qa_conversation_keywords WHERE keyword_id = ?",
                "DELETE FROM rag_qa_collection_keywords WHERE keyword_id = ?",
                "DELETE FROM rag_qa_note_keywords WHERE keyword_id = ?",
                "DELETE FROM rag_qa_keywords WHERE id = ?",
            )
            for statement in delete_statements:
                cur.execute(statement, (keyword_id,))

            logger.info(f"Keyword '{clean_keyword}' deleted successfully")
            return f"Successfully deleted keyword '{clean_keyword}' and all its associations."

    except ValueError as e:
        failure = f"Invalid keyword: {str(e)}"
    except Exception as e:
        failure = f"Error deleting keyword: {str(e)}"

    # Both failure paths log the message and hand it back to the caller.
    logger.error(failure)
    return failure
|
537 |
+
|
538 |
+
|
539 |
+
def export_rag_keywords_to_csv() -> Tuple[str, str]:
    """
    Export all RAG QA keywords to a CSV file.

    Each row holds the keyword, its collections (comma-joined by
    ``GROUP_CONCAT``), and the number of distinct conversations and notes
    linked to it. The file is a named temporary file created with
    ``delete=False``, so removing it is up to the caller.

    Returns:
        Tuple[str, str]: (status_message, file_path). On failure the error is
        logged, any partially written file is removed, and ``file_path`` is
        an empty string.
    """
    import csv
    from tempfile import NamedTemporaryFile

    # Get all keywords and their associations
    query = """
        SELECT
            k.keyword,
            GROUP_CONCAT(DISTINCT c.name) as collections,
            COUNT(DISTINCT ck.conversation_id) as num_conversations,
            COUNT(DISTINCT nk.note_id) as num_notes
        FROM rag_qa_keywords k
        LEFT JOIN rag_qa_collection_keywords col_k ON k.id = col_k.keyword_id
        LEFT JOIN rag_qa_keyword_collections c ON col_k.collection_id = c.id
        LEFT JOIN rag_qa_conversation_keywords ck ON k.id = ck.keyword_id
        LEFT JOIN rag_qa_note_keywords nk ON k.id = nk.keyword_id
        GROUP BY k.id, k.keyword
        ORDER BY k.keyword
    """

    csv_path = ""
    try:
        # Query first so that a database failure leaves no stray file behind.
        with transaction() as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            results = cursor.fetchall()

        # The `with` block closes the file even if writing fails; utf-8 keeps
        # non-ASCII keywords from failing on platforms with a narrow default.
        with NamedTemporaryFile(mode='w', delete=False, suffix='.csv',
                                newline='', encoding='utf-8') as temp_file:
            csv_path = temp_file.name
            writer = csv.writer(temp_file)
            writer.writerow(['Keyword', 'Collections', 'Number of Conversations', 'Number of Notes'])
            for keyword, collections, num_conversations, num_notes in results:
                # collections is None when the keyword belongs to no collection
                writer.writerow([keyword, collections if collections else '',
                                 num_conversations, num_notes])

        status_msg = f"Successfully exported {len(results)} keywords to CSV."
        logger.info(status_msg)
        return status_msg, csv_path

    except Exception as e:
        # Do not leave a half-written export on disk.
        if csv_path:
            try:
                os.remove(csv_path)
            except OSError:
                pass
        error_msg = f"Error exporting keywords: {str(e)}"
        logger.error(error_msg)
        return error_msg, ""
|
600 |
+
|
601 |
+
|
602 |
#
|
603 |
# End of Keyword-related functions
|
604 |
###################################################
|
|
|
623 |
logger.error(f"Error saving notes for conversation '{conversation_id}': {e}")
|
624 |
raise
|
625 |
|
626 |
+
|
627 |
def update_note(note_id, title, content):
|
628 |
try:
|
629 |
query = "UPDATE rag_qa_notes SET title = ?, content = ?, timestamp = ? WHERE id = ?"
|
|
|
634 |
logger.error(f"Error updating note ID '{note_id}': {e}")
|
635 |
raise
|
636 |
|
637 |
+
|
638 |
+
def search_notes_titles(search_term: str, page: int = 1, results_per_page: int = 20, connection=None) -> Tuple[
        List[Tuple], int, int]:
    """
    Search note titles using full-text search. Returns all notes if search_term is empty.

    Args:
        search_term (str): The search term for note titles. If empty (or None), returns all notes.
            The term is stripped and lower-cased before being passed to ``MATCH``.
        page (int, optional): Page number for pagination. Defaults to 1.
        results_per_page (int, optional): Number of results per page. Defaults to 20.
        connection (sqlite3.Connection, optional): Database connection. Uses new connection if not provided.

    Returns:
        Tuple[List[Tuple], int, int]: Tuple containing:
            - List of tuples: (note_id, title, content, timestamp, conversation_id)
            - Total number of pages (at least 1)
            - Total count of matching records

    Raises:
        ValueError: If page or results_per_page is less than 1
        sqlite3.Error: If there's a database error
    """
    if page < 1:
        raise ValueError("Page number must be 1 or greater.")
    if results_per_page < 1:
        # Guards the page-count division below and rejects a meaningless LIMIT.
        raise ValueError("Results per page must be 1 or greater.")

    offset = (page - 1) * results_per_page
    search_term_clean = (search_term or "").strip().lower()

    def execute_search(conn):
        cursor = conn.cursor()

        # Debug: show table contents. These are full-table reads, so only run
        # them when debug logging is actually enabled.
        if logger.isEnabledFor(logging.DEBUG):
            cursor.execute("SELECT title FROM rag_qa_notes")
            logger.debug(f"Main table titles: {cursor.fetchall()}")
            cursor.execute("SELECT title FROM rag_qa_notes_fts")
            logger.debug(f"FTS table titles: {cursor.fetchall()}")

        if not search_term_clean:
            # Query for all notes, newest first
            cursor.execute("SELECT COUNT(*) FROM rag_qa_notes")
            total_count = cursor.fetchone()[0]

            cursor.execute(
                """
                SELECT id, title, content, timestamp, conversation_id
                FROM rag_qa_notes
                ORDER BY timestamp DESC
                LIMIT ? OFFSET ?
                """,
                (results_per_page, offset)
            )
            results = cursor.fetchall()
        else:
            # Full-text search restricted to the title column
            cursor.execute(
                """
                SELECT COUNT(*)
                FROM rag_qa_notes n
                JOIN rag_qa_notes_fts fts ON n.id = fts.rowid
                WHERE fts.title MATCH ?
                """,
                (search_term_clean,)
            )
            total_count = cursor.fetchone()[0]

            cursor.execute(
                """
                SELECT
                    n.id,
                    n.title,
                    n.content,
                    n.timestamp,
                    n.conversation_id
                FROM rag_qa_notes n
                JOIN rag_qa_notes_fts fts ON n.id = fts.rowid
                WHERE fts.title MATCH ?
                ORDER BY rank
                LIMIT ? OFFSET ?
                """,
                (search_term_clean, results_per_page, offset)
            )
            results = cursor.fetchall()

            logger.debug(f"Search term: {search_term_clean}")
            logger.debug(f"Results: {results}")

        # Ceiling division; an empty result set still reports one page.
        total_pages = max(1, (total_count + results_per_page - 1) // results_per_page)
        logger.info(f"Found {total_count} matching notes for search term '{search_term}'")

        return results, total_pages, total_count

    try:
        if connection:
            return execute_search(connection)
        with get_db_connection() as conn:
            return execute_search(conn)

    except sqlite3.Error as e:
        logger.error(f"Database error in search_notes_titles: {str(e)}")
        logger.error(f"Search term: {search_term}")
        # Chain the original error so its traceback is not lost.
        raise sqlite3.Error(f"Error searching notes: {str(e)}") from e
|
749 |
+
|
750 |
+
|
751 |
+
|
752 |
def get_notes(conversation_id):
|
753 |
"""Retrieve notes for a given conversation."""
|
754 |
try:
|
|
|
761 |
logger.error(f"Error getting notes for conversation '{conversation_id}': {e}")
|
762 |
raise
|
763 |
|
764 |
+
|
765 |
def get_note_by_id(note_id):
|
766 |
try:
|
767 |
query = "SELECT id, title, content FROM rag_qa_notes WHERE id = ?"
|
|
|
771 |
logger.error(f"Error getting note by ID '{note_id}': {e}")
|
772 |
raise
|
773 |
|
774 |
+
|
775 |
def get_notes_by_keywords(keywords, page=1, page_size=20):
|
776 |
try:
|
777 |
+
# Handle empty or invalid keywords
|
778 |
+
if not keywords or not isinstance(keywords, (list, tuple)) or len(keywords) == 0:
|
779 |
+
return [], 0, 0
|
780 |
+
|
781 |
+
# Convert all keywords to strings and strip them
|
782 |
+
clean_keywords = [str(k).strip() for k in keywords if k is not None and str(k).strip()]
|
783 |
+
|
784 |
+
# If no valid keywords after cleaning, return empty result
|
785 |
+
if not clean_keywords:
|
786 |
+
return [], 0, 0
|
787 |
+
|
788 |
+
placeholders = ','.join(['?'] * len(clean_keywords))
|
789 |
query = f'''
|
790 |
SELECT n.id, n.title, n.content, n.timestamp
|
791 |
FROM rag_qa_notes n
|
|
|
794 |
WHERE k.keyword IN ({placeholders})
|
795 |
ORDER BY n.timestamp DESC
|
796 |
'''
|
797 |
+
results, total_pages, total_count = get_paginated_results(query, tuple(clean_keywords), page, page_size)
|
798 |
+
logger.info(f"Retrieved {len(results)} notes matching keywords: {', '.join(clean_keywords)} (page {page} of {total_pages})")
|
799 |
notes = [(row[0], row[1], row[2], row[3]) for row in results]
|
800 |
return notes, total_pages, total_count
|
801 |
except Exception as e:
|
802 |
logger.error(f"Error getting notes by keywords: {e}")
|
803 |
raise
|
804 |
|
805 |
+
|
806 |
def get_notes_by_keyword_collection(collection_name, page=1, page_size=20):
|
807 |
try:
|
808 |
query = '''
|
|
|
915 |
#
|
916 |
# Chat-related functions
|
917 |
|
918 |
+
def save_message(conversation_id, role, content, timestamp=None):
|
919 |
try:
|
920 |
+
if timestamp is None:
|
921 |
+
timestamp = datetime.now().isoformat()
|
922 |
query = "INSERT INTO rag_qa_chats (conversation_id, timestamp, role, content) VALUES (?, ?, ?, ?)"
|
923 |
execute_query(query, (conversation_id, timestamp, role, content))
|
924 |
|
|
|
931 |
logger.error(f"Error saving message for conversation '{conversation_id}': {e}")
|
932 |
raise
|
933 |
|
934 |
+
|
935 |
+
def start_new_conversation(title="Untitled Conversation", media_id=None):
    """Insert a new row into conversation_metadata and return its conversation ID.

    Args:
        title: Title stored for the conversation.
        media_id: Optional media ID stored with the conversation.

    Returns:
        The newly generated conversation ID (a UUID4 rendered as a string).

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    insert_sql = """
        INSERT INTO conversation_metadata (
            conversation_id, created_at, last_updated, title, media_id, rating
        ) VALUES (?, ?, ?, ?, ?, ?)
    """
    try:
        new_conversation_id = str(uuid.uuid4())
        created = datetime.now().isoformat()
        # created_at and last_updated start out identical; rating starts as NULL.
        row_values = (new_conversation_id, created, created, title, media_id, None)
        execute_query(insert_sql, row_values)
        logger.info(f"New conversation '{new_conversation_id}' started with title '{title}' and media_id '{media_id}'")
        return new_conversation_id
    except Exception as e:
        logger.error(f"Error starting new conversation: {e}")
        raise
|
951 |
|
952 |
+
|
953 |
def get_all_conversations(page=1, page_size=20):
    """Return one page of conversations, most recently updated first.

    Args:
        page: 1-based page number.
        page_size: Number of conversations per page.

    Returns:
        A tuple ``(conversations, total_pages, total_count)``. ``conversations``
        is a list of dicts with the keys ``conversation_id``, ``title``,
        ``media_id`` and ``rating``.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    query = """
        SELECT conversation_id, title, media_id, rating
        FROM conversation_metadata
        ORDER BY last_updated DESC
        LIMIT ? OFFSET ?
    """
    count_query = "SELECT COUNT(*) FROM conversation_metadata"

    try:
        db_path = get_rag_qa_db_path()
        # ``with sqlite3.connect(...)`` only manages the transaction and leaves
        # the connection open, so close it explicitly once the reads are done.
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            # Get total count
            cursor.execute(count_query)
            total_count = cursor.fetchone()[0]
            total_pages = (total_count + page_size - 1) // page_size  # ceiling division

            # Get page of results
            offset = (page - 1) * page_size
            cursor.execute(query, (page_size, offset))
            results = cursor.fetchall()
        finally:
            conn.close()

        conversations = [{
            'conversation_id': row[0],
            'title': row[1],
            'media_id': row[2],
            'rating': row[3]
        } for row in results]
        return conversations, total_pages, total_count
    except Exception as e:
        # Use the module logger, as the other functions in this file do.
        logger.error(f"Error getting conversations: {e}")
        raise
|
987 |
+
|
988 |
+
|
989 |
+
def get_all_notes(page=1, page_size=20):
    """Return one page of notes, newest first, with their conversation metadata.

    Args:
        page: 1-based page number.
        page_size: Number of notes per page.

    Returns:
        A tuple ``(notes, total_pages, total_count)``. ``notes`` is a list of
        dicts with the keys ``id``, ``conversation_id``, ``title``, ``content``,
        ``timestamp``, ``conversation_title`` and ``media_id``. The last two
        come from a LEFT JOIN and are ``None`` when the note's conversation has
        no metadata row.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    query = """
        SELECT n.id, n.conversation_id, n.title, n.content, n.timestamp,
               cm.title as conversation_title, cm.media_id
        FROM rag_qa_notes n
        LEFT JOIN conversation_metadata cm ON n.conversation_id = cm.conversation_id
        ORDER BY n.timestamp DESC
        LIMIT ? OFFSET ?
    """
    count_query = "SELECT COUNT(*) FROM rag_qa_notes"

    try:
        db_path = get_rag_qa_db_path()
        # ``with sqlite3.connect(...)`` only manages the transaction and leaves
        # the connection open, so close it explicitly once the reads are done.
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            # Get total count
            cursor.execute(count_query)
            total_count = cursor.fetchone()[0]
            total_pages = (total_count + page_size - 1) // page_size  # ceiling division

            # Get page of results
            offset = (page - 1) * page_size
            cursor.execute(query, (page_size, offset))
            results = cursor.fetchall()
        finally:
            conn.close()

        notes = [{
            'id': row[0],
            'conversation_id': row[1],
            'title': row[2],
            'content': row[3],
            'timestamp': row[4],
            'conversation_title': row[5],
            'media_id': row[6]
        } for row in results]

        return notes, total_pages, total_count
    except Exception as e:
        # Use the module logger, as the other functions in this file do.
        logger.error(f"Error getting notes: {e}")
        raise
|
1029 |
|
1030 |
+
|
1031 |
# Pagination helper function
|
1032 |
def get_paginated_results(query, params=None, page=1, page_size=20):
|
1033 |
try:
|
|
|
1053 |
logger.error(f"Error retrieving paginated results: {e}")
|
1054 |
raise
|
1055 |
|
1056 |
+
|
1057 |
def get_all_collections(page=1, page_size=20):
|
1058 |
try:
|
1059 |
query = "SELECT name FROM rag_qa_keyword_collections"
|
|
|
1065 |
logger.error(f"Error getting collections: {e}")
|
1066 |
raise
|
1067 |
|
1068 |
+
|
1069 |
+
def search_conversations_by_keywords(keywords=None, title_query=None, content_query=None, page=1, page_size=20):
    """Search conversations by message content, title and/or keywords.

    Every supplied filter narrows the result (filters are ANDed); filters that
    are missing, not of the expected type, or blank after stripping are ignored.

    Args:
        keywords: Optional list/tuple of keywords; a conversation matches when
            it is linked to at least one of them.
        title_query: Optional FTS MATCH expression applied to conversation titles.
        content_query: Optional FTS MATCH expression applied to chat messages.
        page: Page number forwarded to get_paginated_results.
        page_size: Page size forwarded to get_paginated_results.

    Returns:
        A tuple ``(conversations, total_pages, total_count)``. ``conversations``
        is a list of dicts with the keys ``conversation_id``, ``title`` and
        ``last_updated``, ordered by ``last_updated`` descending.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        # "WHERE 1=1" lets every optional filter be appended as "AND ...".
        sql = """
            SELECT DISTINCT cm.conversation_id, cm.title, cm.last_updated
            FROM conversation_metadata cm
            WHERE 1=1
        """
        bound_values = []

        # Message-content filter
        content_term = content_query.strip() if isinstance(content_query, str) else ""
        if content_term:
            sql += """
                AND EXISTS (
                    SELECT 1 FROM rag_qa_chats_fts
                    WHERE rag_qa_chats_fts.content MATCH ?
                    AND rag_qa_chats_fts.rowid IN (
                        SELECT id FROM rag_qa_chats
                        WHERE conversation_id = cm.conversation_id
                    )
                )
            """
            bound_values.append(content_term)

        # Title filter
        title_term = title_query.strip() if isinstance(title_query, str) else ""
        if title_term:
            sql += """
                AND EXISTS (
                    SELECT 1 FROM conversation_metadata_fts
                    WHERE conversation_metadata_fts.title MATCH ?
                    AND conversation_metadata_fts.rowid = cm.rowid
                )
            """
            bound_values.append(title_term)

        # Keyword filter
        if isinstance(keywords, (list, tuple)) and keywords:
            # Stringify and strip each keyword, dropping None and blank entries.
            cleaned = []
            for raw in keywords:
                if raw is not None and str(raw).strip():
                    cleaned.append(str(raw).strip())
            if cleaned:
                # One "?" per keyword: "?,?,?"
                placeholders = ','.join('?' * len(cleaned))
                sql += f"""
                    AND EXISTS (
                        SELECT 1 FROM rag_qa_conversation_keywords ck
                        JOIN rag_qa_keywords k ON ck.keyword_id = k.id
                        WHERE ck.conversation_id = cm.conversation_id
                        AND k.keyword IN ({placeholders})
                    )
                """
                bound_values.extend(cleaned)

        sql += " ORDER BY cm.last_updated DESC"

        rows, total_pages, total_count = get_paginated_results(sql, tuple(bound_values), page, page_size)

        conversations = []
        for row in rows:
            conversations.append({
                'conversation_id': row[0],
                'title': row[1],
                'last_updated': row[2]
            })

        return conversations, total_pages, total_count

    except Exception as e:
        logger.error(f"Error searching conversations: {e}")
        raise
|
1139 |
|
1140 |
+
|
1141 |
def load_chat_history(conversation_id, page=1, page_size=50):
|
1142 |
try:
|
1143 |
query = "SELECT role, content FROM rag_qa_chats WHERE conversation_id = ? ORDER BY timestamp"
|
|
|
1149 |
logger.error(f"Error loading chat history for conversation '{conversation_id}': {e}")
|
1150 |
raise
|
1151 |
|
1152 |
+
|
1153 |
def update_conversation_title(conversation_id, new_title):
|
1154 |
"""Update the title of a conversation."""
|
1155 |
try:
|
|
|
1160 |
logger.error(f"Error updating conversation title: {e}")
|
1161 |
raise
|
1162 |
|
1163 |
+
|
1164 |
+
def delete_messages_in_conversation(conversation_id):
    """Delete every row in rag_qa_chats that belongs to the given conversation.

    Args:
        conversation_id: ID of the conversation whose messages are removed.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        execute_query("DELETE FROM rag_qa_chats WHERE conversation_id = ?", (conversation_id,))
        # Log through the module logger, like the other functions in this file,
        # rather than the root logger.
        logger.info(f"Messages in conversation '{conversation_id}' deleted successfully.")
    except Exception as e:
        logger.error(f"Error deleting messages in conversation '{conversation_id}': {e}")
        raise
|
1172 |
+
|
1173 |
+
|
1174 |
+
def get_conversation_title(conversation_id):
    """Look up a conversation's title in conversation_metadata.

    Returns the stored title, or "Untitled Conversation" when execute_query
    yields no rows for the given ID.
    """
    rows = execute_query(
        "SELECT title FROM conversation_metadata WHERE conversation_id = ?",
        (conversation_id,),
    )
    # Assumes execute_query returns a sequence of row tuples for SELECTs - verify.
    return rows[0][0] if rows else "Untitled Conversation"
|
1182 |
+
|
1183 |
+
|
1184 |
+
def get_conversation_text(conversation_id):
    """Return a conversation's messages as one string, oldest message first.

    Each message is rendered as ``"<role>: <content>"`` and messages are
    separated by a blank line. A conversation without messages yields ``""``.

    Args:
        conversation_id: ID of the conversation to render.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    query = """
        SELECT role, content
        FROM rag_qa_chats
        WHERE conversation_id = ?
        ORDER BY timestamp ASC
    """
    try:
        db_path = get_rag_qa_db_path()
        # ``with sqlite3.connect(...)`` only manages the transaction and leaves
        # the connection open, so close it explicitly once the rows are fetched.
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            cursor.execute(query, (conversation_id,))
            messages = cursor.fetchall()
        finally:
            conn.close()

        return "\n\n".join(f"{role}: {content}" for role, content in messages)
    except Exception as e:
        logger.error(f"Error getting conversation text: {e}")
        raise
|
1205 |
+
|
1206 |
+
|
1207 |
+
def get_conversation_details(conversation_id):
    """Return the title, media_id and rating stored for a conversation.

    The result is a dict with the keys ``title``, ``media_id`` and ``rating``.
    When execute_query yields no rows, the defaults "Untitled Conversation",
    None and None are returned instead.
    """
    rows = execute_query(
        "SELECT title, media_id, rating FROM conversation_metadata WHERE conversation_id = ?",
        (conversation_id,),
    )
    if not rows:
        return {'title': "Untitled Conversation", 'media_id': None, 'rating': None}

    # Only the first matching row is used.
    first = rows[0]
    return {'title': first[0], 'media_id': first[1], 'rating': first[2]}
|
1214 |
+
|
1215 |
+
|
1216 |
def delete_conversation(conversation_id):
|
1217 |
"""Delete a conversation and its associated messages and notes."""
|
1218 |
try:
|
|
|
1232 |
logger.error(f"Error deleting conversation '{conversation_id}': {e}")
|
1233 |
raise
|
1234 |
|
1235 |
+
def set_conversation_rating(conversation_id, rating):
    """Store a rating on a conversation's metadata row.

    Args:
        conversation_id: ID of the conversation to rate.
        rating: Must be 1, 2 or 3.

    Raises:
        ValueError: If ``rating`` is not one of the allowed values; raised
            before any database access.
        Exception: Any failure during the update is logged and re-raised.
    """
    allowed_ratings = (1, 2, 3)
    if rating not in allowed_ratings:
        raise ValueError('Rating must be an integer between 1 and 3.')

    update_sql = "UPDATE conversation_metadata SET rating = ? WHERE conversation_id = ?"
    try:
        execute_query(update_sql, (rating, conversation_id))
        logger.info(f"Rating for conversation '{conversation_id}' set to {rating}")
    except Exception as e:
        logger.error(f"Error setting rating for conversation '{conversation_id}': {e}")
        raise
|
1247 |
+
|
1248 |
+
def get_conversation_rating(conversation_id):
    """Fetch the rating stored for a conversation.

    Returns:
        The value of the ``rating`` column, or None (after logging a warning)
        when execute_query yields no rows for the given ID.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        rows = execute_query(
            "SELECT rating FROM conversation_metadata WHERE conversation_id = ?",
            (conversation_id,),
        )
        if not rows:
            logger.warning(f"Conversation '{conversation_id}' not found.")
            return None

        stored_rating = rows[0][0]
        logger.info(f"Rating for conversation '{conversation_id}' is {stored_rating}")
        return stored_rating
    except Exception as e:
        logger.error(f"Error getting rating for conversation '{conversation_id}': {e}")
        raise
|
1263 |
+
|
1264 |
+
|
1265 |
+
def get_conversation_name(conversation_id: str) -> str:
    """
    Retrieves the title/name of a conversation from the conversation_metadata table.

    Args:
        conversation_id (str): The unique identifier of the conversation

    Returns:
        str: The title of the conversation if found, "Untitled Conversation" if not found

    Raises:
        sqlite3.Error: If there's a database error
        Exception: Any other failure is logged and re-raised unchanged
    """
    try:
        # NOTE(review): get_db_connection() is defined elsewhere in this file;
        # presumably it yields a connection and handles cleanup - confirm.
        with get_db_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT title FROM conversation_metadata WHERE conversation_id = ?",
                (conversation_id,)
            )
            result = cursor.fetchone()

            if result:
                return result[0]

            # Log through the module logger, like the other functions in this
            # file, rather than the root logger.
            logger.warning(f"No conversation found with ID: {conversation_id}")
            return "Untitled Conversation"

    except sqlite3.Error as e:
        logger.error(f"Database error retrieving conversation name for ID {conversation_id}: {e}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error retrieving conversation name for ID {conversation_id}: {e}")
        raise
|
1299 |
+
|
1300 |
+
|
1301 |
+
def search_rag_chat(query: str, fts_top_k: int = 10, relevant_media_ids: List[str] = None) -> List[Dict[str, Any]]:
    """
    Perform a full-text search on the RAG Chat database.

    Args:
        query: Search query string, passed as-is to the FTS MATCH operator.
        fts_top_k: Maximum number of rows fetched from the FTS table.
        relevant_media_ids: Optional list of media IDs to filter results. The
            filter is applied after the ``fts_top_k`` limit, so fewer than
            ``fts_top_k`` results may be returned.

    Returns:
        List of search results, each a dict with ``content`` and a ``metadata``
        dict holding ``conversation_id``, ``role`` and ``media_id``. An empty
        list is returned for a missing/blank query and when any error occurs
        (errors are logged, not raised).
    """
    # Treat None like a blank query instead of failing on ``None.strip()``.
    if not query or not query.strip():
        return []

    try:
        db_path = get_rag_qa_db_path()
        # ``with sqlite3.connect(...)`` only manages the transaction and leaves
        # the connection open, so close it explicitly once the rows are fetched.
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            # Perform the full-text search using the FTS virtual table
            cursor.execute("""
                SELECT rag_qa_chats.id, rag_qa_chats.conversation_id, rag_qa_chats.role, rag_qa_chats.content
                FROM rag_qa_chats_fts
                JOIN rag_qa_chats ON rag_qa_chats_fts.rowid = rag_qa_chats.id
                WHERE rag_qa_chats_fts MATCH ?
                LIMIT ?
            """, (query, fts_top_k))

            rows = cursor.fetchall()
            columns = [description[0] for description in cursor.description]
        finally:
            conn.close()

        results = [dict(zip(columns, row)) for row in rows]

        # Resolve each distinct conversation's media_id once; the filter and
        # the formatting step below both need it.
        media_id_by_conversation = {}
        for r in results:
            conversation_id = r['conversation_id']
            if conversation_id not in media_id_by_conversation:
                media_id_by_conversation[conversation_id] = (
                    get_conversation_details(conversation_id).get('media_id')
                )

        # Filter by relevant_media_ids if provided
        if relevant_media_ids is not None:
            results = [
                r for r in results
                if media_id_by_conversation[r['conversation_id']] in relevant_media_ids
            ]

        # Format results
        return [
            {
                "content": r['content'],
                "metadata": {
                    "conversation_id": r['conversation_id'],
                    "role": r['role'],
                    "media_id": media_id_by_conversation[r['conversation_id']]
                }
            }
            for r in results
        ]

    except Exception as e:
        # Best-effort search: log through the module logger and return no results.
        logger.error(f"Error in search_rag_chat: {e}")
        return []
|
1357 |
+
|
1358 |
+
|
1359 |
+
def search_rag_notes(query: str, fts_top_k: int = 10, relevant_media_ids: List[str] = None) -> List[Dict[str, Any]]:
    """
    Perform a full-text search on the RAG Notes database.

    Args:
        query: Search query string, passed as-is to the FTS MATCH operator.
        fts_top_k: Maximum number of rows fetched from the FTS table.
        relevant_media_ids: Optional list of media IDs to filter results. The
            filter is applied after the ``fts_top_k`` limit, so fewer than
            ``fts_top_k`` results may be returned.

    Returns:
        List of search results, each a dict with ``content`` and a ``metadata``
        dict holding ``note_id``, ``title``, ``conversation_id`` and
        ``media_id``. An empty list is returned for a missing/blank query and
        when any error occurs (errors are logged, not raised).
    """
    # Treat None like a blank query instead of failing on ``None.strip()``.
    if not query or not query.strip():
        return []

    try:
        db_path = get_rag_qa_db_path()
        # ``with sqlite3.connect(...)`` only manages the transaction and leaves
        # the connection open, so close it explicitly once the rows are fetched.
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            # Perform the full-text search using the FTS virtual table
            cursor.execute("""
                SELECT rag_qa_notes.id, rag_qa_notes.title, rag_qa_notes.content, rag_qa_notes.conversation_id
                FROM rag_qa_notes_fts
                JOIN rag_qa_notes ON rag_qa_notes_fts.rowid = rag_qa_notes.id
                WHERE rag_qa_notes_fts MATCH ?
                LIMIT ?
            """, (query, fts_top_k))

            rows = cursor.fetchall()
            columns = [description[0] for description in cursor.description]
        finally:
            conn.close()

        results = [dict(zip(columns, row)) for row in rows]

        # Resolve each distinct conversation's media_id once; the filter and
        # the formatting step below both need it.
        media_id_by_conversation = {}
        for r in results:
            conversation_id = r['conversation_id']
            if conversation_id not in media_id_by_conversation:
                media_id_by_conversation[conversation_id] = (
                    get_conversation_details(conversation_id).get('media_id')
                )

        # Filter by relevant_media_ids if provided
        if relevant_media_ids is not None:
            results = [
                r for r in results
                if media_id_by_conversation[r['conversation_id']] in relevant_media_ids
            ]

        # Format results
        return [
            {
                "content": r['content'],
                "metadata": {
                    "note_id": r['id'],
                    "title": r['title'],
                    "conversation_id": r['conversation_id'],
                    "media_id": media_id_by_conversation[r['conversation_id']]
                }
            }
            for r in results
        ]

    except Exception as e:
        # Best-effort search: log through the module logger and return no results.
        logger.error(f"Error in search_rag_notes: {e}")
        return []
|
1416 |
+
|
1417 |
#
|
1418 |
# End of Chat-related functions
|
1419 |
###################################################
|
1420 |
|
1421 |
|
1422 |
+
###################################################
|
1423 |
+
#
|
1424 |
+
# Import functions
|
1425 |
+
|
1426 |
+
|
1427 |
+
#
|
1428 |
+
# End of Import functions
|
1429 |
+
###################################################
|
1430 |
+
|
1431 |
+
|
1432 |
###################################################
|
1433 |
#
|
1434 |
# Functions to export DB data
|
App_Function_Libraries/DB/SQLite_DB.py
CHANGED
@@ -21,7 +21,7 @@ import configparser
|
|
21 |
# 11. browse_items(search_query, search_type)
|
22 |
# 12. fetch_item_details(media_id: int)
|
23 |
# 13. add_media_version(media_id: int, prompt: str, summary: str)
|
24 |
-
# 14.
|
25 |
# 15. search_and_display(search_query, search_fields, keywords, page)
|
26 |
# 16. display_details(index, results)
|
27 |
# 17. get_details(index, dataframe)
|
@@ -55,12 +55,14 @@ import re
|
|
55 |
import shutil
|
56 |
import sqlite3
|
57 |
import threading
|
|
|
58 |
import traceback
|
59 |
from contextlib import contextmanager
|
60 |
from datetime import datetime, timedelta
|
61 |
from typing import List, Tuple, Dict, Any, Optional
|
62 |
from urllib.parse import quote
|
63 |
|
|
|
64 |
# Local Libraries
|
65 |
from App_Function_Libraries.Utils.Utils import get_project_relative_path, get_database_path, \
|
66 |
get_database_dir
|
@@ -342,27 +344,6 @@ def create_tables(db) -> None:
|
|
342 |
)
|
343 |
''',
|
344 |
'''
|
345 |
-
CREATE TABLE IF NOT EXISTS ChatConversations (
|
346 |
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
347 |
-
media_id INTEGER,
|
348 |
-
media_name TEXT,
|
349 |
-
conversation_name TEXT,
|
350 |
-
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
351 |
-
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
352 |
-
FOREIGN KEY (media_id) REFERENCES Media(id)
|
353 |
-
)
|
354 |
-
''',
|
355 |
-
'''
|
356 |
-
CREATE TABLE IF NOT EXISTS ChatMessages (
|
357 |
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
358 |
-
conversation_id INTEGER,
|
359 |
-
sender TEXT,
|
360 |
-
message TEXT,
|
361 |
-
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
362 |
-
FOREIGN KEY (conversation_id) REFERENCES ChatConversations(id)
|
363 |
-
)
|
364 |
-
''',
|
365 |
-
'''
|
366 |
CREATE TABLE IF NOT EXISTS Transcripts (
|
367 |
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
368 |
media_id INTEGER,
|
@@ -421,8 +402,6 @@ def create_tables(db) -> None:
|
|
421 |
'CREATE INDEX IF NOT EXISTS idx_mediakeywords_keyword_id ON MediaKeywords(keyword_id)',
|
422 |
'CREATE INDEX IF NOT EXISTS idx_media_version_media_id ON MediaVersion(media_id)',
|
423 |
'CREATE INDEX IF NOT EXISTS idx_mediamodifications_media_id ON MediaModifications(media_id)',
|
424 |
-
'CREATE INDEX IF NOT EXISTS idx_chatconversations_media_id ON ChatConversations(media_id)',
|
425 |
-
'CREATE INDEX IF NOT EXISTS idx_chatmessages_conversation_id ON ChatMessages(conversation_id)',
|
426 |
'CREATE INDEX IF NOT EXISTS idx_media_is_trash ON Media(is_trash)',
|
427 |
'CREATE INDEX IF NOT EXISTS idx_mediachunks_media_id ON MediaChunks(media_id)',
|
428 |
'CREATE INDEX IF NOT EXISTS idx_unvectorized_media_chunks_media_id ON UnvectorizedMediaChunks(media_id)',
|
@@ -606,7 +585,10 @@ def mark_media_as_processed(database, media_id):
|
|
606 |
# Function to add media with keywords
|
607 |
def add_media_with_keywords(url, title, media_type, content, keywords, prompt, summary, transcription_model, author,
|
608 |
ingestion_date):
|
|
|
|
|
609 |
logging.debug(f"Entering add_media_with_keywords: URL={url}, Title={title}")
|
|
|
610 |
# Set default values for missing fields
|
611 |
if url is None:
|
612 |
url = 'localhost'
|
@@ -622,10 +604,17 @@ def add_media_with_keywords(url, title, media_type, content, keywords, prompt, s
|
|
622 |
author = author or 'Unknown'
|
623 |
ingestion_date = ingestion_date or datetime.now().strftime('%Y-%m-%d')
|
624 |
|
625 |
-
if media_type not in ['article', 'audio', '
|
626 |
-
|
|
|
|
|
|
|
|
|
627 |
|
628 |
if ingestion_date and not is_valid_date(ingestion_date):
|
|
|
|
|
|
|
629 |
raise InputError("Invalid ingestion date format. Use YYYY-MM-DD.")
|
630 |
|
631 |
# Handle keywords as either string or list
|
@@ -654,6 +643,7 @@ def add_media_with_keywords(url, title, media_type, content, keywords, prompt, s
|
|
654 |
logging.debug(f"Existing media ID for {url}: {existing_media_id}")
|
655 |
|
656 |
if existing_media_id:
|
|
|
657 |
media_id = existing_media_id
|
658 |
logging.debug(f"Updating existing media with ID: {media_id}")
|
659 |
cursor.execute('''
|
@@ -661,7 +651,9 @@ def add_media_with_keywords(url, title, media_type, content, keywords, prompt, s
|
|
661 |
SET content = ?, transcription_model = ?, type = ?, author = ?, ingestion_date = ?
|
662 |
WHERE id = ?
|
663 |
''', (content, transcription_model, media_type, author, ingestion_date, media_id))
|
|
|
664 |
else:
|
|
|
665 |
logging.debug("Inserting new media")
|
666 |
cursor.execute('''
|
667 |
INSERT INTO Media (url, title, type, content, author, ingestion_date, transcription_model)
|
@@ -669,6 +661,7 @@ def add_media_with_keywords(url, title, media_type, content, keywords, prompt, s
|
|
669 |
''', (url, title, media_type, content, author, ingestion_date, transcription_model))
|
670 |
media_id = cursor.lastrowid
|
671 |
logging.debug(f"New media inserted with ID: {media_id}")
|
|
|
672 |
|
673 |
cursor.execute('''
|
674 |
INSERT INTO MediaModifications (media_id, prompt, summary, modification_date)
|
@@ -698,13 +691,23 @@ def add_media_with_keywords(url, title, media_type, content, keywords, prompt, s
|
|
698 |
conn.commit()
|
699 |
logging.info(f"Media '{title}' successfully added/updated with ID: {media_id}")
|
700 |
|
701 |
-
|
|
|
|
|
|
|
|
|
702 |
|
703 |
except sqlite3.Error as e:
|
704 |
logging.error(f"SQL Error in add_media_with_keywords: {e}")
|
|
|
|
|
|
|
705 |
raise DatabaseError(f"Error adding media with keywords: {e}")
|
706 |
except Exception as e:
|
707 |
logging.error(f"Unexpected Error in add_media_with_keywords: {e}")
|
|
|
|
|
|
|
708 |
raise DatabaseError(f"Unexpected error: {e}")
|
709 |
|
710 |
|
@@ -779,7 +782,13 @@ def ingest_article_to_db(url, title, author, content, keywords, summary, ingesti
|
|
779 |
|
780 |
# Function to add a keyword
|
781 |
def add_keyword(keyword: str) -> int:
|
|
|
|
|
|
|
782 |
if not keyword.strip():
|
|
|
|
|
|
|
783 |
raise DatabaseError("Keyword cannot be empty")
|
784 |
|
785 |
keyword = keyword.strip().lower()
|
@@ -801,18 +810,32 @@ def add_keyword(keyword: str) -> int:
|
|
801 |
|
802 |
logging.info(f"Keyword '{keyword}' added or updated with ID: {keyword_id}")
|
803 |
conn.commit()
|
|
|
|
|
|
|
|
|
|
|
804 |
return keyword_id
|
805 |
except sqlite3.IntegrityError as e:
|
806 |
logging.error(f"Integrity error adding keyword: {e}")
|
|
|
|
|
|
|
807 |
raise DatabaseError(f"Integrity error adding keyword: {e}")
|
808 |
except sqlite3.Error as e:
|
809 |
logging.error(f"Error adding keyword: {e}")
|
|
|
|
|
|
|
810 |
raise DatabaseError(f"Error adding keyword: {e}")
|
811 |
|
812 |
|
813 |
|
814 |
# Function to delete a keyword
|
815 |
def delete_keyword(keyword: str) -> str:
|
|
|
|
|
|
|
816 |
keyword = keyword.strip().lower()
|
817 |
with db.get_connection() as conn:
|
818 |
cursor = conn.cursor()
|
@@ -823,10 +846,23 @@ def delete_keyword(keyword: str) -> str:
|
|
823 |
cursor.execute('DELETE FROM Keywords WHERE keyword = ?', (keyword,))
|
824 |
cursor.execute('DELETE FROM keyword_fts WHERE rowid = ?', (keyword_id[0],))
|
825 |
conn.commit()
|
|
|
|
|
|
|
|
|
|
|
826 |
return f"Keyword '{keyword}' deleted successfully."
|
827 |
else:
|
|
|
|
|
|
|
|
|
828 |
return f"Keyword '{keyword}' not found."
|
829 |
except sqlite3.Error as e:
|
|
|
|
|
|
|
|
|
830 |
raise DatabaseError(f"Error deleting keyword: {e}")
|
831 |
|
832 |
|
@@ -1000,7 +1036,7 @@ def add_media_version(conn, media_id: int, prompt: str, summary: str) -> None:
|
|
1000 |
|
1001 |
|
1002 |
# Function to search the database with advanced options, including keyword search and full-text search
|
1003 |
-
def
|
1004 |
if page < 1:
|
1005 |
raise ValueError("Page number must be 1 or greater.")
|
1006 |
|
@@ -1055,7 +1091,7 @@ def sqlite_search_db(search_query: str, search_fields: List[str], keywords: str,
|
|
1055 |
|
1056 |
# Gradio function to handle user input and display results with pagination, with better feedback
|
1057 |
def search_and_display(search_query, search_fields, keywords, page):
|
1058 |
-
results =
|
1059 |
|
1060 |
if isinstance(results, pd.DataFrame):
|
1061 |
# Convert DataFrame to a list of tuples or lists
|
@@ -1133,7 +1169,7 @@ def format_results(results):
|
|
1133 |
# Function to export search results to CSV or markdown with pagination
|
1134 |
def export_to_file(search_query: str, search_fields: List[str], keyword: str, page: int = 1, results_per_file: int = 1000, export_format: str = 'csv'):
|
1135 |
try:
|
1136 |
-
results =
|
1137 |
if not results:
|
1138 |
return "No results found to export."
|
1139 |
|
@@ -1381,303 +1417,6 @@ def schedule_chunking(media_id: int, content: str, media_name: str):
|
|
1381 |
#######################################################################################################################
|
1382 |
|
1383 |
|
1384 |
-
#######################################################################################################################
|
1385 |
-
#
|
1386 |
-
# Functions to manage prompts DB
|
1387 |
-
|
1388 |
-
def create_prompts_db():
|
1389 |
-
logging.debug("create_prompts_db: Creating prompts database.")
|
1390 |
-
with sqlite3.connect(get_database_path('prompts.db')) as conn:
|
1391 |
-
cursor = conn.cursor()
|
1392 |
-
cursor.executescript('''
|
1393 |
-
CREATE TABLE IF NOT EXISTS Prompts (
|
1394 |
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
1395 |
-
name TEXT NOT NULL UNIQUE,
|
1396 |
-
author TEXT,
|
1397 |
-
details TEXT,
|
1398 |
-
system TEXT,
|
1399 |
-
user TEXT
|
1400 |
-
);
|
1401 |
-
CREATE TABLE IF NOT EXISTS Keywords (
|
1402 |
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
1403 |
-
keyword TEXT NOT NULL UNIQUE COLLATE NOCASE
|
1404 |
-
);
|
1405 |
-
CREATE TABLE IF NOT EXISTS PromptKeywords (
|
1406 |
-
prompt_id INTEGER,
|
1407 |
-
keyword_id INTEGER,
|
1408 |
-
FOREIGN KEY (prompt_id) REFERENCES Prompts (id),
|
1409 |
-
FOREIGN KEY (keyword_id) REFERENCES Keywords (id),
|
1410 |
-
PRIMARY KEY (prompt_id, keyword_id)
|
1411 |
-
);
|
1412 |
-
CREATE INDEX IF NOT EXISTS idx_keywords_keyword ON Keywords(keyword);
|
1413 |
-
CREATE INDEX IF NOT EXISTS idx_promptkeywords_prompt_id ON PromptKeywords(prompt_id);
|
1414 |
-
CREATE INDEX IF NOT EXISTS idx_promptkeywords_keyword_id ON PromptKeywords(keyword_id);
|
1415 |
-
''')
|
1416 |
-
|
1417 |
-
# FIXME - dirty hack that should be removed later...
|
1418 |
-
# Migration function to add the 'author' column to the Prompts table
|
1419 |
-
def add_author_column_to_prompts():
|
1420 |
-
with sqlite3.connect(get_database_path('prompts.db')) as conn:
|
1421 |
-
cursor = conn.cursor()
|
1422 |
-
# Check if 'author' column already exists
|
1423 |
-
cursor.execute("PRAGMA table_info(Prompts)")
|
1424 |
-
columns = [col[1] for col in cursor.fetchall()]
|
1425 |
-
|
1426 |
-
if 'author' not in columns:
|
1427 |
-
# Add the 'author' column
|
1428 |
-
cursor.execute('ALTER TABLE Prompts ADD COLUMN author TEXT')
|
1429 |
-
print("Author column added to Prompts table.")
|
1430 |
-
else:
|
1431 |
-
print("Author column already exists in Prompts table.")
|
1432 |
-
|
1433 |
-
add_author_column_to_prompts()
|
1434 |
-
|
1435 |
-
def normalize_keyword(keyword):
|
1436 |
-
return re.sub(r'\s+', ' ', keyword.strip().lower())
|
1437 |
-
|
1438 |
-
|
1439 |
-
# FIXME - update calls to this function to use the new args
|
1440 |
-
def add_prompt(name, author, details, system=None, user=None, keywords=None):
|
1441 |
-
logging.debug(f"add_prompt: Adding prompt with name: {name}, author: {author}, system: {system}, user: {user}, keywords: {keywords}")
|
1442 |
-
if not name:
|
1443 |
-
logging.error("add_prompt: A name is required.")
|
1444 |
-
return "A name is required."
|
1445 |
-
|
1446 |
-
try:
|
1447 |
-
with sqlite3.connect(get_database_path('prompts.db')) as conn:
|
1448 |
-
cursor = conn.cursor()
|
1449 |
-
cursor.execute('''
|
1450 |
-
INSERT INTO Prompts (name, author, details, system, user)
|
1451 |
-
VALUES (?, ?, ?, ?, ?)
|
1452 |
-
''', (name, author, details, system, user))
|
1453 |
-
prompt_id = cursor.lastrowid
|
1454 |
-
|
1455 |
-
if keywords:
|
1456 |
-
normalized_keywords = [normalize_keyword(k) for k in keywords if k.strip()]
|
1457 |
-
for keyword in set(normalized_keywords): # Use set to remove duplicates
|
1458 |
-
cursor.execute('''
|
1459 |
-
INSERT OR IGNORE INTO Keywords (keyword) VALUES (?)
|
1460 |
-
''', (keyword,))
|
1461 |
-
cursor.execute('SELECT id FROM Keywords WHERE keyword = ?', (keyword,))
|
1462 |
-
keyword_id = cursor.fetchone()[0]
|
1463 |
-
cursor.execute('''
|
1464 |
-
INSERT OR IGNORE INTO PromptKeywords (prompt_id, keyword_id) VALUES (?, ?)
|
1465 |
-
''', (prompt_id, keyword_id))
|
1466 |
-
return "Prompt added successfully."
|
1467 |
-
except sqlite3.IntegrityError:
|
1468 |
-
return "Prompt with this name already exists."
|
1469 |
-
except sqlite3.Error as e:
|
1470 |
-
return f"Database error: {e}"
|
1471 |
-
|
1472 |
-
|
1473 |
-
def fetch_prompt_details(name):
|
1474 |
-
logging.debug(f"fetch_prompt_details: Fetching details for prompt: {name}")
|
1475 |
-
with sqlite3.connect(get_database_path('prompts.db')) as conn:
|
1476 |
-
cursor = conn.cursor()
|
1477 |
-
cursor.execute('''
|
1478 |
-
SELECT p.name, p.author, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
|
1479 |
-
FROM Prompts p
|
1480 |
-
LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
|
1481 |
-
LEFT JOIN Keywords k ON pk.keyword_id = k.id
|
1482 |
-
WHERE p.name = ?
|
1483 |
-
GROUP BY p.id
|
1484 |
-
''', (name,))
|
1485 |
-
return cursor.fetchone()
|
1486 |
-
|
1487 |
-
|
1488 |
-
def list_prompts(page=1, per_page=10):
|
1489 |
-
logging.debug(f"list_prompts: Listing prompts for page {page} with {per_page} prompts per page.")
|
1490 |
-
offset = (page - 1) * per_page
|
1491 |
-
with sqlite3.connect(get_database_path('prompts.db')) as conn:
|
1492 |
-
cursor = conn.cursor()
|
1493 |
-
cursor.execute('SELECT name FROM Prompts LIMIT ? OFFSET ?', (per_page, offset))
|
1494 |
-
prompts = [row[0] for row in cursor.fetchall()]
|
1495 |
-
|
1496 |
-
# Get total count of prompts
|
1497 |
-
cursor.execute('SELECT COUNT(*) FROM Prompts')
|
1498 |
-
total_count = cursor.fetchone()[0]
|
1499 |
-
|
1500 |
-
total_pages = (total_count + per_page - 1) // per_page
|
1501 |
-
return prompts, total_pages, page
|
1502 |
-
|
1503 |
-
# This will not scale. For a large number of prompts, use a more efficient method.
|
1504 |
-
# FIXME - see above statement.
|
1505 |
-
def load_preset_prompts():
|
1506 |
-
logging.debug("load_preset_prompts: Loading preset prompts.")
|
1507 |
-
try:
|
1508 |
-
with sqlite3.connect(get_database_path('prompts.db')) as conn:
|
1509 |
-
cursor = conn.cursor()
|
1510 |
-
cursor.execute('SELECT name FROM Prompts ORDER BY name ASC')
|
1511 |
-
prompts = [row[0] for row in cursor.fetchall()]
|
1512 |
-
return prompts
|
1513 |
-
except sqlite3.Error as e:
|
1514 |
-
print(f"Database error: {e}")
|
1515 |
-
return []
|
1516 |
-
|
1517 |
-
|
1518 |
-
def insert_prompt_to_db(title, author, description, system_prompt, user_prompt, keywords=None):
|
1519 |
-
return add_prompt(title, author, description, system_prompt, user_prompt, keywords)
|
1520 |
-
|
1521 |
-
|
1522 |
-
def get_prompt_db_connection():
|
1523 |
-
prompt_db_path = get_database_path('prompts.db')
|
1524 |
-
return sqlite3.connect(prompt_db_path)
|
1525 |
-
|
1526 |
-
|
1527 |
-
def search_prompts(query):
|
1528 |
-
logging.debug(f"search_prompts: Searching prompts with query: {query}")
|
1529 |
-
try:
|
1530 |
-
with get_prompt_db_connection() as conn:
|
1531 |
-
cursor = conn.cursor()
|
1532 |
-
cursor.execute("""
|
1533 |
-
SELECT p.name, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
|
1534 |
-
FROM Prompts p
|
1535 |
-
LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
|
1536 |
-
LEFT JOIN Keywords k ON pk.keyword_id = k.id
|
1537 |
-
WHERE p.name LIKE ? OR p.details LIKE ? OR p.system LIKE ? OR p.user LIKE ? OR k.keyword LIKE ?
|
1538 |
-
GROUP BY p.id
|
1539 |
-
ORDER BY p.name
|
1540 |
-
""", (f'%{query}%', f'%{query}%', f'%{query}%', f'%{query}%', f'%{query}%'))
|
1541 |
-
return cursor.fetchall()
|
1542 |
-
except sqlite3.Error as e:
|
1543 |
-
logging.error(f"Error searching prompts: {e}")
|
1544 |
-
return []
|
1545 |
-
|
1546 |
-
|
1547 |
-
def search_prompts_by_keyword(keyword, page=1, per_page=10):
|
1548 |
-
logging.debug(f"search_prompts_by_keyword: Searching prompts by keyword: {keyword}")
|
1549 |
-
normalized_keyword = normalize_keyword(keyword)
|
1550 |
-
offset = (page - 1) * per_page
|
1551 |
-
with sqlite3.connect(get_database_path('prompts.db')) as conn:
|
1552 |
-
cursor = conn.cursor()
|
1553 |
-
cursor.execute('''
|
1554 |
-
SELECT DISTINCT p.name
|
1555 |
-
FROM Prompts p
|
1556 |
-
JOIN PromptKeywords pk ON p.id = pk.prompt_id
|
1557 |
-
JOIN Keywords k ON pk.keyword_id = k.id
|
1558 |
-
WHERE k.keyword LIKE ?
|
1559 |
-
LIMIT ? OFFSET ?
|
1560 |
-
''', ('%' + normalized_keyword + '%', per_page, offset))
|
1561 |
-
prompts = [row[0] for row in cursor.fetchall()]
|
1562 |
-
|
1563 |
-
# Get total count of matching prompts
|
1564 |
-
cursor.execute('''
|
1565 |
-
SELECT COUNT(DISTINCT p.id)
|
1566 |
-
FROM Prompts p
|
1567 |
-
JOIN PromptKeywords pk ON p.id = pk.prompt_id
|
1568 |
-
JOIN Keywords k ON pk.keyword_id = k.id
|
1569 |
-
WHERE k.keyword LIKE ?
|
1570 |
-
''', ('%' + normalized_keyword + '%',))
|
1571 |
-
total_count = cursor.fetchone()[0]
|
1572 |
-
|
1573 |
-
total_pages = (total_count + per_page - 1) // per_page
|
1574 |
-
return prompts, total_pages, page
|
1575 |
-
|
1576 |
-
|
1577 |
-
def update_prompt_keywords(prompt_name, new_keywords):
|
1578 |
-
logging.debug(f"update_prompt_keywords: Updating keywords for prompt: {prompt_name}")
|
1579 |
-
try:
|
1580 |
-
with sqlite3.connect(get_database_path('prompts.db')) as conn:
|
1581 |
-
cursor = conn.cursor()
|
1582 |
-
|
1583 |
-
cursor.execute('SELECT id FROM Prompts WHERE name = ?', (prompt_name,))
|
1584 |
-
prompt_id = cursor.fetchone()
|
1585 |
-
if not prompt_id:
|
1586 |
-
return "Prompt not found."
|
1587 |
-
prompt_id = prompt_id[0]
|
1588 |
-
|
1589 |
-
cursor.execute('DELETE FROM PromptKeywords WHERE prompt_id = ?', (prompt_id,))
|
1590 |
-
|
1591 |
-
normalized_keywords = [normalize_keyword(k) for k in new_keywords if k.strip()]
|
1592 |
-
for keyword in set(normalized_keywords): # Use set to remove duplicates
|
1593 |
-
cursor.execute('INSERT OR IGNORE INTO Keywords (keyword) VALUES (?)', (keyword,))
|
1594 |
-
cursor.execute('SELECT id FROM Keywords WHERE keyword = ?', (keyword,))
|
1595 |
-
keyword_id = cursor.fetchone()[0]
|
1596 |
-
cursor.execute('INSERT INTO PromptKeywords (prompt_id, keyword_id) VALUES (?, ?)',
|
1597 |
-
(prompt_id, keyword_id))
|
1598 |
-
|
1599 |
-
# Remove unused keywords
|
1600 |
-
cursor.execute('''
|
1601 |
-
DELETE FROM Keywords
|
1602 |
-
WHERE id NOT IN (SELECT DISTINCT keyword_id FROM PromptKeywords)
|
1603 |
-
''')
|
1604 |
-
return "Keywords updated successfully."
|
1605 |
-
except sqlite3.Error as e:
|
1606 |
-
return f"Database error: {e}"
|
1607 |
-
|
1608 |
-
|
1609 |
-
def add_or_update_prompt(title, author, description, system_prompt, user_prompt, keywords=None):
|
1610 |
-
logging.debug(f"add_or_update_prompt: Adding or updating prompt: {title}")
|
1611 |
-
if not title:
|
1612 |
-
return "Error: Title is required."
|
1613 |
-
|
1614 |
-
existing_prompt = fetch_prompt_details(title)
|
1615 |
-
if existing_prompt:
|
1616 |
-
# Update existing prompt
|
1617 |
-
result = update_prompt_in_db(title, author, description, system_prompt, user_prompt)
|
1618 |
-
if "successfully" in result:
|
1619 |
-
# Update keywords if the prompt update was successful
|
1620 |
-
keyword_result = update_prompt_keywords(title, keywords or [])
|
1621 |
-
result += f" {keyword_result}"
|
1622 |
-
else:
|
1623 |
-
# Insert new prompt
|
1624 |
-
result = insert_prompt_to_db(title, author, description, system_prompt, user_prompt, keywords)
|
1625 |
-
|
1626 |
-
return result
|
1627 |
-
|
1628 |
-
|
1629 |
-
def load_prompt_details(selected_prompt):
|
1630 |
-
logging.debug(f"load_prompt_details: Loading prompt details for {selected_prompt}")
|
1631 |
-
if selected_prompt:
|
1632 |
-
details = fetch_prompt_details(selected_prompt)
|
1633 |
-
if details:
|
1634 |
-
return details[0], details[1], details[2], details[3], details[4], details[5]
|
1635 |
-
return "", "", "", "", "", ""
|
1636 |
-
|
1637 |
-
|
1638 |
-
def update_prompt_in_db(title, author, description, system_prompt, user_prompt):
|
1639 |
-
logging.debug(f"update_prompt_in_db: Updating prompt: {title}")
|
1640 |
-
try:
|
1641 |
-
with sqlite3.connect(get_database_path('prompts.db')) as conn:
|
1642 |
-
cursor = conn.cursor()
|
1643 |
-
cursor.execute(
|
1644 |
-
"UPDATE Prompts SET author = ?, details = ?, system = ?, user = ? WHERE name = ?",
|
1645 |
-
(author, description, system_prompt, user_prompt, title)
|
1646 |
-
)
|
1647 |
-
if cursor.rowcount == 0:
|
1648 |
-
return "No prompt found with the given title."
|
1649 |
-
return "Prompt updated successfully!"
|
1650 |
-
except sqlite3.Error as e:
|
1651 |
-
return f"Error updating prompt: {e}"
|
1652 |
-
|
1653 |
-
|
1654 |
-
create_prompts_db()
|
1655 |
-
|
1656 |
-
def delete_prompt(prompt_id):
|
1657 |
-
logging.debug(f"delete_prompt: Deleting prompt with ID: {prompt_id}")
|
1658 |
-
try:
|
1659 |
-
with sqlite3.connect(get_database_path('prompts.db')) as conn:
|
1660 |
-
cursor = conn.cursor()
|
1661 |
-
|
1662 |
-
# Delete associated keywords
|
1663 |
-
cursor.execute("DELETE FROM PromptKeywords WHERE prompt_id = ?", (prompt_id,))
|
1664 |
-
|
1665 |
-
# Delete the prompt
|
1666 |
-
cursor.execute("DELETE FROM Prompts WHERE id = ?", (prompt_id,))
|
1667 |
-
|
1668 |
-
if cursor.rowcount == 0:
|
1669 |
-
return f"No prompt found with ID {prompt_id}"
|
1670 |
-
else:
|
1671 |
-
conn.commit()
|
1672 |
-
return f"Prompt with ID {prompt_id} has been successfully deleted"
|
1673 |
-
except sqlite3.Error as e:
|
1674 |
-
return f"An error occurred: {e}"
|
1675 |
-
|
1676 |
-
#
|
1677 |
-
#
|
1678 |
-
#######################################################################################################################
|
1679 |
-
|
1680 |
-
|
1681 |
#######################################################################################################################
|
1682 |
#
|
1683 |
# Function to fetch/update media content
|
@@ -2020,204 +1759,6 @@ def import_obsidian_note_to_db(note_data):
|
|
2020 |
#######################################################################################################################
|
2021 |
|
2022 |
|
2023 |
-
#######################################################################################################################
|
2024 |
-
#
|
2025 |
-
# Chat-related Functions
|
2026 |
-
|
2027 |
-
|
2028 |
-
|
2029 |
-
def create_chat_conversation(media_id, conversation_name):
|
2030 |
-
try:
|
2031 |
-
with db.get_connection() as conn:
|
2032 |
-
cursor = conn.cursor()
|
2033 |
-
cursor.execute('''
|
2034 |
-
INSERT INTO ChatConversations (media_id, conversation_name, created_at, updated_at)
|
2035 |
-
VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
|
2036 |
-
''', (media_id, conversation_name))
|
2037 |
-
conn.commit()
|
2038 |
-
return cursor.lastrowid
|
2039 |
-
except sqlite3.Error as e:
|
2040 |
-
logging.error(f"Error creating chat conversation: {e}")
|
2041 |
-
raise DatabaseError(f"Error creating chat conversation: {e}")
|
2042 |
-
|
2043 |
-
|
2044 |
-
def add_chat_message(conversation_id: int, sender: str, message: str) -> int:
|
2045 |
-
try:
|
2046 |
-
with db.get_connection() as conn:
|
2047 |
-
cursor = conn.cursor()
|
2048 |
-
cursor.execute('''
|
2049 |
-
INSERT INTO ChatMessages (conversation_id, sender, message)
|
2050 |
-
VALUES (?, ?, ?)
|
2051 |
-
''', (conversation_id, sender, message))
|
2052 |
-
conn.commit()
|
2053 |
-
return cursor.lastrowid
|
2054 |
-
except sqlite3.Error as e:
|
2055 |
-
logging.error(f"Error adding chat message: {e}")
|
2056 |
-
raise DatabaseError(f"Error adding chat message: {e}")
|
2057 |
-
|
2058 |
-
|
2059 |
-
def get_chat_messages(conversation_id: int) -> List[Dict[str, Any]]:
|
2060 |
-
try:
|
2061 |
-
with db.get_connection() as conn:
|
2062 |
-
cursor = conn.cursor()
|
2063 |
-
cursor.execute('''
|
2064 |
-
SELECT id, sender, message, timestamp
|
2065 |
-
FROM ChatMessages
|
2066 |
-
WHERE conversation_id = ?
|
2067 |
-
ORDER BY timestamp ASC
|
2068 |
-
''', (conversation_id,))
|
2069 |
-
messages = cursor.fetchall()
|
2070 |
-
return [
|
2071 |
-
{
|
2072 |
-
'id': msg[0],
|
2073 |
-
'sender': msg[1],
|
2074 |
-
'message': msg[2],
|
2075 |
-
'timestamp': msg[3]
|
2076 |
-
}
|
2077 |
-
for msg in messages
|
2078 |
-
]
|
2079 |
-
except sqlite3.Error as e:
|
2080 |
-
logging.error(f"Error retrieving chat messages: {e}")
|
2081 |
-
raise DatabaseError(f"Error retrieving chat messages: {e}")
|
2082 |
-
|
2083 |
-
|
2084 |
-
def search_chat_conversations(search_query: str) -> List[Dict[str, Any]]:
|
2085 |
-
try:
|
2086 |
-
with db.get_connection() as conn:
|
2087 |
-
cursor = conn.cursor()
|
2088 |
-
cursor.execute('''
|
2089 |
-
SELECT cc.id, cc.media_id, cc.conversation_name, cc.created_at, m.title as media_title
|
2090 |
-
FROM ChatConversations cc
|
2091 |
-
LEFT JOIN Media m ON cc.media_id = m.id
|
2092 |
-
WHERE cc.conversation_name LIKE ? OR m.title LIKE ?
|
2093 |
-
ORDER BY cc.updated_at DESC
|
2094 |
-
''', (f'%{search_query}%', f'%{search_query}%'))
|
2095 |
-
conversations = cursor.fetchall()
|
2096 |
-
return [
|
2097 |
-
{
|
2098 |
-
'id': conv[0],
|
2099 |
-
'media_id': conv[1],
|
2100 |
-
'conversation_name': conv[2],
|
2101 |
-
'created_at': conv[3],
|
2102 |
-
'media_title': conv[4] or "Unknown Media"
|
2103 |
-
}
|
2104 |
-
for conv in conversations
|
2105 |
-
]
|
2106 |
-
except sqlite3.Error as e:
|
2107 |
-
logging.error(f"Error searching chat conversations: {e}")
|
2108 |
-
return []
|
2109 |
-
|
2110 |
-
|
2111 |
-
def update_chat_message(message_id: int, new_message: str) -> None:
|
2112 |
-
try:
|
2113 |
-
with db.get_connection() as conn:
|
2114 |
-
cursor = conn.cursor()
|
2115 |
-
cursor.execute('''
|
2116 |
-
UPDATE ChatMessages
|
2117 |
-
SET message = ?, timestamp = CURRENT_TIMESTAMP
|
2118 |
-
WHERE id = ?
|
2119 |
-
''', (new_message, message_id))
|
2120 |
-
conn.commit()
|
2121 |
-
except sqlite3.Error as e:
|
2122 |
-
logging.error(f"Error updating chat message: {e}")
|
2123 |
-
raise DatabaseError(f"Error updating chat message: {e}")
|
2124 |
-
|
2125 |
-
|
2126 |
-
def delete_chat_message(message_id: int) -> None:
|
2127 |
-
try:
|
2128 |
-
with db.get_connection() as conn:
|
2129 |
-
cursor = conn.cursor()
|
2130 |
-
cursor.execute('DELETE FROM ChatMessages WHERE id = ?', (message_id,))
|
2131 |
-
conn.commit()
|
2132 |
-
except sqlite3.Error as e:
|
2133 |
-
logging.error(f"Error deleting chat message: {e}")
|
2134 |
-
raise DatabaseError(f"Error deleting chat message: {e}")
|
2135 |
-
|
2136 |
-
|
2137 |
-
def save_chat_history_to_database(chatbot, conversation_id, media_id, media_name, conversation_name):
|
2138 |
-
try:
|
2139 |
-
with db.get_connection() as conn:
|
2140 |
-
cursor = conn.cursor()
|
2141 |
-
|
2142 |
-
# If conversation_id is None, create a new conversation
|
2143 |
-
if conversation_id is None:
|
2144 |
-
cursor.execute('''
|
2145 |
-
INSERT INTO ChatConversations (media_id, media_name, conversation_name, created_at, updated_at)
|
2146 |
-
VALUES (?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
|
2147 |
-
''', (media_id, media_name, conversation_name))
|
2148 |
-
conversation_id = cursor.lastrowid
|
2149 |
-
else:
|
2150 |
-
# If conversation exists, update the media_name
|
2151 |
-
cursor.execute('''
|
2152 |
-
UPDATE ChatConversations
|
2153 |
-
SET media_name = ?, updated_at = CURRENT_TIMESTAMP
|
2154 |
-
WHERE id = ?
|
2155 |
-
''', (media_name, conversation_id))
|
2156 |
-
|
2157 |
-
# Save each message in the chatbot history
|
2158 |
-
for i, (user_msg, ai_msg) in enumerate(chatbot):
|
2159 |
-
cursor.execute('''
|
2160 |
-
INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
|
2161 |
-
VALUES (?, ?, ?, CURRENT_TIMESTAMP)
|
2162 |
-
''', (conversation_id, 'user', user_msg))
|
2163 |
-
|
2164 |
-
cursor.execute('''
|
2165 |
-
INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
|
2166 |
-
VALUES (?, ?, ?, CURRENT_TIMESTAMP)
|
2167 |
-
''', (conversation_id, 'ai', ai_msg))
|
2168 |
-
|
2169 |
-
# Update the conversation's updated_at timestamp
|
2170 |
-
cursor.execute('''
|
2171 |
-
UPDATE ChatConversations
|
2172 |
-
SET updated_at = CURRENT_TIMESTAMP
|
2173 |
-
WHERE id = ?
|
2174 |
-
''', (conversation_id,))
|
2175 |
-
|
2176 |
-
conn.commit()
|
2177 |
-
|
2178 |
-
return conversation_id
|
2179 |
-
except Exception as e:
|
2180 |
-
logging.error(f"Error saving chat history to database: {str(e)}")
|
2181 |
-
raise
|
2182 |
-
|
2183 |
-
|
2184 |
-
def get_conversation_name(conversation_id):
|
2185 |
-
if conversation_id is None:
|
2186 |
-
return None
|
2187 |
-
|
2188 |
-
try:
|
2189 |
-
with sqlite3.connect('media_summary.db') as conn: # Replace with your actual database name
|
2190 |
-
cursor = conn.cursor()
|
2191 |
-
|
2192 |
-
query = """
|
2193 |
-
SELECT conversation_name, media_name
|
2194 |
-
FROM ChatConversations
|
2195 |
-
WHERE id = ?
|
2196 |
-
"""
|
2197 |
-
|
2198 |
-
cursor.execute(query, (conversation_id,))
|
2199 |
-
result = cursor.fetchone()
|
2200 |
-
|
2201 |
-
if result:
|
2202 |
-
conversation_name, media_name = result
|
2203 |
-
if conversation_name:
|
2204 |
-
return conversation_name
|
2205 |
-
elif media_name:
|
2206 |
-
return f"{media_name}-chat"
|
2207 |
-
|
2208 |
-
return None # Return None if no result found
|
2209 |
-
except sqlite3.Error as e:
|
2210 |
-
logging.error(f"Database error in get_conversation_name: {e}")
|
2211 |
-
return None
|
2212 |
-
except Exception as e:
|
2213 |
-
logging.error(f"Unexpected error in get_conversation_name: {e}")
|
2214 |
-
return None
|
2215 |
-
|
2216 |
-
#
|
2217 |
-
# End of Chat-related Functions
|
2218 |
-
#######################################################################################################################
|
2219 |
-
|
2220 |
-
|
2221 |
#######################################################################################################################
|
2222 |
#
|
2223 |
# Functions to Compare Transcripts
|
@@ -2837,29 +2378,42 @@ def process_chunks(database, chunks: List[Dict], media_id: int, batch_size: int
|
|
2837 |
:param media_id: ID of the media these chunks belong to
|
2838 |
:param batch_size: Number of chunks to process in each batch
|
2839 |
"""
|
|
|
|
|
2840 |
total_chunks = len(chunks)
|
2841 |
processed_chunks = 0
|
2842 |
|
2843 |
-
|
2844 |
-
|
2845 |
-
|
2846 |
-
|
2847 |
-
|
2848 |
-
|
2849 |
-
|
2850 |
-
try:
|
2851 |
-
database.execute_many(
|
2852 |
-
"INSERT INTO MediaChunks (media_id, chunk_text, start_index, end_index) VALUES (?, ?, ?, ?)",
|
2853 |
-
chunk_data
|
2854 |
-
)
|
2855 |
-
processed_chunks += len(batch)
|
2856 |
-
logging.info(f"Processed {processed_chunks}/{total_chunks} chunks for media_id {media_id}")
|
2857 |
-
except Exception as e:
|
2858 |
-
logging.error(f"Error inserting chunk batch for media_id {media_id}: {e}")
|
2859 |
-
# Optionally, you could raise an exception here to stop processing
|
2860 |
-
# raise
|
2861 |
|
2862 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2863 |
|
2864 |
|
2865 |
# Usage example:
|
@@ -2995,46 +2549,48 @@ def update_media_table(db):
|
|
2995 |
#
|
2996 |
# Workflow Functions
|
2997 |
|
|
|
2998 |
def save_workflow_chat_to_db(chat_history, workflow_name, conversation_id=None):
|
2999 |
-
|
3000 |
-
|
3001 |
-
|
3002 |
-
|
3003 |
-
|
3004 |
-
|
3005 |
-
|
3006 |
-
|
3007 |
-
|
3008 |
-
|
3009 |
-
|
3010 |
-
|
3011 |
-
|
3012 |
-
|
3013 |
-
|
3014 |
-
|
3015 |
-
|
3016 |
-
|
3017 |
-
|
3018 |
-
|
3019 |
-
|
3020 |
-
|
3021 |
-
|
3022 |
-
|
3023 |
-
|
3024 |
-
|
3025 |
-
|
3026 |
-
|
3027 |
-
|
3028 |
-
|
3029 |
-
|
3030 |
-
|
3031 |
-
|
3032 |
-
|
3033 |
-
|
3034 |
-
|
3035 |
-
|
3036 |
-
|
3037 |
-
|
|
|
3038 |
|
3039 |
|
3040 |
def get_workflow_chat(conversation_id):
|
|
|
21 |
# 11. browse_items(search_query, search_type)
|
22 |
# 12. fetch_item_details(media_id: int)
|
23 |
# 13. add_media_version(media_id: int, prompt: str, summary: str)
|
24 |
+
# 14. search_media_db(search_query: str, search_fields: List[str], keywords: str, page: int = 1, results_per_page: int = 10)
|
25 |
# 15. search_and_display(search_query, search_fields, keywords, page)
|
26 |
# 16. display_details(index, results)
|
27 |
# 17. get_details(index, dataframe)
|
|
|
55 |
import shutil
|
56 |
import sqlite3
|
57 |
import threading
|
58 |
+
import time
|
59 |
import traceback
|
60 |
from contextlib import contextmanager
|
61 |
from datetime import datetime, timedelta
|
62 |
from typing import List, Tuple, Dict, Any, Optional
|
63 |
from urllib.parse import quote
|
64 |
|
65 |
+
from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
|
66 |
# Local Libraries
|
67 |
from App_Function_Libraries.Utils.Utils import get_project_relative_path, get_database_path, \
|
68 |
get_database_dir
|
|
|
344 |
)
|
345 |
''',
|
346 |
'''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
347 |
CREATE TABLE IF NOT EXISTS Transcripts (
|
348 |
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
349 |
media_id INTEGER,
|
|
|
402 |
'CREATE INDEX IF NOT EXISTS idx_mediakeywords_keyword_id ON MediaKeywords(keyword_id)',
|
403 |
'CREATE INDEX IF NOT EXISTS idx_media_version_media_id ON MediaVersion(media_id)',
|
404 |
'CREATE INDEX IF NOT EXISTS idx_mediamodifications_media_id ON MediaModifications(media_id)',
|
|
|
|
|
405 |
'CREATE INDEX IF NOT EXISTS idx_media_is_trash ON Media(is_trash)',
|
406 |
'CREATE INDEX IF NOT EXISTS idx_mediachunks_media_id ON MediaChunks(media_id)',
|
407 |
'CREATE INDEX IF NOT EXISTS idx_unvectorized_media_chunks_media_id ON UnvectorizedMediaChunks(media_id)',
|
|
|
585 |
# Function to add media with keywords
|
586 |
def add_media_with_keywords(url, title, media_type, content, keywords, prompt, summary, transcription_model, author,
|
587 |
ingestion_date):
|
588 |
+
log_counter("add_media_with_keywords_attempt")
|
589 |
+
start_time = time.time()
|
590 |
logging.debug(f"Entering add_media_with_keywords: URL={url}, Title={title}")
|
591 |
+
|
592 |
# Set default values for missing fields
|
593 |
if url is None:
|
594 |
url = 'localhost'
|
|
|
604 |
author = author or 'Unknown'
|
605 |
ingestion_date = ingestion_date or datetime.now().strftime('%Y-%m-%d')
|
606 |
|
607 |
+
if media_type not in ['article', 'audio', 'book', 'document', 'mediawiki_article', 'mediawiki_dump',
|
608 |
+
'obsidian_note', 'podcast', 'text', 'video', 'unknown']:
|
609 |
+
log_counter("add_media_with_keywords_error", labels={"error_type": "InvalidMediaType"})
|
610 |
+
duration = time.time() - start_time
|
611 |
+
log_histogram("add_media_with_keywords_duration", duration)
|
612 |
+
raise InputError("Invalid media type. Allowed types: article, audio file, document, obsidian_note, podcast, text, video, unknown.")
|
613 |
|
614 |
if ingestion_date and not is_valid_date(ingestion_date):
|
615 |
+
log_counter("add_media_with_keywords_error", labels={"error_type": "InvalidDateFormat"})
|
616 |
+
duration = time.time() - start_time
|
617 |
+
log_histogram("add_media_with_keywords_duration", duration)
|
618 |
raise InputError("Invalid ingestion date format. Use YYYY-MM-DD.")
|
619 |
|
620 |
# Handle keywords as either string or list
|
|
|
643 |
logging.debug(f"Existing media ID for {url}: {existing_media_id}")
|
644 |
|
645 |
if existing_media_id:
|
646 |
+
# Update existing media
|
647 |
media_id = existing_media_id
|
648 |
logging.debug(f"Updating existing media with ID: {media_id}")
|
649 |
cursor.execute('''
|
|
|
651 |
SET content = ?, transcription_model = ?, type = ?, author = ?, ingestion_date = ?
|
652 |
WHERE id = ?
|
653 |
''', (content, transcription_model, media_type, author, ingestion_date, media_id))
|
654 |
+
log_counter("add_media_with_keywords_update")
|
655 |
else:
|
656 |
+
# Insert new media
|
657 |
logging.debug("Inserting new media")
|
658 |
cursor.execute('''
|
659 |
INSERT INTO Media (url, title, type, content, author, ingestion_date, transcription_model)
|
|
|
661 |
''', (url, title, media_type, content, author, ingestion_date, transcription_model))
|
662 |
media_id = cursor.lastrowid
|
663 |
logging.debug(f"New media inserted with ID: {media_id}")
|
664 |
+
log_counter("add_media_with_keywords_insert")
|
665 |
|
666 |
cursor.execute('''
|
667 |
INSERT INTO MediaModifications (media_id, prompt, summary, modification_date)
|
|
|
691 |
conn.commit()
|
692 |
logging.info(f"Media '{title}' successfully added/updated with ID: {media_id}")
|
693 |
|
694 |
+
duration = time.time() - start_time
|
695 |
+
log_histogram("add_media_with_keywords_duration", duration)
|
696 |
+
log_counter("add_media_with_keywords_success")
|
697 |
+
|
698 |
+
return media_id, f"Media '{title}' added/updated successfully with keywords: {', '.join(keyword_list)}"
|
699 |
|
700 |
except sqlite3.Error as e:
|
701 |
logging.error(f"SQL Error in add_media_with_keywords: {e}")
|
702 |
+
duration = time.time() - start_time
|
703 |
+
log_histogram("add_media_with_keywords_duration", duration)
|
704 |
+
log_counter("add_media_with_keywords_error", labels={"error_type": "SQLiteError"})
|
705 |
raise DatabaseError(f"Error adding media with keywords: {e}")
|
706 |
except Exception as e:
|
707 |
logging.error(f"Unexpected Error in add_media_with_keywords: {e}")
|
708 |
+
duration = time.time() - start_time
|
709 |
+
log_histogram("add_media_with_keywords_duration", duration)
|
710 |
+
log_counter("add_media_with_keywords_error", labels={"error_type": type(e).__name__})
|
711 |
raise DatabaseError(f"Unexpected error: {e}")
|
712 |
|
713 |
|
|
|
782 |
|
783 |
# Function to add a keyword
|
784 |
def add_keyword(keyword: str) -> int:
|
785 |
+
log_counter("add_keyword_attempt")
|
786 |
+
start_time = time.time()
|
787 |
+
|
788 |
if not keyword.strip():
|
789 |
+
log_counter("add_keyword_error", labels={"error_type": "EmptyKeyword"})
|
790 |
+
duration = time.time() - start_time
|
791 |
+
log_histogram("add_keyword_duration", duration)
|
792 |
raise DatabaseError("Keyword cannot be empty")
|
793 |
|
794 |
keyword = keyword.strip().lower()
|
|
|
810 |
|
811 |
logging.info(f"Keyword '{keyword}' added or updated with ID: {keyword_id}")
|
812 |
conn.commit()
|
813 |
+
|
814 |
+
duration = time.time() - start_time
|
815 |
+
log_histogram("add_keyword_duration", duration)
|
816 |
+
log_counter("add_keyword_success")
|
817 |
+
|
818 |
return keyword_id
|
819 |
except sqlite3.IntegrityError as e:
|
820 |
logging.error(f"Integrity error adding keyword: {e}")
|
821 |
+
duration = time.time() - start_time
|
822 |
+
log_histogram("add_keyword_duration", duration)
|
823 |
+
log_counter("add_keyword_error", labels={"error_type": "IntegrityError"})
|
824 |
raise DatabaseError(f"Integrity error adding keyword: {e}")
|
825 |
except sqlite3.Error as e:
|
826 |
logging.error(f"Error adding keyword: {e}")
|
827 |
+
duration = time.time() - start_time
|
828 |
+
log_histogram("add_keyword_duration", duration)
|
829 |
+
log_counter("add_keyword_error", labels={"error_type": "SQLiteError"})
|
830 |
raise DatabaseError(f"Error adding keyword: {e}")
|
831 |
|
832 |
|
833 |
|
834 |
# Function to delete a keyword
|
835 |
def delete_keyword(keyword: str) -> str:
|
836 |
+
log_counter("delete_keyword_attempt")
|
837 |
+
start_time = time.time()
|
838 |
+
|
839 |
keyword = keyword.strip().lower()
|
840 |
with db.get_connection() as conn:
|
841 |
cursor = conn.cursor()
|
|
|
846 |
cursor.execute('DELETE FROM Keywords WHERE keyword = ?', (keyword,))
|
847 |
cursor.execute('DELETE FROM keyword_fts WHERE rowid = ?', (keyword_id[0],))
|
848 |
conn.commit()
|
849 |
+
|
850 |
+
duration = time.time() - start_time
|
851 |
+
log_histogram("delete_keyword_duration", duration)
|
852 |
+
log_counter("delete_keyword_success")
|
853 |
+
|
854 |
return f"Keyword '{keyword}' deleted successfully."
|
855 |
else:
|
856 |
+
duration = time.time() - start_time
|
857 |
+
log_histogram("delete_keyword_duration", duration)
|
858 |
+
log_counter("delete_keyword_not_found")
|
859 |
+
|
860 |
return f"Keyword '{keyword}' not found."
|
861 |
except sqlite3.Error as e:
|
862 |
+
duration = time.time() - start_time
|
863 |
+
log_histogram("delete_keyword_duration", duration)
|
864 |
+
log_counter("delete_keyword_error", labels={"error_type": type(e).__name__})
|
865 |
+
logging.error(f"Error deleting keyword: {e}")
|
866 |
raise DatabaseError(f"Error deleting keyword: {e}")
|
867 |
|
868 |
|
|
|
1036 |
|
1037 |
|
1038 |
# Function to search the database with advanced options, including keyword search and full-text search
|
1039 |
+
def search_media_db(search_query: str, search_fields: List[str], keywords: str, page: int = 1, results_per_page: int = 20, connection=None):
|
1040 |
if page < 1:
|
1041 |
raise ValueError("Page number must be 1 or greater.")
|
1042 |
|
|
|
1091 |
|
1092 |
# Gradio function to handle user input and display results with pagination, with better feedback
|
1093 |
def search_and_display(search_query, search_fields, keywords, page):
|
1094 |
+
results = search_media_db(search_query, search_fields, keywords, page)
|
1095 |
|
1096 |
if isinstance(results, pd.DataFrame):
|
1097 |
# Convert DataFrame to a list of tuples or lists
|
|
|
1169 |
# Function to export search results to CSV or markdown with pagination
|
1170 |
def export_to_file(search_query: str, search_fields: List[str], keyword: str, page: int = 1, results_per_file: int = 1000, export_format: str = 'csv'):
|
1171 |
try:
|
1172 |
+
results = search_media_db(search_query, search_fields, keyword, page, results_per_file)
|
1173 |
if not results:
|
1174 |
return "No results found to export."
|
1175 |
|
|
|
1417 |
#######################################################################################################################
|
1418 |
|
1419 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1420 |
#######################################################################################################################
|
1421 |
#
|
1422 |
# Function to fetch/update media content
|
|
|
1759 |
#######################################################################################################################
|
1760 |
|
1761 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1762 |
#######################################################################################################################
|
1763 |
#
|
1764 |
# Functions to Compare Transcripts
|
|
|
2378 |
:param media_id: ID of the media these chunks belong to
|
2379 |
:param batch_size: Number of chunks to process in each batch
|
2380 |
"""
|
2381 |
+
log_counter("process_chunks_attempt", labels={"media_id": media_id})
|
2382 |
+
start_time = time.time()
|
2383 |
total_chunks = len(chunks)
|
2384 |
processed_chunks = 0
|
2385 |
|
2386 |
+
try:
|
2387 |
+
for i in range(0, total_chunks, batch_size):
|
2388 |
+
batch = chunks[i:i + batch_size]
|
2389 |
+
chunk_data = [
|
2390 |
+
(media_id, chunk['text'], chunk['start_index'], chunk['end_index'])
|
2391 |
+
for chunk in batch
|
2392 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2393 |
|
2394 |
+
try:
|
2395 |
+
database.execute_many(
|
2396 |
+
"INSERT INTO MediaChunks (media_id, chunk_text, start_index, end_index) VALUES (?, ?, ?, ?)",
|
2397 |
+
chunk_data
|
2398 |
+
)
|
2399 |
+
processed_chunks += len(batch)
|
2400 |
+
logging.info(f"Processed {processed_chunks}/{total_chunks} chunks for media_id {media_id}")
|
2401 |
+
log_counter("process_chunks_batch_success", labels={"media_id": media_id})
|
2402 |
+
except Exception as e:
|
2403 |
+
logging.error(f"Error inserting chunk batch for media_id {media_id}: {e}")
|
2404 |
+
log_counter("process_chunks_batch_error", labels={"media_id": media_id, "error_type": type(e).__name__})
|
2405 |
+
# Optionally, you could raise an exception here to stop processing
|
2406 |
+
# raise
|
2407 |
+
|
2408 |
+
logging.info(f"Finished processing all {total_chunks} chunks for media_id {media_id}")
|
2409 |
+
duration = time.time() - start_time
|
2410 |
+
log_histogram("process_chunks_duration", duration, labels={"media_id": media_id})
|
2411 |
+
log_counter("process_chunks_success", labels={"media_id": media_id})
|
2412 |
+
except Exception as e:
|
2413 |
+
duration = time.time() - start_time
|
2414 |
+
log_histogram("process_chunks_duration", duration, labels={"media_id": media_id})
|
2415 |
+
log_counter("process_chunks_error", labels={"media_id": media_id, "error_type": type(e).__name__})
|
2416 |
+
logging.error(f"Error processing chunks for media_id {media_id}: {e}")
|
2417 |
|
2418 |
|
2419 |
# Usage example:
|
|
|
2549 |
#
|
2550 |
# Workflow Functions
|
2551 |
|
2552 |
+
# Workflow Functions
|
2553 |
def save_workflow_chat_to_db(chat_history, workflow_name, conversation_id=None):
    """Placeholder for persisting a workflow chat; currently saves nothing.

    The earlier SQLite implementation is kept after this function as
    commented-out reference code. Until it is restored, this function does
    not touch the database; it logs a warning so the dropped save shows up
    in the logs instead of disappearing silently.

    Args:
        chat_history: Chat turns to save. The disabled implementation
            iterated it as ``(user_msg, ai_msg)`` pairs.
        workflow_name: Name of the workflow the chat belongs to.
        conversation_id: Existing conversation to append to, or ``None`` to
            start a new one (per the disabled implementation).

    Returns:
        None. NOTE(review): the disabled implementation returned a
        ``(conversation_id, status_message)`` tuple; confirm that no caller
        still unpacks the result.
    """
    logging.warning(
        "save_workflow_chat_to_db is disabled; chat for workflow %r "
        "(conversation_id=%r) was not saved.",
        workflow_name,
        conversation_id,
    )
|
2555 |
+
# try:
|
2556 |
+
# with db.get_connection() as conn:
|
2557 |
+
# cursor = conn.cursor()
|
2558 |
+
#
|
2559 |
+
# if conversation_id is None:
|
2560 |
+
# # Create a new conversation
|
2561 |
+
# conversation_name = f"{workflow_name}_Workflow_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
2562 |
+
# cursor.execute('''
|
2563 |
+
# INSERT INTO ChatConversations (media_id, media_name, conversation_name, created_at, updated_at)
|
2564 |
+
# VALUES (NULL, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
|
2565 |
+
# ''', (workflow_name, conversation_name))
|
2566 |
+
# conversation_id = cursor.lastrowid
|
2567 |
+
# else:
|
2568 |
+
# # Update existing conversation
|
2569 |
+
# cursor.execute('''
|
2570 |
+
# UPDATE ChatConversations
|
2571 |
+
# SET updated_at = CURRENT_TIMESTAMP
|
2572 |
+
# WHERE id = ?
|
2573 |
+
# ''', (conversation_id,))
|
2574 |
+
#
|
2575 |
+
# # Save messages
|
2576 |
+
# for user_msg, ai_msg in chat_history:
|
2577 |
+
# if user_msg:
|
2578 |
+
# cursor.execute('''
|
2579 |
+
# INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
|
2580 |
+
# VALUES (?, 'user', ?, CURRENT_TIMESTAMP)
|
2581 |
+
# ''', (conversation_id, user_msg))
|
2582 |
+
# if ai_msg:
|
2583 |
+
# cursor.execute('''
|
2584 |
+
# INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
|
2585 |
+
# VALUES (?, 'ai', ?, CURRENT_TIMESTAMP)
|
2586 |
+
# ''', (conversation_id, ai_msg))
|
2587 |
+
#
|
2588 |
+
# conn.commit()
|
2589 |
+
#
|
2590 |
+
# return conversation_id, f"Chat saved successfully! Conversation ID: {conversation_id}"
|
2591 |
+
# except Exception as e:
|
2592 |
+
# logging.error(f"Error saving workflow chat to database: {str(e)}")
|
2593 |
+
# return None, f"Error saving chat to database: {str(e)}"
|
2594 |
|
2595 |
|
2596 |
def get_workflow_chat(conversation_id):
|
App_Function_Libraries/Gradio_Related.py
CHANGED
@@ -1,420 +1,600 @@
|
|
1 |
-
# Gradio_Related.py
|
2 |
-
#########################################
|
3 |
-
# Gradio UI Functions Library
|
4 |
-
# I fucking hate Gradio.
|
5 |
-
#
|
6 |
-
#########################################
|
7 |
-
#
|
8 |
-
# Built-In Imports
|
9 |
-
import logging
|
10 |
-
import os
|
11 |
-
import webbrowser
|
12 |
-
|
13 |
-
#
|
14 |
-
|
15 |
-
|
16 |
-
#
|
17 |
-
|
18 |
-
from App_Function_Libraries.DB.
|
19 |
-
from App_Function_Libraries.Gradio_UI.
|
20 |
-
from App_Function_Libraries.Gradio_UI.
|
21 |
-
from App_Function_Libraries.Gradio_UI.
|
22 |
-
from App_Function_Libraries.Gradio_UI.
|
23 |
-
|
24 |
-
from App_Function_Libraries.Gradio_UI.
|
25 |
-
|
26 |
-
from App_Function_Libraries.Gradio_UI.
|
27 |
-
|
28 |
-
from App_Function_Libraries.Gradio_UI.
|
29 |
-
|
30 |
-
from App_Function_Libraries.Gradio_UI.
|
31 |
-
from App_Function_Libraries.Gradio_UI.
|
32 |
-
|
33 |
-
from App_Function_Libraries.Gradio_UI.
|
34 |
-
|
35 |
-
from App_Function_Libraries.Gradio_UI.
|
36 |
-
|
37 |
-
|
38 |
-
from App_Function_Libraries.Gradio_UI.
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
from App_Function_Libraries.Gradio_UI.
|
44 |
-
from App_Function_Libraries.Gradio_UI.
|
45 |
-
from App_Function_Libraries.Gradio_UI.
|
46 |
-
|
47 |
-
from App_Function_Libraries.Gradio_UI.
|
48 |
-
from App_Function_Libraries.Gradio_UI.
|
49 |
-
|
50 |
-
from App_Function_Libraries.Gradio_UI.
|
51 |
-
from App_Function_Libraries.Gradio_UI.
|
52 |
-
|
53 |
-
from App_Function_Libraries.Gradio_UI.
|
54 |
-
|
55 |
-
|
56 |
-
from App_Function_Libraries.Gradio_UI.
|
57 |
-
|
58 |
-
from App_Function_Libraries.Gradio_UI.
|
59 |
-
|
60 |
-
|
61 |
-
from App_Function_Libraries.Gradio_UI.
|
62 |
-
from App_Function_Libraries.Gradio_UI.
|
63 |
-
|
64 |
-
from App_Function_Libraries.Gradio_UI.
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
from App_Function_Libraries.Gradio_UI.
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
#
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
#
|
114 |
-
#
|
115 |
-
|
116 |
-
#
|
117 |
-
#
|
118 |
-
#
|
119 |
-
|
120 |
-
#
|
121 |
-
#
|
122 |
-
#
|
123 |
-
|
124 |
-
#
|
125 |
-
#
|
126 |
-
#
|
127 |
-
|
128 |
-
#
|
129 |
-
#
|
130 |
-
|
131 |
-
#
|
132 |
-
#
|
133 |
-
#
|
134 |
-
#
|
135 |
-
|
136 |
-
#
|
137 |
-
#
|
138 |
-
|
139 |
-
#
|
140 |
-
#
|
141 |
-
#
|
142 |
-
#
|
143 |
-
#
|
144 |
-
|
145 |
-
#
|
146 |
-
|
147 |
-
#
|
148 |
-
#
|
149 |
-
#
|
150 |
-
#
|
151 |
-
#
|
152 |
-
|
153 |
-
#
|
154 |
-
#
|
155 |
-
|
156 |
-
#
|
157 |
-
#
|
158 |
-
#
|
159 |
-
#
|
160 |
-
#
|
161 |
-
|
162 |
-
#
|
163 |
-
|
164 |
-
#
|
165 |
-
#
|
166 |
-
#
|
167 |
-
|
168 |
-
# FIXME
|
169 |
-
#
|
170 |
-
#
|
171 |
-
# End of
|
172 |
-
|
173 |
-
#
|
174 |
-
#
|
175 |
-
#
|
176 |
-
#
|
177 |
-
#
|
178 |
-
|
179 |
-
|
180 |
-
#
|
181 |
-
#
|
182 |
-
#
|
183 |
-
|
184 |
-
#
|
185 |
-
#
|
186 |
-
#
|
187 |
-
#
|
188 |
-
|
189 |
-
#
|
190 |
-
|
191 |
-
#
|
192 |
-
# Functions
|
193 |
-
#
|
194 |
-
#
|
195 |
-
#
|
196 |
-
#
|
197 |
-
|
198 |
-
#
|
199 |
-
|
200 |
-
#
|
201 |
-
# Functions
|
202 |
-
#
|
203 |
-
#
|
204 |
-
#
|
205 |
-
|
206 |
-
#
|
207 |
-
#
|
208 |
-
|
209 |
-
#
|
210 |
-
#
|
211 |
-
|
212 |
-
#
|
213 |
-
#
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
#
|
218 |
-
#
|
219 |
-
#
|
220 |
-
#
|
221 |
-
#
|
222 |
-
|
223 |
-
#
|
224 |
-
#
|
225 |
-
|
226 |
-
|
227 |
-
#
|
228 |
-
#
|
229 |
-
#
|
230 |
-
#
|
231 |
-
#
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
#
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Gradio_Related.py
|
2 |
+
#########################################
|
3 |
+
# Gradio UI Functions Library
|
4 |
+
# I fucking hate Gradio.
|
5 |
+
#
|
6 |
+
#########################################
|
7 |
+
#
|
8 |
+
# Built-In Imports
|
9 |
+
import logging
|
10 |
+
import os
|
11 |
+
import webbrowser
|
12 |
+
#
|
13 |
+
# Import 3rd-Party Libraries
|
14 |
+
import gradio as gr
|
15 |
+
#
|
16 |
+
# Local Imports
|
17 |
+
from App_Function_Libraries.DB.DB_Manager import get_db_config, backup_dir
|
18 |
+
from App_Function_Libraries.DB.RAG_QA_Chat_DB import create_tables
|
19 |
+
from App_Function_Libraries.Gradio_UI.Anki_tab import create_anki_validation_tab, create_anki_generator_tab
|
20 |
+
from App_Function_Libraries.Gradio_UI.Arxiv_tab import create_arxiv_tab
|
21 |
+
from App_Function_Libraries.Gradio_UI.Audio_ingestion_tab import create_audio_processing_tab
|
22 |
+
from App_Function_Libraries.Gradio_UI.Backup_RAG_Notes_Character_Chat_tab import create_database_management_interface
|
23 |
+
from App_Function_Libraries.Gradio_UI.Book_Ingestion_tab import create_import_book_tab
|
24 |
+
from App_Function_Libraries.Gradio_UI.Character_Chat_tab import create_character_card_interaction_tab, create_character_chat_mgmt_tab, create_custom_character_card_tab, \
|
25 |
+
create_character_card_validation_tab, create_export_characters_tab
|
26 |
+
from App_Function_Libraries.Gradio_UI.Character_interaction_tab import create_narrator_controlled_conversation_tab, \
|
27 |
+
create_multiple_character_chat_tab
|
28 |
+
from App_Function_Libraries.Gradio_UI.Chat_ui import create_chat_interface_four, create_chat_interface_multi_api, \
|
29 |
+
create_chat_interface_stacked, create_chat_interface
|
30 |
+
from App_Function_Libraries.Gradio_UI.Config_tab import create_config_editor_tab
|
31 |
+
from App_Function_Libraries.Gradio_UI.Explain_summarize_tab import create_summarize_explain_tab
|
32 |
+
from App_Function_Libraries.Gradio_UI.Export_Functionality import create_rag_export_tab, create_export_tabs
|
33 |
+
#from App_Function_Libraries.Gradio_UI.Backup_Functionality import create_backup_tab, create_view_backups_tab, \
|
34 |
+
# create_restore_backup_tab
|
35 |
+
from App_Function_Libraries.Gradio_UI.Import_Functionality import create_import_single_prompt_tab, \
|
36 |
+
create_import_obsidian_vault_tab, create_import_item_tab, create_import_multiple_prompts_tab, \
|
37 |
+
create_conversation_import_tab
|
38 |
+
from App_Function_Libraries.Gradio_UI.Introduction_tab import create_introduction_tab
|
39 |
+
from App_Function_Libraries.Gradio_UI.Keywords import create_view_keywords_tab, create_add_keyword_tab, \
|
40 |
+
create_delete_keyword_tab, create_export_keywords_tab, create_rag_qa_keywords_tab, create_character_keywords_tab, \
|
41 |
+
create_meta_keywords_tab, create_prompt_keywords_tab
|
42 |
+
from App_Function_Libraries.Gradio_UI.Live_Recording import create_live_recording_tab
|
43 |
+
from App_Function_Libraries.Gradio_UI.Llamafile_tab import create_chat_with_llamafile_tab
|
44 |
+
#from App_Function_Libraries.Gradio_UI.MMLU_Pro_tab import create_mmlu_pro_tab
|
45 |
+
from App_Function_Libraries.Gradio_UI.Media_edit import create_prompt_clone_tab, create_prompt_edit_tab, \
|
46 |
+
create_media_edit_and_clone_tab, create_media_edit_tab
|
47 |
+
from App_Function_Libraries.Gradio_UI.Media_wiki_tab import create_mediawiki_import_tab, create_mediawiki_config_tab
|
48 |
+
from App_Function_Libraries.Gradio_UI.Mind_Map_tab import create_mindmap_tab
|
49 |
+
from App_Function_Libraries.Gradio_UI.PDF_ingestion_tab import create_pdf_ingestion_tab, create_pdf_ingestion_test_tab
|
50 |
+
from App_Function_Libraries.Gradio_UI.Plaintext_tab_import import create_plain_text_import_tab
|
51 |
+
from App_Function_Libraries.Gradio_UI.Podcast_tab import create_podcast_tab
|
52 |
+
from App_Function_Libraries.Gradio_UI.Prompt_Suggestion_tab import create_prompt_suggestion_tab
|
53 |
+
from App_Function_Libraries.Gradio_UI.RAG_QA_Chat_tab import create_rag_qa_chat_tab, create_rag_qa_notes_management_tab, \
|
54 |
+
create_rag_qa_chat_management_tab
|
55 |
+
from App_Function_Libraries.Gradio_UI.Re_summarize_tab import create_resummary_tab
|
56 |
+
from App_Function_Libraries.Gradio_UI.Search_Tab import create_prompt_search_tab, \
|
57 |
+
create_search_summaries_tab, create_search_tab
|
58 |
+
from App_Function_Libraries.Gradio_UI.RAG_Chat_tab import create_rag_tab
|
59 |
+
from App_Function_Libraries.Gradio_UI.Embeddings_tab import create_embeddings_tab, create_view_embeddings_tab, \
|
60 |
+
create_purge_embeddings_tab
|
61 |
+
from App_Function_Libraries.Gradio_UI.Semantic_Scholar_tab import create_semantic_scholar_tab
|
62 |
+
from App_Function_Libraries.Gradio_UI.Trash import create_view_trash_tab, create_empty_trash_tab, \
|
63 |
+
create_delete_trash_tab, create_search_and_mark_trash_tab
|
64 |
+
from App_Function_Libraries.Gradio_UI.Utilities import create_utilities_yt_timestamp_tab, create_utilities_yt_audio_tab, \
|
65 |
+
create_utilities_yt_video_tab
|
66 |
+
from App_Function_Libraries.Gradio_UI.Video_transcription_tab import create_video_transcription_tab
|
67 |
+
from App_Function_Libraries.Gradio_UI.View_tab import create_manage_items_tab
|
68 |
+
from App_Function_Libraries.Gradio_UI.Website_scraping_tab import create_website_scraping_tab
|
69 |
+
from App_Function_Libraries.Gradio_UI.Workflows_tab import chat_workflows_tab
|
70 |
+
from App_Function_Libraries.Gradio_UI.View_DB_Items_tab import create_view_all_mediadb_with_versions_tab, \
|
71 |
+
create_viewing_mediadb_tab, create_view_all_rag_notes_tab, create_viewing_ragdb_tab, \
|
72 |
+
create_mediadb_keyword_search_tab, create_ragdb_keyword_items_tab
|
73 |
+
from App_Function_Libraries.Gradio_UI.Prompts_tab import create_prompt_view_tab, create_prompts_export_tab
|
74 |
+
#
|
75 |
+
# Gradio UI Imports
|
76 |
+
from App_Function_Libraries.Gradio_UI.Evaluations_Benchmarks_tab import create_geval_tab, create_infinite_bench_tab
|
77 |
+
from App_Function_Libraries.Gradio_UI.XML_Ingestion_Tab import create_xml_import_tab
|
78 |
+
#from App_Function_Libraries.Local_LLM.Local_LLM_huggingface import create_huggingface_tab
|
79 |
+
from App_Function_Libraries.Local_LLM.Local_LLM_ollama import create_ollama_tab
|
80 |
+
from App_Function_Libraries.Utils.Utils import load_and_log_configs
|
81 |
+
|
82 |
+
#
|
83 |
+
#######################################################################################################################
|
84 |
+
# Function Definitions
|
85 |
+
#
|
86 |
+
|
87 |
+
|
88 |
+
# Disable Gradio Analytics
# NOTE(review): this assignment runs after `import gradio as gr` above; confirm
# that Gradio reads GRADIO_ANALYTICS_ENABLED lazily rather than at import time.
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'


# Module-level defaults.
# NOTE(review): launch_ui() takes parameters named `share_public` and
# `server_mode`, which shadow these globals inside that function.
custom_prompt_input = None
server_mode = False
share_public = False
|
95 |
+
custom_prompt_summarize_bulleted_notes = ("""
|
96 |
+
<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
97 |
+
**Bulleted Note Creation Guidelines**
|
98 |
+
|
99 |
+
**Headings**:
|
100 |
+
- Based on referenced topics, not categories like quotes or terms
|
101 |
+
- Surrounded by **bold** formatting
|
102 |
+
- Not listed as bullet points
|
103 |
+
- No space between headings and list items underneath
|
104 |
+
|
105 |
+
**Emphasis**:
|
106 |
+
- **Important terms** set in bold font
|
107 |
+
- **Text ending in a colon**: also bolded
|
108 |
+
|
109 |
+
**Review**:
|
110 |
+
- Ensure adherence to specified format
|
111 |
+
- Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
|
112 |
+
""")
|
113 |
+
#
|
114 |
+
# End of globals
|
115 |
+
#######################################################################################################################
|
116 |
+
#
|
117 |
+
# Start of Video/Audio Transcription and Summarization Functions
|
118 |
+
#
|
119 |
+
# Functions:
|
120 |
+
# FIXME
|
121 |
+
#
|
122 |
+
#
|
123 |
+
################################################################################################################
|
124 |
+
# Functions for Re-Summarization
|
125 |
+
#
|
126 |
+
# Functions:
|
127 |
+
# FIXME
|
128 |
+
# End of Re-Summarization Functions
|
129 |
+
#
|
130 |
+
############################################################################################################################################################################################################################
|
131 |
+
#
|
132 |
+
# Explain/Summarize This Tab
|
133 |
+
#
|
134 |
+
# Functions:
|
135 |
+
# FIXME
|
136 |
+
#
|
137 |
+
#
|
138 |
+
############################################################################################################################################################################################################################
|
139 |
+
#
|
140 |
+
# Transcript Comparison Tab
|
141 |
+
#
|
142 |
+
# Functions:
|
143 |
+
# FIXME
|
144 |
+
#
|
145 |
+
#
|
146 |
+
###########################################################################################################################################################################################################################
|
147 |
+
#
|
148 |
+
# Search Tab
|
149 |
+
#
|
150 |
+
# Functions:
|
151 |
+
# FIXME
|
152 |
+
#
|
153 |
+
# End of Search Tab Functions
|
154 |
+
#
|
155 |
+
##############################################################################################################################################################################################################################
|
156 |
+
#
|
157 |
+
# Llamafile Tab
|
158 |
+
#
|
159 |
+
# Functions:
|
160 |
+
# FIXME
|
161 |
+
#
|
162 |
+
# End of Llamafile Tab Functions
|
163 |
+
##############################################################################################################################################################################################################################
|
164 |
+
#
|
165 |
+
# Chat Interface Tab Functions
|
166 |
+
#
|
167 |
+
# Functions:
|
168 |
+
# FIXME
|
169 |
+
#
|
170 |
+
#
|
171 |
+
# End of Chat Interface Tab Functions
|
172 |
+
################################################################################################################################################################################################################################
|
173 |
+
#
|
174 |
+
# Media Edit Tab Functions
|
175 |
+
# Functions:
|
176 |
+
# Fixme
|
177 |
+
# create_media_edit_tab():
|
178 |
+
##### Trash Tab
|
179 |
+
# FIXME
|
180 |
+
# Functions:
|
181 |
+
#
|
182 |
+
# End of Media Edit Tab Functions
|
183 |
+
################################################################################################################
|
184 |
+
#
|
185 |
+
# Import Items Tab Functions
|
186 |
+
#
|
187 |
+
# Functions:
|
188 |
+
#FIXME
|
189 |
+
# End of Import Items Tab Functions
|
190 |
+
################################################################################################################
|
191 |
+
#
|
192 |
+
# Export Items Tab Functions
|
193 |
+
#
|
194 |
+
# Functions:
|
195 |
+
# FIXME
|
196 |
+
#
|
197 |
+
#
|
198 |
+
# End of Export Items Tab Functions
|
199 |
+
################################################################################################################
|
200 |
+
#
|
201 |
+
# Keyword Management Tab Functions
|
202 |
+
#
|
203 |
+
# Functions:
|
204 |
+
# create_view_keywords_tab():
|
205 |
+
# FIXME
|
206 |
+
#
|
207 |
+
# End of Keyword Management Tab Functions
|
208 |
+
################################################################################################################
|
209 |
+
#
|
210 |
+
# Document Editing Tab Functions
|
211 |
+
#
|
212 |
+
# Functions:
|
213 |
+
# #FIXME
|
214 |
+
#
|
215 |
+
#
|
216 |
+
################################################################################################################
|
217 |
+
#
|
218 |
+
# Utilities Tab Functions
|
219 |
+
# Functions:
|
220 |
+
# create_utilities_yt_video_tab():
|
221 |
+
# #FIXME
|
222 |
+
|
223 |
+
#
|
224 |
+
# End of Utilities Tab Functions
|
225 |
+
################################################################################################################
|
226 |
+
|
227 |
+
# FIXME - Prompt sample box
|
228 |
+
#
|
229 |
+
# # Sample data
|
230 |
+
# prompts_category_1 = [
|
231 |
+
# "What are the key points discussed in the video?",
|
232 |
+
# "Summarize the main arguments made by the speaker.",
|
233 |
+
# "Describe the conclusions of the study presented."
|
234 |
+
# ]
|
235 |
+
#
|
236 |
+
# prompts_category_2 = [
|
237 |
+
# "How does the proposed solution address the problem?",
|
238 |
+
# "What are the implications of the findings?",
|
239 |
+
# "Can you explain the theory behind the observed phenomenon?"
|
240 |
+
# ]
|
241 |
+
#
|
242 |
+
# all_prompts2 = prompts_category_1 + prompts_category_2
|
243 |
+
|
244 |
+
|
245 |
+
|
246 |
+
#######################################################################################################################
|
247 |
+
#
|
248 |
+
# Migration Script
|
249 |
+
import sqlite3
|
250 |
+
import uuid
|
251 |
+
import logging
|
252 |
+
import os
|
253 |
+
from datetime import datetime
|
254 |
+
import shutil
|
255 |
+
|
256 |
+
# def migrate_media_db_to_rag_chat_db(media_db_path, rag_chat_db_path):
|
257 |
+
# # Check if migration is needed
|
258 |
+
# if not os.path.exists(media_db_path):
|
259 |
+
# logging.info("Media DB does not exist. No migration needed.")
|
260 |
+
# return
|
261 |
+
#
|
262 |
+
# # Optional: Check if migration has already been completed
|
263 |
+
# migration_flag = os.path.join(os.path.dirname(rag_chat_db_path), 'migration_completed.flag')
|
264 |
+
# if os.path.exists(migration_flag):
|
265 |
+
# logging.info("Migration already completed. Skipping migration.")
|
266 |
+
# return
|
267 |
+
#
|
268 |
+
# # Backup databases
|
269 |
+
# backup_database(media_db_path)
|
270 |
+
# backup_database(rag_chat_db_path)
|
271 |
+
#
|
272 |
+
# # Connect to both databases
|
273 |
+
# try:
|
274 |
+
# media_conn = sqlite3.connect(media_db_path)
|
275 |
+
# rag_conn = sqlite3.connect(rag_chat_db_path)
|
276 |
+
#
|
277 |
+
# # Enable foreign key support
|
278 |
+
# media_conn.execute('PRAGMA foreign_keys = ON;')
|
279 |
+
# rag_conn.execute('PRAGMA foreign_keys = ON;')
|
280 |
+
#
|
281 |
+
# media_cursor = media_conn.cursor()
|
282 |
+
# rag_cursor = rag_conn.cursor()
|
283 |
+
#
|
284 |
+
# # Begin transaction
|
285 |
+
# rag_conn.execute('BEGIN TRANSACTION;')
|
286 |
+
#
|
287 |
+
# # Extract conversations from media DB
|
288 |
+
# media_cursor.execute('''
|
289 |
+
# SELECT id, media_id, media_name, conversation_name, created_at, updated_at
|
290 |
+
# FROM ChatConversations
|
291 |
+
# ''')
|
292 |
+
# conversations = media_cursor.fetchall()
|
293 |
+
#
|
294 |
+
# for conv in conversations:
|
295 |
+
# old_conv_id, media_id, media_name, conversation_name, created_at, updated_at = conv
|
296 |
+
#
|
297 |
+
# # Convert timestamps if necessary
|
298 |
+
# created_at = parse_timestamp(created_at)
|
299 |
+
# updated_at = parse_timestamp(updated_at)
|
300 |
+
#
|
301 |
+
# # Generate a new conversation_id
|
302 |
+
# conversation_id = str(uuid.uuid4())
|
303 |
+
# title = conversation_name or (f"{media_name}-chat" if media_name else "Untitled Conversation")
|
304 |
+
#
|
305 |
+
# # Insert into conversation_metadata
|
306 |
+
# rag_cursor.execute('''
|
307 |
+
# INSERT INTO conversation_metadata (conversation_id, created_at, last_updated, title, media_id)
|
308 |
+
# VALUES (?, ?, ?, ?, ?)
|
309 |
+
# ''', (conversation_id, created_at, updated_at, title, media_id))
|
310 |
+
#
|
311 |
+
# # Extract messages from media DB
|
312 |
+
# media_cursor.execute('''
|
313 |
+
# SELECT sender, message, timestamp
|
314 |
+
# FROM ChatMessages
|
315 |
+
# WHERE conversation_id = ?
|
316 |
+
# ORDER BY timestamp ASC
|
317 |
+
# ''', (old_conv_id,))
|
318 |
+
# messages = media_cursor.fetchall()
|
319 |
+
#
|
320 |
+
# for msg in messages:
|
321 |
+
# sender, content, timestamp = msg
|
322 |
+
#
|
323 |
+
# # Convert timestamp if necessary
|
324 |
+
# timestamp = parse_timestamp(timestamp)
|
325 |
+
#
|
326 |
+
# role = sender # Assuming 'sender' is 'user' or 'ai'
|
327 |
+
#
|
328 |
+
# # Insert message into rag_qa_chats
|
329 |
+
# rag_cursor.execute('''
|
330 |
+
# INSERT INTO rag_qa_chats (conversation_id, timestamp, role, content)
|
331 |
+
# VALUES (?, ?, ?, ?)
|
332 |
+
# ''', (conversation_id, timestamp, role, content))
|
333 |
+
#
|
334 |
+
# # Commit transaction
|
335 |
+
# rag_conn.commit()
|
336 |
+
# logging.info("Migration completed successfully.")
|
337 |
+
#
|
338 |
+
# # Mark migration as complete
|
339 |
+
# with open(migration_flag, 'w') as f:
|
340 |
+
# f.write('Migration completed on ' + datetime.now().isoformat())
|
341 |
+
#
|
342 |
+
# except Exception as e:
|
343 |
+
# # Rollback transaction in case of error
|
344 |
+
# rag_conn.rollback()
|
345 |
+
# logging.error(f"Error during migration: {e}")
|
346 |
+
# raise
|
347 |
+
# finally:
|
348 |
+
# media_conn.close()
|
349 |
+
# rag_conn.close()
|
350 |
+
|
351 |
+
def backup_database(db_path):
    """Create a one-time ``<db_path>.backup`` copy of a database file.

    The copy is made only when no backup file exists yet, so an existing
    backup is never overwritten by a later call.

    Args:
        db_path: Path of the database file, as a ``str`` or any
            ``os.PathLike`` object (e.g. ``pathlib.Path``).

    Raises:
        OSError: Propagated from ``shutil.copyfile`` when the source cannot
            be read or the backup cannot be written.
    """
    # os.fspath() lets callers pass pathlib.Path objects; plain `+`
    # concatenation would raise TypeError for them.
    backup_path = os.fspath(db_path) + '.backup'

    if os.path.exists(backup_path):
        logging.info(f"Backup already exists at {backup_path}")
        return

    shutil.copyfile(db_path, backup_path)
    logging.info(f"Database backed up to {backup_path}")
|
358 |
+
|
359 |
+
def parse_timestamp(timestamp_value):
    """Normalize a timestamp from the old database to an ISO 8601 string.

    Interpretations are tried in this order:

    1. A ``datetime`` instance, returned as its ISO string.
    2. ISO 8601 text, parsed with ``datetime.fromisoformat``.
    3. Unix epoch seconds (a number or numeric string), converted with
       ``datetime.fromtimestamp`` (local time).
    4. Anything else: a warning is logged and the current local time is used.

    Adjust this function based on the actual format of your timestamps.

    Args:
        timestamp_value: Raw timestamp value read from the old database.

    Returns:
        str: The timestamp in ``datetime.isoformat()`` form.
    """
    if isinstance(timestamp_value, datetime):
        return timestamp_value.isoformat()

    try:
        # Attempt to parse ISO format
        return datetime.fromisoformat(timestamp_value).isoformat()
    except (TypeError, ValueError):
        # TypeError: the value is not a string (e.g. None or a numeric
        # epoch). ValueError: a string that is not ISO formatted.
        pass

    try:
        # Fall back to Unix epoch seconds.
        return datetime.fromtimestamp(float(timestamp_value)).isoformat()
    except (TypeError, ValueError, OverflowError, OSError):
        # float() rejects non-numeric input; fromtimestamp() raises
        # OverflowError/OSError for values outside the platform's range.
        logging.warning(f"Unable to parse timestamp '{timestamp_value}', using current time.")
        return datetime.now().isoformat()
|
377 |
+
|
378 |
+
#
|
379 |
+
# End of Migration Script
|
380 |
+
#######################################################################################################################
|
381 |
+
|
382 |
+
|
383 |
+
#######################################################################################################################
|
384 |
+
#
|
385 |
+
# Launch UI Function
|
386 |
+
def launch_ui(share_public=None, server_mode=False):
|
387 |
+
webbrowser.open_new_tab('http://127.0.0.1:7860/?__theme=dark')
|
388 |
+
share=share_public
|
389 |
+
css = """
|
390 |
+
.result-box {
|
391 |
+
margin-bottom: 20px;
|
392 |
+
border: 1px solid #ddd;
|
393 |
+
padding: 10px;
|
394 |
+
}
|
395 |
+
.result-box.error {
|
396 |
+
border-color: #ff0000;
|
397 |
+
background-color: #ffeeee;
|
398 |
+
}
|
399 |
+
.transcription, .summary {
|
400 |
+
max-height: 800px;
|
401 |
+
overflow-y: auto;
|
402 |
+
border: 1px solid #eee;
|
403 |
+
padding: 10px;
|
404 |
+
margin-top: 10px;
|
405 |
+
}
|
406 |
+
"""
|
407 |
+
|
408 |
+
config = load_and_log_configs()
|
409 |
+
# Get database paths from config
|
410 |
+
db_config = config['db_config']
|
411 |
+
media_db_path = db_config['sqlite_path']
|
412 |
+
character_chat_db_path = os.path.join(os.path.dirname(media_db_path), "chatDB.db")
|
413 |
+
rag_chat_db_path = os.path.join(os.path.dirname(media_db_path), "rag_qa.db")
|
414 |
+
# Initialize the RAG Chat DB (create tables and update schema)
|
415 |
+
create_tables()
|
416 |
+
|
417 |
+
# Migrate data from the media DB to the RAG Chat DB
|
418 |
+
#migrate_media_db_to_rag_chat_db(media_db_path, rag_chat_db_path)
|
419 |
+
|
420 |
+
|
421 |
+
with gr.Blocks(theme='bethecloud/storj_theme',css=css) as iface:
|
422 |
+
gr.HTML(
|
423 |
+
"""
|
424 |
+
<script>
|
425 |
+
document.addEventListener('DOMContentLoaded', (event) => {
|
426 |
+
document.body.classList.add('dark');
|
427 |
+
document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)';
|
428 |
+
});
|
429 |
+
</script>
|
430 |
+
"""
|
431 |
+
)
|
432 |
+
db_config = get_db_config()
|
433 |
+
db_type = db_config['type']
|
434 |
+
gr.Markdown(f"# tl/dw: Your LLM-powered Research Multi-tool")
|
435 |
+
gr.Markdown(f"(Using {db_type.capitalize()} Database)")
|
436 |
+
with gr.Tabs():
|
437 |
+
with gr.TabItem("Transcribe / Analyze / Ingestion", id="ingestion-grouping", visible=True):
|
438 |
+
with gr.Tabs():
|
439 |
+
create_video_transcription_tab()
|
440 |
+
create_audio_processing_tab()
|
441 |
+
create_podcast_tab()
|
442 |
+
create_import_book_tab()
|
443 |
+
create_plain_text_import_tab()
|
444 |
+
create_xml_import_tab()
|
445 |
+
create_website_scraping_tab()
|
446 |
+
create_pdf_ingestion_tab()
|
447 |
+
create_pdf_ingestion_test_tab()
|
448 |
+
create_resummary_tab()
|
449 |
+
create_summarize_explain_tab()
|
450 |
+
create_live_recording_tab()
|
451 |
+
create_arxiv_tab()
|
452 |
+
create_semantic_scholar_tab()
|
453 |
+
|
454 |
+
with gr.TabItem("RAG Chat/Search", id="RAG Chat Notes group", visible=True):
|
455 |
+
create_rag_tab()
|
456 |
+
create_rag_qa_chat_tab()
|
457 |
+
create_rag_qa_notes_management_tab()
|
458 |
+
create_rag_qa_chat_management_tab()
|
459 |
+
|
460 |
+
with gr.TabItem("Chat with an LLM", id="LLM Chat group", visible=True):
|
461 |
+
create_chat_interface()
|
462 |
+
create_chat_interface_stacked()
|
463 |
+
create_chat_interface_multi_api()
|
464 |
+
create_chat_interface_four()
|
465 |
+
chat_workflows_tab()
|
466 |
+
|
467 |
+
with gr.TabItem("Character Chat", id="character chat group", visible=True):
|
468 |
+
create_character_card_interaction_tab()
|
469 |
+
create_character_chat_mgmt_tab()
|
470 |
+
create_custom_character_card_tab()
|
471 |
+
create_character_card_validation_tab()
|
472 |
+
create_multiple_character_chat_tab()
|
473 |
+
create_narrator_controlled_conversation_tab()
|
474 |
+
create_export_characters_tab()
|
475 |
+
|
476 |
+
with gr.TabItem("Writing Tools", id="writing_tools group", visible=True):
|
477 |
+
from App_Function_Libraries.Gradio_UI.Writing_tab import create_document_feedback_tab
|
478 |
+
create_document_feedback_tab()
|
479 |
+
from App_Function_Libraries.Gradio_UI.Writing_tab import create_grammar_style_check_tab
|
480 |
+
create_grammar_style_check_tab()
|
481 |
+
from App_Function_Libraries.Gradio_UI.Writing_tab import create_tone_adjustment_tab
|
482 |
+
create_tone_adjustment_tab()
|
483 |
+
from App_Function_Libraries.Gradio_UI.Writing_tab import create_creative_writing_tab
|
484 |
+
create_creative_writing_tab()
|
485 |
+
from App_Function_Libraries.Gradio_UI.Writing_tab import create_mikupad_tab
|
486 |
+
create_mikupad_tab()
|
487 |
+
|
488 |
+
with gr.TabItem("Search/View DB Items", id="view db items group", visible=True):
|
489 |
+
create_search_tab()
|
490 |
+
create_search_summaries_tab()
|
491 |
+
create_view_all_mediadb_with_versions_tab()
|
492 |
+
create_viewing_mediadb_tab()
|
493 |
+
create_mediadb_keyword_search_tab()
|
494 |
+
create_view_all_rag_notes_tab()
|
495 |
+
create_viewing_ragdb_tab()
|
496 |
+
create_ragdb_keyword_items_tab()
|
497 |
+
|
498 |
+
with gr.TabItem("Prompts", id='view prompts group', visible=True):
|
499 |
+
with gr.Tabs():
|
500 |
+
create_prompt_view_tab()
|
501 |
+
create_prompt_search_tab()
|
502 |
+
create_prompt_edit_tab()
|
503 |
+
create_prompt_clone_tab()
|
504 |
+
create_prompt_suggestion_tab()
|
505 |
+
create_prompts_export_tab()
|
506 |
+
|
507 |
+
with gr.TabItem("Manage Media DB Items", id="manage group", visible=True):
|
508 |
+
create_media_edit_tab()
|
509 |
+
create_manage_items_tab()
|
510 |
+
create_media_edit_and_clone_tab()
|
511 |
+
|
512 |
+
with gr.TabItem("Embeddings Management", id="embeddings group", visible=True):
|
513 |
+
create_embeddings_tab()
|
514 |
+
create_view_embeddings_tab()
|
515 |
+
create_purge_embeddings_tab()
|
516 |
+
|
517 |
+
with gr.TabItem("Keywords", id="keywords group", visible=True):
|
518 |
+
create_view_keywords_tab()
|
519 |
+
create_add_keyword_tab()
|
520 |
+
create_delete_keyword_tab()
|
521 |
+
create_export_keywords_tab()
|
522 |
+
create_character_keywords_tab()
|
523 |
+
create_rag_qa_keywords_tab()
|
524 |
+
create_meta_keywords_tab()
|
525 |
+
create_prompt_keywords_tab()
|
526 |
+
|
527 |
+
with gr.TabItem("Import", id="import group", visible=True):
|
528 |
+
create_import_item_tab()
|
529 |
+
create_import_obsidian_vault_tab()
|
530 |
+
create_import_single_prompt_tab()
|
531 |
+
create_import_multiple_prompts_tab()
|
532 |
+
create_mediawiki_import_tab()
|
533 |
+
create_mediawiki_config_tab()
|
534 |
+
create_conversation_import_tab()
|
535 |
+
|
536 |
+
with gr.TabItem("Export", id="export group", visible=True):
|
537 |
+
create_export_tabs()
|
538 |
+
|
539 |
+
|
540 |
+
with gr.TabItem("Database Management", id="database_management_group", visible=True):
|
541 |
+
create_database_management_interface(
|
542 |
+
media_db_config={
|
543 |
+
'db_path': media_db_path,
|
544 |
+
'backup_dir': backup_dir
|
545 |
+
},
|
546 |
+
rag_db_config={
|
547 |
+
'db_path': rag_chat_db_path,
|
548 |
+
'backup_dir': backup_dir
|
549 |
+
},
|
550 |
+
char_db_config={
|
551 |
+
'db_path': character_chat_db_path,
|
552 |
+
'backup_dir': backup_dir
|
553 |
+
}
|
554 |
+
)
|
555 |
+
|
556 |
+
with gr.TabItem("Utilities", id="util group", visible=True):
|
557 |
+
create_mindmap_tab()
|
558 |
+
create_utilities_yt_video_tab()
|
559 |
+
create_utilities_yt_audio_tab()
|
560 |
+
create_utilities_yt_timestamp_tab()
|
561 |
+
|
562 |
+
with gr.TabItem("Anki Deck Creation/Validation", id="anki group", visible=True):
|
563 |
+
create_anki_generator_tab()
|
564 |
+
create_anki_validation_tab()
|
565 |
+
|
566 |
+
with gr.TabItem("Local LLM", id="local llm group", visible=True):
|
567 |
+
create_chat_with_llamafile_tab()
|
568 |
+
create_ollama_tab()
|
569 |
+
#create_huggingface_tab()
|
570 |
+
|
571 |
+
with gr.TabItem("Trashcan", id="trashcan group", visible=True):
|
572 |
+
create_search_and_mark_trash_tab()
|
573 |
+
create_view_trash_tab()
|
574 |
+
create_delete_trash_tab()
|
575 |
+
create_empty_trash_tab()
|
576 |
+
|
577 |
+
with gr.TabItem("Evaluations", id="eval", visible=True):
|
578 |
+
create_geval_tab()
|
579 |
+
create_infinite_bench_tab()
|
580 |
+
# FIXME
|
581 |
+
#create_mmlu_pro_tab()
|
582 |
+
|
583 |
+
with gr.TabItem("Introduction/Help", id="introduction group", visible=True):
|
584 |
+
create_introduction_tab()
|
585 |
+
|
586 |
+
with gr.TabItem("Config Editor", id="config group"):
|
587 |
+
create_config_editor_tab()
|
588 |
+
|
589 |
+
# Launch the interface
|
590 |
+
server_port_variable = 7860
|
591 |
+
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
|
592 |
+
if share==True:
|
593 |
+
iface.launch(share=True)
|
594 |
+
elif server_mode and not share_public:
|
595 |
+
iface.launch(share=False, server_name="0.0.0.0", server_port=server_port_variable, )
|
596 |
+
else:
|
597 |
+
try:
|
598 |
+
iface.launch(share=False, server_name="0.0.0.0", server_port=server_port_variable, )
|
599 |
+
except Exception as e:
|
600 |
+
logging.error(f"Error launching interface: {str(e)}")
|
App_Function_Libraries/Gradio_UI/Anki_tab.py
ADDED
@@ -0,0 +1,921 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Anki_tab.py
|
2 |
+
# Description: Gradio functions for the Anki Flashcard Validation and Anki Deck Generator tabs
|
3 |
+
#
|
4 |
+
# Imports
|
5 |
+
import json
|
6 |
+
import logging
|
7 |
+
import os
|
8 |
+
import tempfile
|
9 |
+
from typing import Optional, Tuple, List, Dict
|
10 |
+
#
|
11 |
+
# External Imports
|
12 |
+
import genanki
|
13 |
+
import gradio as gr
|
14 |
+
#
|
15 |
+
# Local Imports
|
16 |
+
from App_Function_Libraries.Chat.Chat_Functions import approximate_token_count, update_chat_content, save_chat_history, \
|
17 |
+
save_chat_history_to_db_wrapper
|
18 |
+
from App_Function_Libraries.DB.DB_Manager import list_prompts
|
19 |
+
from App_Function_Libraries.Gradio_UI.Chat_ui import update_dropdown_multiple, chat_wrapper, update_selected_parts, \
|
20 |
+
search_conversations, regenerate_last_message, load_conversation, debug_output
|
21 |
+
from App_Function_Libraries.Third_Party.Anki import sanitize_html, generate_card_choices, \
|
22 |
+
export_cards, load_card_for_editing, handle_file_upload, \
|
23 |
+
validate_for_ui, update_card_with_validation, update_card_choices, enhanced_file_upload, \
|
24 |
+
handle_validation
|
25 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
|
26 |
+
#
|
27 |
+
############################################################################################################
|
28 |
+
#
|
29 |
+
# Functions:
|
30 |
+
|
31 |
+
def create_anki_validation_tab():
|
32 |
+
with gr.TabItem("Anki Flashcard Validation", visible=True):
|
33 |
+
gr.Markdown("# Anki Flashcard Validation and Editor")
|
34 |
+
|
35 |
+
# State variables for internal tracking
|
36 |
+
current_card_data = gr.State({})
|
37 |
+
preview_update_flag = gr.State(False)
|
38 |
+
|
39 |
+
with gr.Row():
|
40 |
+
# Left Column: Input and Validation
|
41 |
+
with gr.Column(scale=1):
|
42 |
+
gr.Markdown("## Import or Create Flashcards")
|
43 |
+
|
44 |
+
input_type = gr.Radio(
|
45 |
+
choices=["JSON", "APKG"],
|
46 |
+
label="Input Type",
|
47 |
+
value="JSON"
|
48 |
+
)
|
49 |
+
|
50 |
+
with gr.Group() as json_input_group:
|
51 |
+
flashcard_input = gr.TextArea(
|
52 |
+
label="Enter Flashcards (JSON format)",
|
53 |
+
placeholder='''{
|
54 |
+
"cards": [
|
55 |
+
{
|
56 |
+
"id": "CARD_001",
|
57 |
+
"type": "basic",
|
58 |
+
"front": "What is the capital of France?",
|
59 |
+
"back": "Paris",
|
60 |
+
"tags": ["geography", "europe"],
|
61 |
+
"note": "Remember: City of Light"
|
62 |
+
}
|
63 |
+
]
|
64 |
+
}''',
|
65 |
+
lines=10
|
66 |
+
)
|
67 |
+
|
68 |
+
import_json = gr.File(
|
69 |
+
label="Or Import JSON File",
|
70 |
+
file_types=[".json"]
|
71 |
+
)
|
72 |
+
|
73 |
+
with gr.Group(visible=False) as apkg_input_group:
|
74 |
+
import_apkg = gr.File(
|
75 |
+
label="Import APKG File",
|
76 |
+
file_types=[".apkg"]
|
77 |
+
)
|
78 |
+
deck_info = gr.JSON(
|
79 |
+
label="Deck Information",
|
80 |
+
visible=False
|
81 |
+
)
|
82 |
+
|
83 |
+
validate_button = gr.Button("Validate Flashcards")
|
84 |
+
|
85 |
+
# Right Column: Validation Results and Editor
|
86 |
+
with gr.Column(scale=1):
|
87 |
+
gr.Markdown("## Validation Results")
|
88 |
+
validation_status = gr.Markdown("")
|
89 |
+
|
90 |
+
with gr.Accordion("Validation Rules", open=False):
|
91 |
+
gr.Markdown("""
|
92 |
+
### Required Fields:
|
93 |
+
- Unique ID
|
94 |
+
- Card Type (basic, cloze, reverse)
|
95 |
+
- Front content
|
96 |
+
- Back content
|
97 |
+
- At least one tag
|
98 |
+
|
99 |
+
### Content Rules:
|
100 |
+
- No empty fields
|
101 |
+
- Front side should be a clear question/prompt
|
102 |
+
- Back side should contain complete answer
|
103 |
+
- Cloze deletions must have valid syntax
|
104 |
+
- No duplicate IDs
|
105 |
+
|
106 |
+
### Image Rules:
|
107 |
+
- Valid image tags
|
108 |
+
- Supported formats (JPG, PNG, GIF)
|
109 |
+
- Base64 encoded or valid URL
|
110 |
+
|
111 |
+
### APKG-specific Rules:
|
112 |
+
- Valid SQLite database structure
|
113 |
+
- Media files properly referenced
|
114 |
+
- Note types match Anki standards
|
115 |
+
- Card templates are well-formed
|
116 |
+
""")
|
117 |
+
|
118 |
+
with gr.Row():
|
119 |
+
# Card Editor
|
120 |
+
gr.Markdown("## Card Editor")
|
121 |
+
with gr.Row():
|
122 |
+
with gr.Column(scale=1):
|
123 |
+
with gr.Accordion("Edit Individual Cards", open=True):
|
124 |
+
card_selector = gr.Dropdown(
|
125 |
+
label="Select Card to Edit",
|
126 |
+
choices=[],
|
127 |
+
interactive=True
|
128 |
+
)
|
129 |
+
|
130 |
+
card_type = gr.Radio(
|
131 |
+
choices=["basic", "cloze", "reverse"],
|
132 |
+
label="Card Type",
|
133 |
+
value="basic"
|
134 |
+
)
|
135 |
+
|
136 |
+
# Front content with preview
|
137 |
+
with gr.Group():
|
138 |
+
gr.Markdown("### Front Content")
|
139 |
+
front_content = gr.TextArea(
|
140 |
+
label="Content (HTML supported)",
|
141 |
+
lines=3
|
142 |
+
)
|
143 |
+
front_preview = gr.HTML(
|
144 |
+
label="Preview"
|
145 |
+
)
|
146 |
+
|
147 |
+
# Back content with preview
|
148 |
+
with gr.Group():
|
149 |
+
gr.Markdown("### Back Content")
|
150 |
+
back_content = gr.TextArea(
|
151 |
+
label="Content (HTML supported)",
|
152 |
+
lines=3
|
153 |
+
)
|
154 |
+
back_preview = gr.HTML(
|
155 |
+
label="Preview"
|
156 |
+
)
|
157 |
+
|
158 |
+
tags_input = gr.TextArea(
|
159 |
+
label="Tags (comma-separated)",
|
160 |
+
lines=1
|
161 |
+
)
|
162 |
+
|
163 |
+
notes_input = gr.TextArea(
|
164 |
+
label="Additional Notes",
|
165 |
+
lines=2
|
166 |
+
)
|
167 |
+
|
168 |
+
with gr.Row():
|
169 |
+
update_card_button = gr.Button("Update Card")
|
170 |
+
delete_card_button = gr.Button("Delete Card", variant="stop")
|
171 |
+
|
172 |
+
with gr.Row():
|
173 |
+
with gr.Column(scale=1):
|
174 |
+
# Export Options
|
175 |
+
gr.Markdown("## Export Options")
|
176 |
+
export_format = gr.Radio(
|
177 |
+
choices=["Anki CSV", "JSON", "Plain Text"],
|
178 |
+
label="Export Format",
|
179 |
+
value="Anki CSV"
|
180 |
+
)
|
181 |
+
export_button = gr.Button("Export Valid Cards")
|
182 |
+
export_file = gr.File(label="Download Validated Cards")
|
183 |
+
export_status = gr.Markdown("")
|
184 |
+
with gr.Column(scale=1):
|
185 |
+
gr.Markdown("## Export Instructions")
|
186 |
+
gr.Markdown("""
|
187 |
+
### Anki CSV Format:
|
188 |
+
- Front, Back, Tags, Type, Note
|
189 |
+
- Use for importing into Anki
|
190 |
+
- Images preserved as HTML
|
191 |
+
|
192 |
+
### JSON Format:
|
193 |
+
- JSON array of cards
|
194 |
+
- Images as base64 or URLs
|
195 |
+
- Use for custom processing
|
196 |
+
|
197 |
+
### Plain Text Format:
|
198 |
+
- Question and Answer pairs
|
199 |
+
- Images represented as [IMG] placeholder
|
200 |
+
- Use for manual review
|
201 |
+
""")
|
202 |
+
|
203 |
+
def update_preview(content):
    """Produce the HTML shown in a card-side preview pane.

    Falsy content (empty string, None) yields an empty preview; anything else
    is passed through ``sanitize_html`` and that result is returned.
    """
    return sanitize_html(content) if content else ""
|
208 |
+
|
209 |
+
# Event handlers
|
210 |
+
def validation_chain(content: str) -> Tuple[str, List[str]]:
    """Validate the flashcard text and refresh the card choices in one step.

    ``validate_for_ui`` is evaluated first and ``update_card_choices`` second,
    both on the same ``content``; their results are returned as a pair in
    that order.
    """
    return validate_for_ui(content), update_card_choices(content)
|
215 |
+
|
216 |
+
def delete_card(card_selection, current_content):
    """Delete the selected card from the flashcard JSON text.

    Args:
        card_selection: Selector entry; the text before the first " - " is
            used as the id of the card to remove.
        current_content: JSON text with a top-level "cards" list.

    Returns:
        A 3-tuple ``(content, status_message, card_choices)``.
        On success ``content`` is the re-serialised JSON without the card and
        ``card_choices`` comes from ``generate_card_choices``. When nothing is
        selected, no card matches, or an error occurs, ``content`` is returned
        unchanged together with an explanatory message and an empty list.
    """
    if not card_selection or not current_content:
        return current_content, "No card selected", []

    try:
        data = json.loads(current_content)
        selected_id = card_selection.split(" - ")[0]

        # .get() keeps a card that lacks an "id" key instead of letting one
        # malformed entry abort the whole deletion with a KeyError.
        remaining = [card for card in data['cards'] if card.get('id') != selected_id]

        if len(remaining) == len(data['cards']):
            # No card matched: leave the content untouched and do not
            # report a successful deletion.
            return current_content, f"Card not found: {selected_id}", []

        data['cards'] = remaining
        new_content = json.dumps(data, indent=2)

        return (
            new_content,
            "Card deleted successfully!",
            generate_card_choices(new_content)
        )

    except Exception as e:
        # UI boundary: report the failure in the status area instead of raising,
        # but also log it so the cause is not lost.
        logging.error(f"Error deleting card: {str(e)}")
        return current_content, f"Error deleting card: {str(e)}", []
|
236 |
+
|
237 |
+
def process_validation_result(is_valid, message):
    """Format a validation outcome as a single marked-up status line.

    A truthy ``is_valid`` prefixes the message with a check mark, a falsy one
    with a cross mark.
    """
    return f"✅ {message}" if is_valid else f"❌ {message}"
|
243 |
+
|
244 |
+
# Register event handlers
|
245 |
+
input_type.change(
|
246 |
+
fn=lambda t: (
|
247 |
+
gr.update(visible=t == "JSON"),
|
248 |
+
gr.update(visible=t == "APKG"),
|
249 |
+
gr.update(visible=t == "APKG")
|
250 |
+
),
|
251 |
+
inputs=[input_type],
|
252 |
+
outputs=[json_input_group, apkg_input_group, deck_info]
|
253 |
+
)
|
254 |
+
|
255 |
+
# File upload handlers
|
256 |
+
import_json.upload(
|
257 |
+
fn=handle_file_upload,
|
258 |
+
inputs=[import_json, input_type],
|
259 |
+
outputs=[
|
260 |
+
flashcard_input,
|
261 |
+
deck_info,
|
262 |
+
validation_status,
|
263 |
+
card_selector
|
264 |
+
]
|
265 |
+
)
|
266 |
+
|
267 |
+
import_apkg.upload(
|
268 |
+
fn=enhanced_file_upload,
|
269 |
+
inputs=[import_apkg, input_type],
|
270 |
+
outputs=[
|
271 |
+
flashcard_input,
|
272 |
+
deck_info,
|
273 |
+
validation_status,
|
274 |
+
card_selector
|
275 |
+
]
|
276 |
+
)
|
277 |
+
|
278 |
+
# Validation handler
|
279 |
+
validate_button.click(
|
280 |
+
fn=lambda content, input_format: (
|
281 |
+
handle_validation(content, input_format),
|
282 |
+
generate_card_choices(content) if content else []
|
283 |
+
),
|
284 |
+
inputs=[flashcard_input, input_type],
|
285 |
+
outputs=[validation_status, card_selector]
|
286 |
+
)
|
287 |
+
|
288 |
+
# Card editing handlers
|
289 |
+
# Card selector change event
|
290 |
+
card_selector.change(
|
291 |
+
fn=load_card_for_editing,
|
292 |
+
inputs=[card_selector, flashcard_input],
|
293 |
+
outputs=[
|
294 |
+
card_type,
|
295 |
+
front_content,
|
296 |
+
back_content,
|
297 |
+
tags_input,
|
298 |
+
notes_input,
|
299 |
+
front_preview,
|
300 |
+
back_preview
|
301 |
+
]
|
302 |
+
)
|
303 |
+
|
304 |
+
# Live preview updates
|
305 |
+
front_content.change(
|
306 |
+
fn=update_preview,
|
307 |
+
inputs=[front_content],
|
308 |
+
outputs=[front_preview]
|
309 |
+
)
|
310 |
+
|
311 |
+
back_content.change(
|
312 |
+
fn=update_preview,
|
313 |
+
inputs=[back_content],
|
314 |
+
outputs=[back_preview]
|
315 |
+
)
|
316 |
+
|
317 |
+
# Card update handler
|
318 |
+
update_card_button.click(
|
319 |
+
fn=update_card_with_validation,
|
320 |
+
inputs=[
|
321 |
+
flashcard_input,
|
322 |
+
card_selector,
|
323 |
+
card_type,
|
324 |
+
front_content,
|
325 |
+
back_content,
|
326 |
+
tags_input,
|
327 |
+
notes_input
|
328 |
+
],
|
329 |
+
outputs=[
|
330 |
+
flashcard_input,
|
331 |
+
validation_status,
|
332 |
+
card_selector
|
333 |
+
]
|
334 |
+
)
|
335 |
+
|
336 |
+
# Delete card handler
|
337 |
+
delete_card_button.click(
|
338 |
+
fn=delete_card,
|
339 |
+
inputs=[card_selector, flashcard_input],
|
340 |
+
outputs=[flashcard_input, validation_status, card_selector]
|
341 |
+
)
|
342 |
+
|
343 |
+
# Export handler
|
344 |
+
export_button.click(
|
345 |
+
fn=export_cards,
|
346 |
+
inputs=[flashcard_input, export_format],
|
347 |
+
outputs=[export_status, export_file]
|
348 |
+
)
|
349 |
+
|
350 |
+
return (
|
351 |
+
flashcard_input,
|
352 |
+
import_json,
|
353 |
+
import_apkg,
|
354 |
+
validate_button,
|
355 |
+
validation_status,
|
356 |
+
card_selector,
|
357 |
+
card_type,
|
358 |
+
front_content,
|
359 |
+
back_content,
|
360 |
+
front_preview,
|
361 |
+
back_preview,
|
362 |
+
tags_input,
|
363 |
+
notes_input,
|
364 |
+
update_card_button,
|
365 |
+
delete_card_button,
|
366 |
+
export_format,
|
367 |
+
export_button,
|
368 |
+
export_file,
|
369 |
+
export_status,
|
370 |
+
deck_info
|
371 |
+
)
|
372 |
+
|
373 |
+
|
374 |
+
def create_anki_generator_tab():
|
375 |
+
with gr.TabItem("Anki Deck Generator", visible=True):
|
376 |
+
try:
|
377 |
+
default_value = None
|
378 |
+
if default_api_endpoint:
|
379 |
+
if default_api_endpoint in global_api_endpoints:
|
380 |
+
default_value = format_api_name(default_api_endpoint)
|
381 |
+
else:
|
382 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
383 |
+
except Exception as e:
|
384 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
385 |
+
default_value = None
|
386 |
+
custom_css = """
|
387 |
+
.chatbot-container .message-wrap .message {
|
388 |
+
font-size: 14px !important;
|
389 |
+
}
|
390 |
+
"""
|
391 |
+
with gr.TabItem("LLM Chat & Anki Deck Creation", visible=True):
|
392 |
+
gr.Markdown("# Chat with an LLM to help you come up with Questions/Answers for an Anki Deck")
|
393 |
+
chat_history = gr.State([])
|
394 |
+
media_content = gr.State({})
|
395 |
+
selected_parts = gr.State([])
|
396 |
+
conversation_id = gr.State(None)
|
397 |
+
initial_prompts, total_pages, current_page = list_prompts(page=1, per_page=10)
|
398 |
+
|
399 |
+
with gr.Row():
|
400 |
+
with gr.Column(scale=1):
|
401 |
+
search_query_input = gr.Textbox(
|
402 |
+
label="Search Query",
|
403 |
+
placeholder="Enter your search query here..."
|
404 |
+
)
|
405 |
+
search_type_input = gr.Radio(
|
406 |
+
choices=["Title", "Content", "Author", "Keyword"],
|
407 |
+
value="Keyword",
|
408 |
+
label="Search By"
|
409 |
+
)
|
410 |
+
keyword_filter_input = gr.Textbox(
|
411 |
+
label="Filter by Keywords (comma-separated)",
|
412 |
+
placeholder="ml, ai, python, etc..."
|
413 |
+
)
|
414 |
+
search_button = gr.Button("Search")
|
415 |
+
items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
|
416 |
+
item_mapping = gr.State({})
|
417 |
+
with gr.Row():
|
418 |
+
use_content = gr.Checkbox(label="Use Content")
|
419 |
+
use_summary = gr.Checkbox(label="Use Summary")
|
420 |
+
use_prompt = gr.Checkbox(label="Use Prompt")
|
421 |
+
save_conversation = gr.Checkbox(label="Save Conversation", value=False, visible=True)
|
422 |
+
with gr.Row():
|
423 |
+
temperature = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
|
424 |
+
with gr.Row():
|
425 |
+
conversation_search = gr.Textbox(label="Search Conversations")
|
426 |
+
with gr.Row():
|
427 |
+
search_conversations_btn = gr.Button("Search Conversations")
|
428 |
+
with gr.Row():
|
429 |
+
previous_conversations = gr.Dropdown(label="Select Conversation", choices=[], interactive=True)
|
430 |
+
with gr.Row():
|
431 |
+
load_conversations_btn = gr.Button("Load Selected Conversation")
|
432 |
+
|
433 |
+
# Refactored API selection dropdown
|
434 |
+
api_endpoint = gr.Dropdown(
|
435 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
436 |
+
value=default_value,
|
437 |
+
label="API for Chat Interaction (Optional)"
|
438 |
+
)
|
439 |
+
api_key = gr.Textbox(label="API Key (if required)", type="password")
|
440 |
+
custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
|
441 |
+
value=False,
|
442 |
+
visible=True)
|
443 |
+
preset_prompt_checkbox = gr.Checkbox(label="Use a Pre-set Prompt",
|
444 |
+
value=False,
|
445 |
+
visible=True)
|
446 |
+
with gr.Row(visible=False) as preset_prompt_controls:
|
447 |
+
prev_prompt_page = gr.Button("Previous")
|
448 |
+
next_prompt_page = gr.Button("Next")
|
449 |
+
current_prompt_page_text = gr.Text(f"Page {current_page} of {total_pages}")
|
450 |
+
current_prompt_page_state = gr.State(value=1)
|
451 |
+
|
452 |
+
preset_prompt = gr.Dropdown(
|
453 |
+
label="Select Preset Prompt",
|
454 |
+
choices=initial_prompts
|
455 |
+
)
|
456 |
+
user_prompt = gr.Textbox(label="Custom Prompt",
|
457 |
+
placeholder="Enter custom prompt here",
|
458 |
+
lines=3,
|
459 |
+
visible=False)
|
460 |
+
system_prompt_input = gr.Textbox(label="System Prompt",
|
461 |
+
value="You are a helpful AI assitant",
|
462 |
+
lines=3,
|
463 |
+
visible=False)
|
464 |
+
with gr.Column(scale=2):
|
465 |
+
chatbot = gr.Chatbot(height=800, elem_classes="chatbot-container")
|
466 |
+
msg = gr.Textbox(label="Enter your message")
|
467 |
+
submit = gr.Button("Submit")
|
468 |
+
regenerate_button = gr.Button("Regenerate Last Message")
|
469 |
+
token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False)
|
470 |
+
clear_chat_button = gr.Button("Clear Chat")
|
471 |
+
|
472 |
+
chat_media_name = gr.Textbox(label="Custom Chat Name(optional)")
|
473 |
+
save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
|
474 |
+
save_status = gr.Textbox(label="Save Status", interactive=False)
|
475 |
+
save_chat_history_as_file = gr.Button("Save Chat History as File")
|
476 |
+
download_file = gr.File(label="Download Chat History")
|
477 |
+
|
478 |
+
search_button.click(
|
479 |
+
fn=update_dropdown_multiple,
|
480 |
+
inputs=[search_query_input, search_type_input, keyword_filter_input],
|
481 |
+
outputs=[items_output, item_mapping]
|
482 |
+
)
|
483 |
+
|
484 |
+
def update_prompt_visibility(custom_prompt_checked, preset_prompt_checked):
    """Build the visibility updates driven by the two prompt checkboxes.

    Returns four ``gr.update`` values, in order:
      1. custom prompt box   - visible/interactive when the custom box is ticked
      2. system prompt box   - visible/interactive when the custom box is ticked
      3. preset prompt list  - visible/interactive when the preset box is ticked
      4. preset paging row   - visible when the preset box is ticked
    """
    show_custom = custom_prompt_checked
    show_preset = preset_prompt_checked

    user_prompt_update = gr.update(visible=show_custom, interactive=show_custom)
    system_prompt_update = gr.update(visible=show_custom, interactive=show_custom)
    preset_prompt_update = gr.update(visible=show_preset, interactive=show_preset)
    preset_controls_update = gr.update(visible=show_preset)

    return (
        user_prompt_update,
        system_prompt_update,
        preset_prompt_update,
        preset_controls_update,
    )
|
495 |
+
|
496 |
+
def update_prompt_page(direction, current_page_val, per_page=10):
    """Move the preset-prompt pager one step and reload that page.

    Args:
        direction: Page offset to apply (-1 for previous, +1 for next).
        current_page_val: Page number currently shown.
        per_page: Prompts per page. Defaults to 10 so paging uses the same
            page size as the initial ``list_prompts(page=1, per_page=10)``
            load; with a different size the page count in the label and the
            items shown would not line up with the first page.

    Returns:
        A 3-tuple of (dropdown choices update, "Page X of Y" label update,
        new page number) matching the handler's three outputs.
    """
    # Never go below the first page.
    new_page = max(current_page_val + direction, 1)
    prompts, total_pages, _ = list_prompts(page=new_page, per_page=per_page)

    # Clamp to the last page, but never below 1: if list_prompts reports
    # zero pages, clamping to total_pages would request page 0.
    last_page = max(total_pages, 1)
    if new_page > last_page:
        new_page = last_page
        prompts, total_pages, _ = list_prompts(page=new_page, per_page=per_page)

    return (
        gr.update(choices=prompts),
        gr.update(value=f"Page {new_page} of {total_pages}"),
        new_page
    )
|
509 |
+
|
510 |
+
def clear_chat():
    """Return the reset values for a cleared chat.

    The first element is an empty list (for the chatbot) and the second is
    None (for the conversation id).
    """
    emptied_history = []
    return emptied_history, None
|
512 |
+
|
513 |
+
custom_prompt_checkbox.change(
|
514 |
+
update_prompt_visibility,
|
515 |
+
inputs=[custom_prompt_checkbox, preset_prompt_checkbox],
|
516 |
+
outputs=[user_prompt, system_prompt_input, preset_prompt, preset_prompt_controls]
|
517 |
+
)
|
518 |
+
|
519 |
+
preset_prompt_checkbox.change(
|
520 |
+
update_prompt_visibility,
|
521 |
+
inputs=[custom_prompt_checkbox, preset_prompt_checkbox],
|
522 |
+
outputs=[user_prompt, system_prompt_input, preset_prompt, preset_prompt_controls]
|
523 |
+
)
|
524 |
+
|
525 |
+
prev_prompt_page.click(
|
526 |
+
lambda x: update_prompt_page(-1, x),
|
527 |
+
inputs=[current_prompt_page_state],
|
528 |
+
outputs=[preset_prompt, current_prompt_page_text, current_prompt_page_state]
|
529 |
+
)
|
530 |
+
|
531 |
+
next_prompt_page.click(
|
532 |
+
lambda x: update_prompt_page(1, x),
|
533 |
+
inputs=[current_prompt_page_state],
|
534 |
+
outputs=[preset_prompt, current_prompt_page_text, current_prompt_page_state]
|
535 |
+
)
|
536 |
+
|
537 |
+
submit.click(
|
538 |
+
chat_wrapper,
|
539 |
+
inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt,
|
540 |
+
conversation_id,
|
541 |
+
save_conversation, temperature, system_prompt_input],
|
542 |
+
outputs=[msg, chatbot, conversation_id]
|
543 |
+
).then( # Clear the message box after submission
|
544 |
+
lambda x: gr.update(value=""),
|
545 |
+
inputs=[chatbot],
|
546 |
+
outputs=[msg]
|
547 |
+
).then( # Clear the user prompt after the first message
|
548 |
+
lambda: (gr.update(value=""), gr.update(value="")),
|
549 |
+
outputs=[user_prompt, system_prompt_input]
|
550 |
+
).then(
|
551 |
+
lambda history: approximate_token_count(history),
|
552 |
+
inputs=[chatbot],
|
553 |
+
outputs=[token_count_display]
|
554 |
+
)
|
555 |
+
|
556 |
+
|
557 |
+
clear_chat_button.click(
|
558 |
+
clear_chat,
|
559 |
+
outputs=[chatbot, conversation_id]
|
560 |
+
)
|
561 |
+
|
562 |
+
items_output.change(
|
563 |
+
update_chat_content,
|
564 |
+
inputs=[items_output, use_content, use_summary, use_prompt, item_mapping],
|
565 |
+
outputs=[media_content, selected_parts]
|
566 |
+
)
|
567 |
+
|
568 |
+
use_content.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
|
569 |
+
outputs=[selected_parts])
|
570 |
+
use_summary.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
|
571 |
+
outputs=[selected_parts])
|
572 |
+
use_prompt.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
|
573 |
+
outputs=[selected_parts])
|
574 |
+
items_output.change(debug_output, inputs=[media_content, selected_parts], outputs=[])
|
575 |
+
|
576 |
+
search_conversations_btn.click(
|
577 |
+
search_conversations,
|
578 |
+
inputs=[conversation_search],
|
579 |
+
outputs=[previous_conversations]
|
580 |
+
)
|
581 |
+
|
582 |
+
load_conversations_btn.click(
|
583 |
+
clear_chat,
|
584 |
+
outputs=[chatbot, chat_history]
|
585 |
+
).then(
|
586 |
+
load_conversation,
|
587 |
+
inputs=[previous_conversations],
|
588 |
+
outputs=[chatbot, conversation_id]
|
589 |
+
)
|
590 |
+
|
591 |
+
previous_conversations.change(
|
592 |
+
load_conversation,
|
593 |
+
inputs=[previous_conversations],
|
594 |
+
outputs=[chat_history]
|
595 |
+
)
|
596 |
+
|
597 |
+
save_chat_history_as_file.click(
|
598 |
+
save_chat_history,
|
599 |
+
inputs=[chatbot, conversation_id],
|
600 |
+
outputs=[download_file]
|
601 |
+
)
|
602 |
+
|
603 |
+
save_chat_history_to_db.click(
|
604 |
+
save_chat_history_to_db_wrapper,
|
605 |
+
inputs=[chatbot, conversation_id, media_content, chat_media_name],
|
606 |
+
outputs=[conversation_id, gr.Textbox(label="Save Status")]
|
607 |
+
)
|
608 |
+
|
609 |
+
regenerate_button.click(
|
610 |
+
regenerate_last_message,
|
611 |
+
inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temperature,
|
612 |
+
system_prompt_input],
|
613 |
+
outputs=[chatbot, save_status]
|
614 |
+
).then(
|
615 |
+
lambda history: approximate_token_count(history),
|
616 |
+
inputs=[chatbot],
|
617 |
+
outputs=[token_count_display]
|
618 |
+
)
|
619 |
+
gr.Markdown("# Create Anki Deck")
|
620 |
+
|
621 |
+
with gr.Row():
|
622 |
+
# Left Column: Deck Settings
|
623 |
+
with gr.Column(scale=1):
|
624 |
+
gr.Markdown("## Deck Settings")
|
625 |
+
deck_name = gr.Textbox(
|
626 |
+
label="Deck Name",
|
627 |
+
placeholder="My Study Deck",
|
628 |
+
value="My Study Deck"
|
629 |
+
)
|
630 |
+
|
631 |
+
deck_description = gr.Textbox(
|
632 |
+
label="Deck Description",
|
633 |
+
placeholder="Description of your deck",
|
634 |
+
lines=2
|
635 |
+
)
|
636 |
+
|
637 |
+
note_type = gr.Radio(
|
638 |
+
choices=["Basic", "Basic (and reversed)", "Cloze"],
|
639 |
+
label="Note Type",
|
640 |
+
value="Basic"
|
641 |
+
)
|
642 |
+
|
643 |
+
# Card Fields based on note type
|
644 |
+
with gr.Group() as basic_fields:
|
645 |
+
front_template = gr.Textbox(
|
646 |
+
label="Front Template (HTML)",
|
647 |
+
value="{{Front}}",
|
648 |
+
lines=3
|
649 |
+
)
|
650 |
+
back_template = gr.Textbox(
|
651 |
+
label="Back Template (HTML)",
|
652 |
+
value="{{FrontSide}}<hr id='answer'>{{Back}}",
|
653 |
+
lines=3
|
654 |
+
)
|
655 |
+
|
656 |
+
with gr.Group() as cloze_fields:
|
657 |
+
cloze_template = gr.Textbox(
|
658 |
+
label="Cloze Template (HTML)",
|
659 |
+
value="{{cloze:Text}}",
|
660 |
+
lines=3,
|
661 |
+
visible=False
|
662 |
+
)
|
663 |
+
|
664 |
+
css_styling = gr.Textbox(
|
665 |
+
label="Card Styling (CSS)",
|
666 |
+
value=".card {\n font-family: arial;\n font-size: 20px;\n text-align: center;\n color: black;\n background-color: white;\n}\n\n.cloze {\n font-weight: bold;\n color: blue;\n}",
|
667 |
+
lines=5
|
668 |
+
)
|
669 |
+
|
670 |
+
# Right Column: Card Creation
|
671 |
+
with gr.Column(scale=1):
|
672 |
+
gr.Markdown("## Add Cards")
|
673 |
+
|
674 |
+
with gr.Group() as basic_input:
|
675 |
+
front_content = gr.TextArea(
|
676 |
+
label="Front Content",
|
677 |
+
placeholder="Question or prompt",
|
678 |
+
lines=3
|
679 |
+
)
|
680 |
+
back_content = gr.TextArea(
|
681 |
+
label="Back Content",
|
682 |
+
placeholder="Answer",
|
683 |
+
lines=3
|
684 |
+
)
|
685 |
+
|
686 |
+
with gr.Group() as cloze_input:
|
687 |
+
cloze_content = gr.TextArea(
|
688 |
+
label="Cloze Content",
|
689 |
+
placeholder="Text with {{c1::cloze}} deletions",
|
690 |
+
lines=3,
|
691 |
+
visible=False
|
692 |
+
)
|
693 |
+
|
694 |
+
tags_input = gr.TextArea(
|
695 |
+
label="Tags (comma-separated)",
|
696 |
+
placeholder="tag1, tag2, tag3",
|
697 |
+
lines=1
|
698 |
+
)
|
699 |
+
|
700 |
+
add_card_btn = gr.Button("Add Card")
|
701 |
+
|
702 |
+
cards_list = gr.JSON(
|
703 |
+
label="Cards in Deck",
|
704 |
+
value={"cards": []}
|
705 |
+
)
|
706 |
+
|
707 |
+
clear_cards_btn = gr.Button("Clear All Cards", variant="stop")
|
708 |
+
|
709 |
+
with gr.Row():
|
710 |
+
generate_deck_btn = gr.Button("Generate Deck", variant="primary")
|
711 |
+
download_deck = gr.File(label="Download Deck")
|
712 |
+
generation_status = gr.Markdown("")
|
713 |
+
|
714 |
+
def update_note_type_fields(note_type: str):
    """Toggle the basic vs. cloze groups to match the selected note type.

    Returns a dict keyed by the four group components; "Cloze" shows the
    cloze groups and hides the basic ones, any other value does the opposite.
    """
    # NOTE(review): the cloze textbox/textarea inside these groups are created
    # with visible=False and are not updated here — confirm they actually
    # appear when their group is shown.
    show_cloze = note_type == "Cloze"
    show_basic = not show_cloze
    return {
        basic_input: gr.update(visible=show_basic),
        cloze_input: gr.update(visible=show_cloze),
        basic_fields: gr.update(visible=show_basic),
        cloze_fields: gr.update(visible=show_cloze),
    }
|
729 |
+
|
730 |
+
def add_card(note_type: str, front: str, back: str, cloze: str, tags: str, current_cards: Dict[str, List]):
    """Validate one card and append it to the deck's card list.

    Args:
        note_type: "Cloze" creates a cloze card; any other value creates a
            basic card ("Basic (and reversed)" sets ``is_reverse``).
        front: Front text (basic cards only).
        back: Back text (basic cards only).
        cloze: Cloze text containing at least one ``{{cN::text}}`` deletion.
        tags: Comma-separated tags; blanks are dropped.
        current_cards: Current deck state, ``{"cards": [...]}``; may be empty
            or None.

    Returns:
        ``(deck_state, status_message)``. On validation failure the deck state
        is returned unchanged together with an error message.
    """
    import re  # local import: the surrounding file's import list is not visible here

    # Work on a copy so the caller's list is never mutated in place.
    existing_cards = list((current_cards or {}).get("cards") or [])

    # Process tags
    card_tags = [tag.strip() for tag in (tags or "").split(',') if tag.strip()]

    new_card = {
        "id": f"CARD_{len(existing_cards) + 1}",
        "tags": card_tags
    }

    if note_type == "Cloze":
        # Require a real deletion such as {{c1::text}}; a bare "{{c" prefix
        # (e.g. "{{cloze}}") is not a usable cloze.
        if not cloze or not re.search(r"\{\{c\d+::.+?\}\}", cloze, re.DOTALL):
            return {"cards": existing_cards}, "❌ Invalid cloze format. Use {{c1::text}} syntax."
        new_card.update({
            "type": "cloze",
            "content": cloze
        })
    else:
        if not front or not back:
            return {"cards": existing_cards}, "❌ Both front and back content are required."
        new_card.update({
            "type": "basic",
            "front": front,
            "back": back,
            "is_reverse": note_type == "Basic (and reversed)"
        })

    existing_cards.append(new_card)
    return {"cards": existing_cards}, "✅ Card added successfully!"
|
763 |
+
|
764 |
+
def clear_cards() -> Tuple[Dict[str, List], str]:
    """Return a fresh, empty deck state together with a confirmation message."""
    empty_deck: Dict[str, List] = {"cards": []}
    status = "✅ All cards cleared!"
    return empty_deck, status
|
766 |
+
|
767 |
+
def generate_anki_deck(
        deck_name: str,
        deck_description: str,
        note_type: str,
        front_template: str,
        back_template: str,
        cloze_template: str,
        css: str,
        cards_data: Dict[str, List]
) -> Tuple[Optional[str], str]:
    """Build an Anki ``.apkg`` package from the cards collected in the UI.

    Args:
        deck_name: Deck title; also used (sanitized) as the output file name.
        deck_description: Description stored on the deck.
        note_type: "Cloze", "Basic (and reversed)", or anything else for a
            plain basic model.
        front_template: Question-side HTML template for basic models.
        back_template: Answer-side HTML template for basic models.
        cloze_template: HTML template used for both sides of the cloze model.
        css: Card styling applied to the model.
        cards_data: Deck state, ``{"cards": [...]}``.

    Returns:
        ``(path, status)``: the written package path and a success message, or
        ``(None, message)`` when there are no cards or any exception occurs.
    """
    import re  # local import: the surrounding file's import list is not visible here

    try:
        if not cards_data or not cards_data.get("cards"):
            return None, "❌ No cards to generate deck from!"

        # Create model based on note type.
        # NOTE(review): one model is chosen from the *current* note type and
        # applied to every card, even cards added under a different type —
        # confirm mixed decks are not expected.
        if note_type == "Cloze":
            model = genanki.Model(
                1483883320,  # Random model ID
                'Cloze Model',
                fields=[
                    {'name': 'Text'},
                    {'name': 'Back Extra'}
                ],
                templates=[{
                    'name': 'Cloze Card',
                    'qfmt': cloze_template,
                    'afmt': cloze_template + '<br><hr id="extra">{{Back Extra}}'
                }],
                css=css,
                # Literal 1 is presumably the cloze model type; the original
                # FIXME noted that a named CLOZE constant was not available.
                model_type=1
            )
        else:
            templates = [{
                'name': 'Card 1',
                'qfmt': front_template,
                'afmt': back_template
            }]

            if note_type == "Basic (and reversed)":
                templates.append({
                    'name': 'Card 2',
                    'qfmt': '{{Back}}',
                    'afmt': '{{FrontSide}}<hr id="answer">{{Front}}'
                })

            model = genanki.Model(
                1607392319,  # Random model ID
                'Basic Model',
                fields=[
                    {'name': 'Front'},
                    {'name': 'Back'}
                ],
                templates=templates,
                css=css
            )

        # Create deck
        deck = genanki.Deck(
            2059400110,  # Random deck ID
            deck_name,
            description=deck_description
        )

        # Add cards to deck
        for card in cards_data["cards"]:
            if card["type"] == "cloze":
                fields = [card["content"], ""]
            else:
                fields = [card["front"], card["back"]]
            deck.add_note(genanki.Note(model=model, fields=fields, tags=card["tags"]))

        # Save deck to temporary file. The deck name is user input: strip path
        # separators and other characters that are invalid in file names so
        # the package always lands inside the fresh temp directory.
        safe_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", deck_name or "").strip(" .") or "deck"
        temp_dir = tempfile.mkdtemp()
        deck_path = os.path.join(temp_dir, f"{safe_name}.apkg")
        genanki.Package(deck).write_to_file(deck_path)

        return deck_path, "✅ Deck generated successfully!"

    except Exception as e:
        return None, f"❌ Error generating deck: {str(e)}"
|
856 |
+
|
857 |
+
# Register event handlers
|
858 |
+
note_type.change(
|
859 |
+
fn=update_note_type_fields,
|
860 |
+
inputs=[note_type],
|
861 |
+
outputs=[basic_input, cloze_input, basic_fields, cloze_fields]
|
862 |
+
)
|
863 |
+
|
864 |
+
add_card_btn.click(
|
865 |
+
fn=add_card,
|
866 |
+
inputs=[
|
867 |
+
note_type,
|
868 |
+
front_content,
|
869 |
+
back_content,
|
870 |
+
cloze_content,
|
871 |
+
tags_input,
|
872 |
+
cards_list
|
873 |
+
],
|
874 |
+
outputs=[cards_list, generation_status]
|
875 |
+
)
|
876 |
+
|
877 |
+
clear_cards_btn.click(
|
878 |
+
fn=clear_cards,
|
879 |
+
inputs=[],
|
880 |
+
outputs=[cards_list, generation_status]
|
881 |
+
)
|
882 |
+
|
883 |
+
generate_deck_btn.click(
|
884 |
+
fn=generate_anki_deck,
|
885 |
+
inputs=[
|
886 |
+
deck_name,
|
887 |
+
deck_description,
|
888 |
+
note_type,
|
889 |
+
front_template,
|
890 |
+
back_template,
|
891 |
+
cloze_template,
|
892 |
+
css_styling,
|
893 |
+
cards_list
|
894 |
+
],
|
895 |
+
outputs=[download_deck, generation_status]
|
896 |
+
)
|
897 |
+
|
898 |
+
|
899 |
+
return (
|
900 |
+
deck_name,
|
901 |
+
deck_description,
|
902 |
+
note_type,
|
903 |
+
front_template,
|
904 |
+
back_template,
|
905 |
+
cloze_template,
|
906 |
+
css_styling,
|
907 |
+
front_content,
|
908 |
+
back_content,
|
909 |
+
cloze_content,
|
910 |
+
tags_input,
|
911 |
+
cards_list,
|
912 |
+
add_card_btn,
|
913 |
+
clear_cards_btn,
|
914 |
+
generate_deck_btn,
|
915 |
+
download_deck,
|
916 |
+
generation_status
|
917 |
+
)
|
918 |
+
|
919 |
+
#
|
920 |
+
# End of Anki_Validation_tab.py
|
921 |
+
############################################################################################################
|
App_Function_Libraries/Gradio_UI/Audio_ingestion_tab.py
CHANGED
@@ -2,16 +2,18 @@
|
|
2 |
# Description: Gradio UI for ingesting audio files into the database
|
3 |
#
|
4 |
# Imports
|
|
|
5 |
#
|
6 |
# External Imports
|
7 |
import gradio as gr
|
8 |
#
|
9 |
# Local Imports
|
10 |
from App_Function_Libraries.Audio.Audio_Files import process_audio_files
|
11 |
-
from App_Function_Libraries.DB.DB_Manager import
|
12 |
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
|
13 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models
|
14 |
-
from App_Function_Libraries.Utils.Utils import cleanup_temp_files
|
|
|
15 |
# Import metrics logging
|
16 |
from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
|
17 |
from App_Function_Libraries.Metrics.logger_config import logger
|
@@ -22,6 +24,18 @@ from App_Function_Libraries.Metrics.logger_config import logger
|
|
22 |
def create_audio_processing_tab():
|
23 |
with gr.TabItem("Audio File Transcription + Summarization", visible=True):
|
24 |
gr.Markdown("# Transcribe & Summarize Audio Files from URLs or Local Files!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
with gr.Row():
|
26 |
with gr.Column():
|
27 |
audio_url_input = gr.Textbox(label="Audio File URL(s)", placeholder="Enter the URL(s) of the audio file(s), one per line")
|
@@ -46,54 +60,133 @@ def create_audio_processing_tab():
|
|
46 |
keep_timestamps_input = gr.Checkbox(label="Keep Timestamps", value=True)
|
47 |
|
48 |
with gr.Row():
|
49 |
-
custom_prompt_checkbox = gr.Checkbox(
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
with gr.Row():
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
59 |
with gr.Row():
|
60 |
-
custom_prompt_input = gr.Textbox(
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
64 |
with gr.Row():
|
65 |
-
system_prompt_input = gr.Textbox(
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
-
|
72 |
-
-
|
73 |
-
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
- **
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
-
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
|
|
85 |
|
86 |
custom_prompt_checkbox.change(
|
87 |
fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
88 |
inputs=[custom_prompt_checkbox],
|
89 |
outputs=[custom_prompt_input, system_prompt_input]
|
90 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
preset_prompt_checkbox.change(
|
92 |
-
fn=
|
93 |
inputs=[preset_prompt_checkbox],
|
94 |
-
outputs=[preset_prompt]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
)
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
def update_prompts(preset_name):
|
98 |
prompts = update_user_prompt(preset_name)
|
99 |
return (
|
@@ -103,15 +196,14 @@ def create_audio_processing_tab():
|
|
103 |
|
104 |
preset_prompt.change(
|
105 |
update_prompts,
|
106 |
-
inputs=preset_prompt,
|
107 |
outputs=[custom_prompt_input, system_prompt_input]
|
108 |
)
|
109 |
-
|
110 |
api_name_input = gr.Dropdown(
|
111 |
-
choices=[None
|
112 |
-
|
113 |
-
|
114 |
-
label="API for Summarization (Optional)"
|
115 |
)
|
116 |
api_key_input = gr.Textbox(label="API Key (if required)", placeholder="Enter your API key here", type="password")
|
117 |
custom_keywords_input = gr.Textbox(label="Custom Keywords", placeholder="Enter custom keywords, comma-separated")
|
|
|
2 |
# Description: Gradio UI for ingesting audio files into the database
|
3 |
#
|
4 |
# Imports
|
5 |
+
import logging
|
6 |
#
|
7 |
# External Imports
|
8 |
import gradio as gr
|
9 |
#
|
10 |
# Local Imports
|
11 |
from App_Function_Libraries.Audio.Audio_Files import process_audio_files
|
12 |
+
from App_Function_Libraries.DB.DB_Manager import list_prompts
|
13 |
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
|
14 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models
|
15 |
+
from App_Function_Libraries.Utils.Utils import cleanup_temp_files, default_api_endpoint, global_api_endpoints, \
|
16 |
+
format_api_name
|
17 |
# Import metrics logging
|
18 |
from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
|
19 |
from App_Function_Libraries.Metrics.logger_config import logger
|
|
|
24 |
def create_audio_processing_tab():
|
25 |
with gr.TabItem("Audio File Transcription + Summarization", visible=True):
|
26 |
gr.Markdown("# Transcribe & Summarize Audio Files from URLs or Local Files!")
|
27 |
+
# Get and validate default value
|
28 |
+
try:
|
29 |
+
default_value = None
|
30 |
+
if default_api_endpoint:
|
31 |
+
if default_api_endpoint in global_api_endpoints:
|
32 |
+
default_value = format_api_name(default_api_endpoint)
|
33 |
+
else:
|
34 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
35 |
+
except Exception as e:
|
36 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
37 |
+
default_value = None
|
38 |
+
|
39 |
with gr.Row():
|
40 |
with gr.Column():
|
41 |
audio_url_input = gr.Textbox(label="Audio File URL(s)", placeholder="Enter the URL(s) of the audio file(s), one per line")
|
|
|
60 |
keep_timestamps_input = gr.Checkbox(label="Keep Timestamps", value=True)
|
61 |
|
62 |
with gr.Row():
|
63 |
+
custom_prompt_checkbox = gr.Checkbox(
|
64 |
+
label="Use a Custom Prompt",
|
65 |
+
value=False,
|
66 |
+
visible=True
|
67 |
+
)
|
68 |
+
preset_prompt_checkbox = gr.Checkbox(
|
69 |
+
label="Use a pre-set Prompt",
|
70 |
+
value=False,
|
71 |
+
visible=True
|
72 |
+
)
|
73 |
+
|
74 |
+
# Initialize state variables for pagination
|
75 |
+
current_page_state = gr.State(value=1)
|
76 |
+
total_pages_state = gr.State(value=1)
|
77 |
+
|
78 |
+
with gr.Row():
|
79 |
+
# Add pagination controls
|
80 |
+
preset_prompt = gr.Dropdown(
|
81 |
+
label="Select Preset Prompt",
|
82 |
+
choices=[],
|
83 |
+
visible=False
|
84 |
+
)
|
85 |
with gr.Row():
|
86 |
+
prev_page_button = gr.Button("Previous Page", visible=False)
|
87 |
+
page_display = gr.Markdown("Page 1 of X", visible=False)
|
88 |
+
next_page_button = gr.Button("Next Page", visible=False)
|
89 |
+
|
90 |
with gr.Row():
|
91 |
+
custom_prompt_input = gr.Textbox(
|
92 |
+
label="Custom Prompt",
|
93 |
+
placeholder="Enter custom prompt here",
|
94 |
+
lines=3,
|
95 |
+
visible=False
|
96 |
+
)
|
97 |
with gr.Row():
|
98 |
+
system_prompt_input = gr.Textbox(
|
99 |
+
label="System Prompt",
|
100 |
+
value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
101 |
+
**Bulleted Note Creation Guidelines**
|
102 |
+
|
103 |
+
**Headings**:
|
104 |
+
- Based on referenced topics, not categories like quotes or terms
|
105 |
+
- Surrounded by **bold** formatting
|
106 |
+
- Not listed as bullet points
|
107 |
+
- No space between headings and list items underneath
|
108 |
+
|
109 |
+
**Emphasis**:
|
110 |
+
- **Important terms** set in bold font
|
111 |
+
- **Text ending in a colon**: also bolded
|
112 |
+
|
113 |
+
**Review**:
|
114 |
+
- Ensure adherence to specified format
|
115 |
+
- Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
|
116 |
+
""",
|
117 |
+
lines=3,
|
118 |
+
visible=False
|
119 |
+
)
|
120 |
|
121 |
custom_prompt_checkbox.change(
|
122 |
fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
123 |
inputs=[custom_prompt_checkbox],
|
124 |
outputs=[custom_prompt_input, system_prompt_input]
|
125 |
)
|
126 |
+
|
127 |
+
# Handle preset prompt checkbox change
|
128 |
+
def on_preset_prompt_checkbox_change(is_checked):
    """Show or hide the preset-prompt dropdown and its pagination controls.

    When checked, loads the first page of prompts (10 per page) and returns
    updates for the dropdown, both page buttons and the page label, plus the
    new current-page and total-pages state values. When unchecked, hides all
    four components and resets both state values to 1.
    """
    if not is_checked:
        hidden = gr.update(visible=False)
        return (
            gr.update(visible=False, interactive=False),  # preset_prompt
            hidden,  # prev_page_button
            gr.update(visible=False),  # next_page_button
            gr.update(visible=False),  # page_display
            1,  # current_page_state
            1,  # total_pages_state
        )

    prompts, page_count, page_number = list_prompts(page=1, per_page=10)
    label = f"Page {page_number} of {page_count}"
    return (
        gr.update(visible=True, interactive=True, choices=prompts),  # preset_prompt
        gr.update(visible=True),  # prev_page_button
        gr.update(visible=True),  # next_page_button
        gr.update(value=label, visible=True),  # page_display
        page_number,  # current_page_state
        page_count,  # total_pages_state
    )
|
149 |
+
|
150 |
preset_prompt_checkbox.change(
|
151 |
+
fn=on_preset_prompt_checkbox_change,
|
152 |
inputs=[preset_prompt_checkbox],
|
153 |
+
outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
|
154 |
+
)
|
155 |
+
|
156 |
+
# Pagination button functions
|
157 |
+
def on_prev_page_click(current_page, total_pages):
    """Load the previous page of preset prompts, never going below page 1.

    Returns updates for the dropdown choices and the page label, plus the page
    number reported by ``list_prompts`` for the current-page state.
    """
    target_page = max(current_page - 1, 1)
    prompts, page_count, page_number = list_prompts(page=target_page, per_page=10)
    dropdown_update = gr.update(choices=prompts)
    label_update = gr.update(value=f"Page {page_number} of {page_count}")
    return dropdown_update, label_update, page_number
|
166 |
+
|
167 |
+
prev_page_button.click(
|
168 |
+
fn=on_prev_page_click,
|
169 |
+
inputs=[current_page_state, total_pages_state],
|
170 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
171 |
)
|
172 |
|
173 |
+
def on_next_page_click(current_page, total_pages):
    """Load the next page of preset prompts, never going past ``total_pages``.

    Returns updates for the dropdown choices and the page label, plus the page
    number reported by ``list_prompts`` for the current-page state.
    """
    target_page = min(current_page + 1, total_pages)
    prompts, page_count, page_number = list_prompts(page=target_page, per_page=10)
    dropdown_update = gr.update(choices=prompts)
    label_update = gr.update(value=f"Page {page_number} of {page_count}")
    return dropdown_update, label_update, page_number
|
182 |
+
|
183 |
+
next_page_button.click(
|
184 |
+
fn=on_next_page_click,
|
185 |
+
inputs=[current_page_state, total_pages_state],
|
186 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
187 |
+
)
|
188 |
+
|
189 |
+
# Update prompts when a preset is selected
|
190 |
def update_prompts(preset_name):
|
191 |
prompts = update_user_prompt(preset_name)
|
192 |
return (
|
|
|
196 |
|
197 |
preset_prompt.change(
|
198 |
update_prompts,
|
199 |
+
inputs=[preset_prompt],
|
200 |
outputs=[custom_prompt_input, system_prompt_input]
|
201 |
)
|
202 |
+
# Refactored API selection dropdown
|
203 |
api_name_input = gr.Dropdown(
|
204 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
205 |
+
value=default_value,
|
206 |
+
label="API for Summarization/Analysis (Optional)"
|
|
|
207 |
)
|
208 |
api_key_input = gr.Textbox(label="API Key (if required)", placeholder="Enter your API key here", type="password")
|
209 |
custom_keywords_input = gr.Textbox(label="Custom Keywords", placeholder="Enter custom keywords, comma-separated")
|
App_Function_Libraries/Gradio_UI/Backup_Functionality.py
CHANGED
@@ -14,7 +14,7 @@ from App_Function_Libraries.DB.DB_Manager import create_automated_backup, db_pat
|
|
14 |
#
|
15 |
# Functions:
|
16 |
|
17 |
-
def
|
18 |
backup_file = create_automated_backup(db_path, backup_dir)
|
19 |
return f"Backup created: {backup_file}"
|
20 |
|
@@ -42,18 +42,7 @@ def create_backup_tab():
|
|
42 |
create_button = gr.Button("Create Backup")
|
43 |
create_output = gr.Textbox(label="Result")
|
44 |
with gr.Column():
|
45 |
-
create_button.click(
|
46 |
-
|
47 |
-
|
48 |
-
def create_view_backups_tab():
|
49 |
-
with gr.TabItem("View Backups", visible=True):
|
50 |
-
gr.Markdown("# Browse available backups")
|
51 |
-
with gr.Row():
|
52 |
-
with gr.Column():
|
53 |
-
view_button = gr.Button("View Backups")
|
54 |
-
with gr.Column():
|
55 |
-
backup_list = gr.Textbox(label="Available Backups")
|
56 |
-
view_button.click(list_backups, inputs=[], outputs=backup_list)
|
57 |
|
58 |
|
59 |
def create_restore_backup_tab():
|
|
|
14 |
#
|
15 |
# Functions:
|
16 |
|
17 |
+
def create_db_backup():
    """Create an automated backup of the module-level database and report its path."""
    created_file = create_automated_backup(db_path, backup_dir)
    message = f"Backup created: {created_file}"
    return message
|
20 |
|
|
|
42 |
create_button = gr.Button("Create Backup")
|
43 |
create_output = gr.Textbox(label="Result")
|
44 |
with gr.Column():
|
45 |
+
create_button.click(create_db_backup, inputs=[], outputs=create_output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
|
48 |
def create_restore_backup_tab():
|
App_Function_Libraries/Gradio_UI/Backup_RAG_Notes_Character_Chat_tab.py
ADDED
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Backup_RAG_Notes_Character_Chat_tab.py
|
2 |
+
# Functionality for managing database backups
|
3 |
+
#
|
4 |
+
# Imports:
|
5 |
+
import os
|
6 |
+
import shutil
|
7 |
+
import gradio as gr
|
8 |
+
from typing import Dict, List
|
9 |
+
#
|
10 |
+
# Local Imports:
|
11 |
+
from App_Function_Libraries.DB.DB_Manager import create_automated_backup
|
12 |
+
from App_Function_Libraries.DB.DB_Backups import create_backup, create_incremental_backup, restore_single_db_backup
|
13 |
+
|
14 |
+
|
15 |
+
#
|
16 |
+
# End of Imports
|
17 |
+
#######################################################################################################################
|
18 |
+
#
|
19 |
+
# Functions:
|
20 |
+
|
21 |
+
def get_db_specific_backups(backup_dir: str, db_name: str) -> List[str]:
    """Get list of backups specific to a database.

    Args:
        backup_dir: Directory to scan for backup files.
        db_name: Database name; only files named ``<db_name>_*`` with a
            ``.db`` or ``.sqlib`` extension are returned.

    Returns:
        Matching file names sorted in descending name order, or an empty list
        when the backup directory does not exist yet.
    """
    try:
        entries = os.listdir(backup_dir)
    except FileNotFoundError:
        # No backup has been created yet; treat as "no backups" instead of
        # crashing the UI handler.
        return []

    prefix = f"{db_name}_"
    matching = [
        name for name in entries
        if name.startswith(prefix) and name.endswith(('.db', '.sqlib'))
    ]
    return sorted(matching, reverse=True)  # Most recent first
|
29 |
+
|
30 |
+
def create_backup_tab(db_path: str, backup_dir: str, db_name: str):
    """Create the backup creation tab for a database.

    Renders a heading, a note showing ``backup_dir``, two buttons (full and
    incremental backup) and a result textbox, then wires each button to the
    matching backup function for ``db_path`` / ``db_name``.
    """
    gr.Markdown("## Create Database Backup")
    gr.Markdown(f"This will create a backup in the directory: `{backup_dir}`")

    with gr.Row():
        with gr.Column():
            # A "Create Simple Backup" button is intentionally not rendered.
            full_btn = gr.Button("Create Full Backup")
            incremental_btn = gr.Button("Create Incremental Backup")
        with gr.Column():
            result_box = gr.Textbox(label="Result")

    def create_db_backup():
        # Handler for the disabled simple-backup button; currently unwired.
        backup_file = create_automated_backup(db_path, backup_dir)
        return f"Backup created: {backup_file}"

    # Lambdas capture this tab's database settings; the handlers take no UI inputs.
    full_btn.click(
        fn=lambda: create_backup(db_path, backup_dir, db_name),
        inputs=[],
        outputs=[result_box],
    )
    incremental_btn.click(
        fn=lambda: create_incremental_backup(db_path, backup_dir, db_name),
        inputs=[],
        outputs=[result_box],
    )
|
61 |
+
|
62 |
+
def create_view_backups_tab(backup_dir: str, db_name: str):
    """Create the backup viewing tab for a database.

    Renders a refresh button and a textbox; clicking the button fills the
    textbox with this database's backup file names, one per line.
    """
    gr.Markdown("## Available Backups")

    with gr.Row():
        with gr.Column():
            refresh_btn = gr.Button("Refresh Backup List")
        with gr.Column():
            listing_box = gr.Textbox(label="Available Backups")

    def list_db_backups():
        """List backups specific to this database."""
        found = get_db_specific_backups(backup_dir, db_name)
        if found:
            return "\n".join(found)
        return f"No backups found for {db_name} database"

    refresh_btn.click(fn=list_db_backups, inputs=[], outputs=[listing_box])
|
81 |
+
|
82 |
+
def validate_backup_name(backup_name: str, db_name: str) -> bool:
    """Validate that the backup name matches the database being restored.

    Args:
        backup_name: Bare file name typed by the user (no directory part).
        db_name: Database the backup must belong to.

    Returns:
        True only when ``backup_name`` is a plain file name that starts with
        ``<db_name>_backup_`` or ``<db_name>_incremental_`` and ends with
        ``.db`` or ``.sqlib``.
    """
    if not backup_name:
        return False

    # The name is later joined onto the backup directory, so anything carrying
    # a path separator (or NUL) could escape that directory — reject it.
    if any(ch in backup_name for ch in ("/", "\\", "\x00")):
        return False

    valid_prefixes = (
        f"{db_name}_backup_",  # Full backup prefix
        f"{db_name}_incremental_",  # Incremental backup prefix
    )
    has_valid_prefix = backup_name.startswith(valid_prefixes)
    has_valid_extension = backup_name.endswith(('.db', '.sqlib'))
    return has_valid_prefix and has_valid_extension
|
92 |
+
|
93 |
+
def create_restore_backup_tab(db_path: str, backup_dir: str, db_name: str):
    """Create the backup restoration tab for a database.

    Renders a warning, a filename textbox, a restore button and a result
    textbox; the button runs the validated restore for this database.
    """
    gr.Markdown("## Restore Database")
    gr.Markdown("⚠️ **Warning**: Restoring a backup will overwrite the current database.")

    with gr.Row():
        with gr.Column():
            filename_box = gr.Textbox(label="Backup Filename")
            restore_button = gr.Button("Restore", variant="primary")
        with gr.Column():
            result_box = gr.Textbox(label="Result")

    def secure_restore(backup_name: str) -> str:
        """Restore backup with validation checks."""
        # Each guard returns a user-facing message instead of raising.
        if not backup_name:
            return "Please enter a backup filename"

        name_is_valid = validate_backup_name(backup_name, db_name)
        if not name_is_valid:
            return f"Invalid backup file. Please select a backup file that starts with '{db_name}_backup_' or '{db_name}_incremental_'"

        candidate = os.path.join(backup_dir, backup_name)
        if not os.path.exists(candidate):
            return f"Backup file not found: {backup_name}"

        # All checks passed: hand over to the actual restore routine.
        return restore_single_db_backup(db_path, backup_dir, db_name, backup_name)

    restore_button.click(fn=secure_restore, inputs=[filename_box], outputs=[result_box])
|
126 |
+
|
127 |
+
def create_media_db_tabs(db_config: Dict[str, str]):
    """Create all tabs for the Media database.

    ``db_config`` must provide ``'db_path'`` and ``'backup_dir'``.
    """
    db_name = 'media'
    db_path = db_config['db_path']
    backup_dir = db_config['backup_dir']

    create_backup_tab(db_path=db_path, backup_dir=backup_dir, db_name=db_name)
    create_view_backups_tab(backup_dir=backup_dir, db_name=db_name)
    create_restore_backup_tab(db_path=db_path, backup_dir=backup_dir, db_name=db_name)
|
143 |
+
|
144 |
+
def create_rag_chat_tabs(db_config: Dict[str, str]):
    """Create all tabs for the RAG Chat database.

    ``db_config`` must provide ``'db_path'`` and ``'backup_dir'``.
    """
    db_name = 'rag_qa'  # Updated to match DB_Manager.py
    db_path = db_config['db_path']
    backup_dir = db_config['backup_dir']

    create_backup_tab(db_path=db_path, backup_dir=backup_dir, db_name=db_name)
    create_view_backups_tab(backup_dir=backup_dir, db_name=db_name)
    create_restore_backup_tab(db_path=db_path, backup_dir=backup_dir, db_name=db_name)
|
160 |
+
|
161 |
+
def create_character_chat_tabs(db_config: Dict[str, str]):
    """Create all tabs for the Character Chat database.

    ``db_config`` must provide ``'db_path'`` and ``'backup_dir'``.
    """
    db_name = 'chatDB'  # Updated to match DB_Manager.py
    db_path = db_config['db_path']
    backup_dir = db_config['backup_dir']

    create_backup_tab(db_path=db_path, backup_dir=backup_dir, db_name=db_name)
    create_view_backups_tab(backup_dir=backup_dir, db_name=db_name)
    create_restore_backup_tab(db_path=db_path, backup_dir=backup_dir, db_name=db_name)
|
177 |
+
|
178 |
+
def create_database_management_interface(
        media_db_config: Dict[str, str],
        rag_db_config: Dict[str, str],
        char_db_config: Dict[str, str]
):
    """Create the main database management interface with tabs for each database."""
    # (tab label, tab id, builder, config) — rendered in this order.
    sections = (
        ("Media Database", "media_db_group", create_media_db_tabs, media_db_config),
        ("RAG Chat Database", "rag_chat_group", create_rag_chat_tabs, rag_db_config),
        ("Character Chat Database", "character_chat_group", create_character_chat_tabs, char_db_config),
    )
    for label, tab_id, build_tabs, config in sections:
        with gr.TabItem(label, id=tab_id, visible=True):
            build_tabs(config)
|
192 |
+
|
193 |
+
#
|
194 |
+
# End of Functions
|
195 |
+
#######################################################################################################################
|
App_Function_Libraries/Gradio_UI/Book_Ingestion_tab.py
CHANGED
@@ -8,69 +8,113 @@
|
|
8 |
#
|
9 |
####################
|
10 |
# Imports
|
|
|
11 |
#
|
12 |
# External Imports
|
13 |
import gradio as gr
|
14 |
#
|
15 |
# Local Imports
|
16 |
-
from App_Function_Libraries.Books.Book_Ingestion_Lib import
|
|
|
17 |
#
|
18 |
########################################################################################################################
|
19 |
#
|
20 |
# Functions:
|
21 |
|
22 |
-
|
23 |
-
|
24 |
def create_import_book_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
with gr.TabItem("Ebook(epub) Files", visible=True):
|
26 |
with gr.Row():
|
27 |
with gr.Column():
|
28 |
gr.Markdown("# Import .epub files")
|
29 |
-
gr.Markdown("Upload
|
30 |
gr.Markdown(
|
31 |
"🔗 **How to remove DRM from your ebooks:** [Reddit Guide](https://www.reddit.com/r/Calibre/comments/1ck4w8e/2024_guide_on_removing_drm_from_kobo_kindle_ebooks/)")
|
32 |
-
import_file = gr.File(label="Upload file for import", file_types=[".epub", ".zip"])
|
33 |
-
title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content (for single files)")
|
34 |
-
author_input = gr.Textbox(label="Author", placeholder="Enter the author's name (for single files)")
|
35 |
-
keywords_input = gr.Textbox(label="Keywords (like genre or publish year)",
|
36 |
-
placeholder="Enter keywords, comma-separated")
|
37 |
-
system_prompt_input = gr.Textbox(label="System Prompt", lines=3,
|
38 |
-
value=""""
|
39 |
-
<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
40 |
-
**Bulleted Note Creation Guidelines**
|
41 |
-
|
42 |
-
**Headings**:
|
43 |
-
- Based on referenced topics, not categories like quotes or terms
|
44 |
-
- Surrounded by **bold** formatting
|
45 |
-
- Not listed as bullet points
|
46 |
-
- No space between headings and list items underneath
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
51 |
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
|
|
|
|
|
59 |
api_name_input = gr.Dropdown(
|
60 |
-
choices=[None
|
61 |
-
|
62 |
-
label="API for
|
63 |
)
|
64 |
api_key_input = gr.Textbox(label="API Key", type="password")
|
65 |
|
66 |
# Chunking options
|
67 |
-
max_chunk_size = gr.Slider(
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
|
|
72 |
|
73 |
-
import_button = gr.Button("Import eBook(s)")
|
74 |
with gr.Column():
|
75 |
with gr.Row():
|
76 |
import_output = gr.Textbox(label="Import Status", lines=10, interactive=False)
|
@@ -78,10 +122,10 @@ def create_import_book_tab():
|
|
78 |
import_button.click(
|
79 |
fn=import_file_handler,
|
80 |
inputs=[
|
81 |
-
|
82 |
-
title_input,
|
83 |
author_input,
|
84 |
keywords_input,
|
|
|
85 |
custom_prompt_input,
|
86 |
auto_summarize_checkbox,
|
87 |
api_name_input,
|
@@ -93,8 +137,8 @@ def create_import_book_tab():
|
|
93 |
outputs=import_output
|
94 |
)
|
95 |
|
96 |
-
return
|
97 |
|
98 |
#
|
99 |
# End of File
|
100 |
-
########################################################################################################################
|
|
|
8 |
#
|
9 |
####################
|
10 |
# Imports
|
11 |
+
import logging
|
12 |
#
|
13 |
# External Imports
|
14 |
import gradio as gr
|
15 |
#
|
16 |
# Local Imports
|
17 |
+
from App_Function_Libraries.Books.Book_Ingestion_Lib import import_file_handler
|
18 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
|
19 |
#
|
20 |
########################################################################################################################
|
21 |
#
|
22 |
# Functions:
|
23 |
|
|
|
|
|
24 |
def create_import_book_tab():
|
25 |
+
try:
|
26 |
+
default_value = None
|
27 |
+
if default_api_endpoint:
|
28 |
+
if default_api_endpoint in global_api_endpoints:
|
29 |
+
default_value = format_api_name(default_api_endpoint)
|
30 |
+
else:
|
31 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
32 |
+
except Exception as e:
|
33 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
34 |
+
default_value = None
|
35 |
+
|
36 |
with gr.TabItem("Ebook(epub) Files", visible=True):
|
37 |
with gr.Row():
|
38 |
with gr.Column():
|
39 |
gr.Markdown("# Import .epub files")
|
40 |
+
gr.Markdown("Upload multiple .epub files or a .zip file containing multiple .epub files")
|
41 |
gr.Markdown(
|
42 |
"🔗 **How to remove DRM from your ebooks:** [Reddit Guide](https://www.reddit.com/r/Calibre/comments/1ck4w8e/2024_guide_on_removing_drm_from_kobo_kindle_ebooks/)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
+
# Updated to support multiple files
|
45 |
+
import_files = gr.File(
|
46 |
+
label="Upload files for import",
|
47 |
+
file_count="multiple",
|
48 |
+
file_types=[".epub", ".zip", ".html", ".htm", ".xml", ".opml"]
|
49 |
+
)
|
50 |
|
51 |
+
# Optional fields for overriding auto-extracted metadata
|
52 |
+
author_input = gr.Textbox(
|
53 |
+
label="Author Override (optional)",
|
54 |
+
placeholder="Enter author name to override auto-extracted metadata"
|
55 |
+
)
|
56 |
+
keywords_input = gr.Textbox(
|
57 |
+
label="Keywords (like genre or publish year)",
|
58 |
+
placeholder="Enter keywords, comma-separated - will be applied to all uploaded books"
|
59 |
+
)
|
60 |
+
system_prompt_input = gr.Textbox(
|
61 |
+
label="System Prompt",
|
62 |
+
lines=3,
|
63 |
+
value=""""
|
64 |
+
<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
65 |
+
**Bulleted Note Creation Guidelines**
|
66 |
+
|
67 |
+
**Headings**:
|
68 |
+
- Based on referenced topics, not categories like quotes or terms
|
69 |
+
- Surrounded by **bold** formatting
|
70 |
+
- Not listed as bullet points
|
71 |
+
- No space between headings and list items underneath
|
72 |
+
|
73 |
+
**Emphasis**:
|
74 |
+
- **Important terms** set in bold font
|
75 |
+
- **Text ending in a colon**: also bolded
|
76 |
+
|
77 |
+
**Review**:
|
78 |
+
- Ensure adherence to specified format
|
79 |
+
- Do not reference these instructions in your response.</s>[INST]
|
80 |
+
"""
|
81 |
+
)
|
82 |
+
custom_prompt_input = gr.Textbox(
|
83 |
+
label="Custom User Prompt",
|
84 |
+
placeholder="Enter a custom user prompt for summarization (optional)"
|
85 |
+
)
|
86 |
auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
|
87 |
+
|
88 |
+
# API configuration
|
89 |
api_name_input = gr.Dropdown(
|
90 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
91 |
+
value=default_value,
|
92 |
+
label="API for Summarization/Analysis (Optional)"
|
93 |
)
|
94 |
api_key_input = gr.Textbox(label="API Key", type="password")
|
95 |
|
96 |
# Chunking options
|
97 |
+
max_chunk_size = gr.Slider(
|
98 |
+
minimum=100,
|
99 |
+
maximum=2000,
|
100 |
+
value=500,
|
101 |
+
step=50,
|
102 |
+
label="Max Chunk Size"
|
103 |
+
)
|
104 |
+
chunk_overlap = gr.Slider(
|
105 |
+
minimum=0,
|
106 |
+
maximum=500,
|
107 |
+
value=200,
|
108 |
+
step=10,
|
109 |
+
label="Chunk Overlap"
|
110 |
+
)
|
111 |
+
custom_chapter_pattern = gr.Textbox(
|
112 |
+
label="Custom Chapter Pattern (optional)",
|
113 |
+
placeholder="Enter a custom regex pattern for chapter detection"
|
114 |
+
)
|
115 |
|
116 |
+
import_button = gr.Button("Import eBooks")
|
117 |
|
|
|
118 |
with gr.Column():
|
119 |
with gr.Row():
|
120 |
import_output = gr.Textbox(label="Import Status", lines=10, interactive=False)
|
|
|
122 |
import_button.click(
|
123 |
fn=import_file_handler,
|
124 |
inputs=[
|
125 |
+
import_files, # Now handles multiple files
|
|
|
126 |
author_input,
|
127 |
keywords_input,
|
128 |
+
system_prompt_input,
|
129 |
custom_prompt_input,
|
130 |
auto_summarize_checkbox,
|
131 |
api_name_input,
|
|
|
137 |
outputs=import_output
|
138 |
)
|
139 |
|
140 |
+
return import_files, author_input, keywords_input, system_prompt_input, custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input, import_button, import_output
|
141 |
|
142 |
#
|
143 |
# End of File
|
144 |
+
########################################################################################################################
|
App_Function_Libraries/Gradio_UI/Character_Chat_tab.py
CHANGED
@@ -2,10 +2,10 @@
|
|
2 |
# Description: Library for character card import functions
|
3 |
#
|
4 |
# Imports
|
|
|
5 |
import re
|
6 |
import tempfile
|
7 |
import uuid
|
8 |
-
from datetime import datetime
|
9 |
import json
|
10 |
import logging
|
11 |
import io
|
@@ -21,7 +21,7 @@ import gradio as gr
|
|
21 |
from App_Function_Libraries.Character_Chat.Character_Chat_Lib import validate_character_book, validate_v2_card, \
|
22 |
replace_placeholders, replace_user_placeholder, extract_json_from_image, parse_character_book, \
|
23 |
load_chat_and_character, load_chat_history, load_character_and_image, extract_character_id, load_character_wrapper
|
24 |
-
from App_Function_Libraries.Chat import chat
|
25 |
from App_Function_Libraries.DB.Character_Chat_DB import (
|
26 |
add_character_card,
|
27 |
get_character_cards,
|
@@ -32,9 +32,12 @@ from App_Function_Libraries.DB.Character_Chat_DB import (
|
|
32 |
update_character_chat,
|
33 |
delete_character_chat,
|
34 |
delete_character_card,
|
35 |
-
update_character_card, search_character_chats,
|
36 |
)
|
37 |
-
from App_Function_Libraries.Utils.Utils import sanitize_user_input
|
|
|
|
|
|
|
38 |
#
|
39 |
############################################################################################################
|
40 |
#
|
@@ -252,8 +255,37 @@ def export_all_characters():
|
|
252 |
# Gradio tabs
|
253 |
|
254 |
def create_character_card_interaction_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
with gr.TabItem("Chat with a Character Card", visible=True):
|
256 |
gr.Markdown("# Chat with a Character Card")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
with gr.Row():
|
258 |
with gr.Column(scale=1):
|
259 |
character_image = gr.Image(label="Character Image", type="pil")
|
@@ -265,13 +297,10 @@ def create_character_card_interaction_tab():
|
|
265 |
load_characters_button = gr.Button("Load Existing Characters")
|
266 |
character_dropdown = gr.Dropdown(label="Select Character", choices=[])
|
267 |
user_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name here")
|
|
|
268 |
api_name_input = gr.Dropdown(
|
269 |
-
choices=[
|
270 |
-
|
271 |
-
"OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
|
272 |
-
"Custom-OpenAI-API"
|
273 |
-
],
|
274 |
-
value="HuggingFace",
|
275 |
label="API for Interaction (Mandatory)"
|
276 |
)
|
277 |
api_key_input = gr.Textbox(
|
@@ -281,24 +310,8 @@ def create_character_card_interaction_tab():
|
|
281 |
temperature_slider = gr.Slider(
|
282 |
minimum=0.0, maximum=2.0, value=0.7, step=0.05, label="Temperature"
|
283 |
)
|
284 |
-
import_chat_button = gr.Button("Import Chat History")
|
285 |
chat_file_upload = gr.File(label="Upload Chat History JSON", visible=True)
|
286 |
-
|
287 |
-
# Chat History Import and Search
|
288 |
-
gr.Markdown("## Search and Load Existing Chats")
|
289 |
-
chat_search_query = gr.Textbox(
|
290 |
-
label="Search Chats",
|
291 |
-
placeholder="Enter chat name or keywords to search"
|
292 |
-
)
|
293 |
-
chat_search_button = gr.Button("Search Chats")
|
294 |
-
chat_search_dropdown = gr.Dropdown(label="Search Results", choices=[], visible=False)
|
295 |
-
load_chat_button = gr.Button("Load Selected Chat", visible=False)
|
296 |
-
|
297 |
-
# Checkbox to Decide Whether to Save Chats by Default
|
298 |
-
auto_save_checkbox = gr.Checkbox(label="Save chats automatically", value=True)
|
299 |
-
chat_media_name = gr.Textbox(label="Custom Chat Name (optional)", visible=True)
|
300 |
-
save_chat_history_to_db = gr.Button("Save Chat History to Database")
|
301 |
-
save_status = gr.Textbox(label="Save Status", interactive=False)
|
302 |
|
303 |
with gr.Column(scale=2):
|
304 |
chat_history = gr.Chatbot(label="Conversation", height=800)
|
@@ -307,6 +320,7 @@ def create_character_card_interaction_tab():
|
|
307 |
answer_for_me_button = gr.Button("Answer for Me")
|
308 |
continue_talking_button = gr.Button("Continue Talking")
|
309 |
regenerate_button = gr.Button("Regenerate Last Message")
|
|
|
310 |
clear_chat_button = gr.Button("Clear Chat")
|
311 |
save_snapshot_button = gr.Button("Save Chat Snapshot")
|
312 |
update_chat_dropdown = gr.Dropdown(label="Select Chat to Update", choices=[], visible=False)
|
@@ -491,23 +505,114 @@ def create_character_card_interaction_tab():
|
|
491 |
|
492 |
return history, save_status
|
493 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
494 |
def save_chat_history_to_db_wrapper(
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
|
|
|
|
|
|
|
|
500 |
|
501 |
-
|
502 |
-
|
503 |
-
|
|
|
|
|
|
|
|
|
504 |
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
511 |
|
512 |
def update_character_info(name):
|
513 |
return load_character_and_image(name, user_name.value)
|
@@ -871,6 +976,10 @@ def create_character_card_interaction_tab():
|
|
871 |
auto_save_checkbox
|
872 |
],
|
873 |
outputs=[chat_history, save_status]
|
|
|
|
|
|
|
|
|
874 |
)
|
875 |
|
876 |
continue_talking_button.click(
|
@@ -885,6 +994,10 @@ def create_character_card_interaction_tab():
|
|
885 |
auto_save_checkbox
|
886 |
],
|
887 |
outputs=[chat_history, save_status]
|
|
|
|
|
|
|
|
|
888 |
)
|
889 |
|
890 |
import_card_button.click(
|
@@ -903,6 +1016,10 @@ def create_character_card_interaction_tab():
|
|
903 |
fn=clear_chat_history,
|
904 |
inputs=[character_data, user_name_input],
|
905 |
outputs=[chat_history, character_data]
|
|
|
|
|
|
|
|
|
906 |
)
|
907 |
|
908 |
character_dropdown.change(
|
@@ -928,7 +1045,13 @@ def create_character_card_interaction_tab():
|
|
928 |
auto_save_checkbox
|
929 |
],
|
930 |
outputs=[chat_history, save_status]
|
931 |
-
).then(
|
|
|
|
|
|
|
|
|
|
|
|
|
932 |
|
933 |
regenerate_button.click(
|
934 |
fn=regenerate_last_message,
|
@@ -942,6 +1065,10 @@ def create_character_card_interaction_tab():
|
|
942 |
auto_save_checkbox
|
943 |
],
|
944 |
outputs=[chat_history, save_status]
|
|
|
|
|
|
|
|
|
945 |
)
|
946 |
|
947 |
import_chat_button.click(
|
@@ -951,8 +1078,12 @@ def create_character_card_interaction_tab():
|
|
951 |
|
952 |
chat_file_upload.change(
|
953 |
fn=import_chat_history,
|
954 |
-
inputs=[chat_file_upload, chat_history, character_data],
|
955 |
outputs=[chat_history, character_data, save_status]
|
|
|
|
|
|
|
|
|
956 |
)
|
957 |
|
958 |
save_chat_history_to_db.click(
|
@@ -1009,6 +1140,10 @@ def create_character_card_interaction_tab():
|
|
1009 |
fn=load_selected_chat_from_search,
|
1010 |
inputs=[chat_search_dropdown, user_name_input],
|
1011 |
outputs=[character_data, chat_history, character_image, save_status]
|
|
|
|
|
|
|
|
|
1012 |
)
|
1013 |
|
1014 |
# Show Load Chat Button when a chat is selected
|
@@ -1023,8 +1158,8 @@ def create_character_card_interaction_tab():
|
|
1023 |
|
1024 |
|
1025 |
def create_character_chat_mgmt_tab():
|
1026 |
-
with gr.TabItem("Character
|
1027 |
-
gr.Markdown("# Character
|
1028 |
|
1029 |
with gr.Row():
|
1030 |
# Left Column: Character Import and Chat Management
|
@@ -1057,13 +1192,17 @@ def create_character_chat_mgmt_tab():
|
|
1057 |
gr.Markdown("## Chat Management")
|
1058 |
select_chat = gr.Dropdown(label="Select Chat", choices=[], visible=False, interactive=True)
|
1059 |
load_chat_button = gr.Button("Load Selected Chat", visible=False)
|
1060 |
-
conversation_list = gr.Dropdown(label="Select Conversation
|
1061 |
conversation_mapping = gr.State({})
|
1062 |
|
1063 |
with gr.Tabs():
|
1064 |
with gr.TabItem("Edit", visible=True):
|
1065 |
chat_content = gr.TextArea(label="Chat/Character Content (JSON)", lines=20, max_lines=50)
|
1066 |
save_button = gr.Button("Save Changes")
|
|
|
|
|
|
|
|
|
1067 |
delete_button = gr.Button("Delete Conversation/Character", variant="stop")
|
1068 |
|
1069 |
with gr.TabItem("Preview", visible=True):
|
@@ -1306,6 +1445,90 @@ def create_character_chat_mgmt_tab():
|
|
1306 |
|
1307 |
return "Import results:\n" + "\n".join(results)
|
1308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1309 |
# Register new callback for character import
|
1310 |
import_characters_button.click(
|
1311 |
fn=import_multiple_characters,
|
@@ -1368,6 +1591,18 @@ def create_character_chat_mgmt_tab():
|
|
1368 |
outputs=select_character
|
1369 |
)
|
1370 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1371 |
return (
|
1372 |
character_files, import_characters_button, import_status,
|
1373 |
search_query, search_button, search_results, search_status,
|
|
|
2 |
# Description: Library for character card import functions
|
3 |
#
|
4 |
# Imports
|
5 |
+
from datetime import datetime
|
6 |
import re
|
7 |
import tempfile
|
8 |
import uuid
|
|
|
9 |
import json
|
10 |
import logging
|
11 |
import io
|
|
|
21 |
from App_Function_Libraries.Character_Chat.Character_Chat_Lib import validate_character_book, validate_v2_card, \
|
22 |
replace_placeholders, replace_user_placeholder, extract_json_from_image, parse_character_book, \
|
23 |
load_chat_and_character, load_chat_history, load_character_and_image, extract_character_id, load_character_wrapper
|
24 |
+
from App_Function_Libraries.Chat.Chat_Functions import chat, approximate_token_count
|
25 |
from App_Function_Libraries.DB.Character_Chat_DB import (
|
26 |
add_character_card,
|
27 |
get_character_cards,
|
|
|
32 |
update_character_chat,
|
33 |
delete_character_chat,
|
34 |
delete_character_card,
|
35 |
+
update_character_card, search_character_chats, save_chat_history_to_character_db,
|
36 |
)
|
37 |
+
from App_Function_Libraries.Utils.Utils import sanitize_user_input, format_api_name, global_api_endpoints, \
|
38 |
+
default_api_endpoint, load_comprehensive_config
|
39 |
+
|
40 |
+
|
41 |
#
|
42 |
############################################################################################################
|
43 |
#
|
|
|
255 |
# Gradio tabs
|
256 |
|
257 |
def create_character_card_interaction_tab():
|
258 |
+
try:
|
259 |
+
default_value = None
|
260 |
+
if default_api_endpoint:
|
261 |
+
if default_api_endpoint in global_api_endpoints:
|
262 |
+
default_value = format_api_name(default_api_endpoint)
|
263 |
+
else:
|
264 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
265 |
+
except Exception as e:
|
266 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
267 |
+
default_value = None
|
268 |
with gr.TabItem("Chat with a Character Card", visible=True):
|
269 |
gr.Markdown("# Chat with a Character Card")
|
270 |
+
with gr.Row():
|
271 |
+
with gr.Column(scale=1):
|
272 |
+
# Checkbox to Decide Whether to Save Chats by Default
|
273 |
+
config = load_comprehensive_config()
|
274 |
+
auto_save_value = config.get('auto-save', 'save_character_chats', fallback='False')
|
275 |
+
auto_save_checkbox = gr.Checkbox(label="Save chats automatically", value=auto_save_value)
|
276 |
+
chat_media_name = gr.Textbox(label="Custom Chat Name (optional)", visible=True)
|
277 |
+
save_chat_history_to_db = gr.Button("Save Chat History to Database")
|
278 |
+
save_status = gr.Textbox(label="Status", interactive=False)
|
279 |
+
with gr.Column(scale=2):
|
280 |
+
gr.Markdown("## Search and Load Existing Chats")
|
281 |
+
chat_search_query = gr.Textbox(
|
282 |
+
label="Search Chats",
|
283 |
+
placeholder="Enter chat name or keywords to search"
|
284 |
+
)
|
285 |
+
chat_search_button = gr.Button("Search Chats")
|
286 |
+
chat_search_dropdown = gr.Dropdown(label="Search Results", choices=[], visible=False)
|
287 |
+
load_chat_button = gr.Button("Load Selected Chat", visible=False)
|
288 |
+
|
289 |
with gr.Row():
|
290 |
with gr.Column(scale=1):
|
291 |
character_image = gr.Image(label="Character Image", type="pil")
|
|
|
297 |
load_characters_button = gr.Button("Load Existing Characters")
|
298 |
character_dropdown = gr.Dropdown(label="Select Character", choices=[])
|
299 |
user_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name here")
|
300 |
+
# Refactored API selection dropdown
|
301 |
api_name_input = gr.Dropdown(
|
302 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
303 |
+
value=default_value,
|
|
|
|
|
|
|
|
|
304 |
label="API for Interaction (Mandatory)"
|
305 |
)
|
306 |
api_key_input = gr.Textbox(
|
|
|
310 |
temperature_slider = gr.Slider(
|
311 |
minimum=0.0, maximum=2.0, value=0.7, step=0.05, label="Temperature"
|
312 |
)
|
|
|
313 |
chat_file_upload = gr.File(label="Upload Chat History JSON", visible=True)
|
314 |
+
import_chat_button = gr.Button("Import Chat History")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
|
316 |
with gr.Column(scale=2):
|
317 |
chat_history = gr.Chatbot(label="Conversation", height=800)
|
|
|
320 |
answer_for_me_button = gr.Button("Answer for Me")
|
321 |
continue_talking_button = gr.Button("Continue Talking")
|
322 |
regenerate_button = gr.Button("Regenerate Last Message")
|
323 |
+
token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False)
|
324 |
clear_chat_button = gr.Button("Clear Chat")
|
325 |
save_snapshot_button = gr.Button("Save Chat Snapshot")
|
326 |
update_chat_dropdown = gr.Dropdown(label="Select Chat to Update", choices=[], visible=False)
|
|
|
505 |
|
506 |
return history, save_status
|
507 |
|
508 |
+
def validate_chat_history(chat_history: List[Tuple[Optional[str], str]]) -> bool:
|
509 |
+
"""
|
510 |
+
Validate the chat history format and content.
|
511 |
+
|
512 |
+
Args:
|
513 |
+
chat_history: List of message tuples (user_message, bot_message)
|
514 |
+
|
515 |
+
Returns:
|
516 |
+
bool: True if valid, False if invalid
|
517 |
+
"""
|
518 |
+
if not isinstance(chat_history, list):
|
519 |
+
return False
|
520 |
+
|
521 |
+
for entry in chat_history:
|
522 |
+
if not isinstance(entry, tuple) or len(entry) != 2:
|
523 |
+
return False
|
524 |
+
# First element can be None (for system messages) or str
|
525 |
+
if not (entry[0] is None or isinstance(entry[0], str)):
|
526 |
+
return False
|
527 |
+
# Second element (bot response) must be str and not empty
|
528 |
+
if not isinstance(entry[1], str) or not entry[1].strip():
|
529 |
+
return False
|
530 |
+
|
531 |
+
return True
|
532 |
+
|
533 |
+
def sanitize_conversation_name(name: str) -> str:
|
534 |
+
"""
|
535 |
+
Sanitize the conversation name.
|
536 |
+
|
537 |
+
Args:
|
538 |
+
name: Raw conversation name
|
539 |
+
|
540 |
+
Returns:
|
541 |
+
str: Sanitized conversation name
|
542 |
+
"""
|
543 |
+
# Remove any non-alphanumeric characters except spaces and basic punctuation
|
544 |
+
sanitized = re.sub(r'[^a-zA-Z0-9\s\-_.]', '', name)
|
545 |
+
# Limit length
|
546 |
+
sanitized = sanitized[:100]
|
547 |
+
# Ensure it's not empty
|
548 |
+
if not sanitized.strip():
|
549 |
+
sanitized = f"Chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
550 |
+
return sanitized
|
551 |
+
|
552 |
def save_chat_history_to_db_wrapper(
|
553 |
+
chat_history: List[Tuple[Optional[str], str]],
|
554 |
+
conversation_id: str,
|
555 |
+
media_content: Dict,
|
556 |
+
chat_media_name: str,
|
557 |
+
char_data: Dict,
|
558 |
+
auto_save: bool
|
559 |
+
) -> Tuple[str, str]:
|
560 |
+
"""
|
561 |
+
Save chat history to the database with validation.
|
562 |
|
563 |
+
Args:
|
564 |
+
chat_history: List of message tuples
|
565 |
+
conversation_id: Current conversation ID
|
566 |
+
media_content: Media content metadata
|
567 |
+
chat_media_name: Custom name for the chat
|
568 |
+
char_data: Character data dictionary
|
569 |
+
auto_save: Auto-save flag
|
570 |
|
571 |
+
Returns:
|
572 |
+
Tuple[str, str]: (status message, detail message)
|
573 |
+
"""
|
574 |
+
try:
|
575 |
+
# Basic input validation
|
576 |
+
if not chat_history:
|
577 |
+
return "No chat history to save.", ""
|
578 |
+
|
579 |
+
if not validate_chat_history(chat_history):
|
580 |
+
return "Invalid chat history format.", "Please ensure the chat history is valid."
|
581 |
+
|
582 |
+
if not char_data:
|
583 |
+
return "No character selected.", "Please select a character first."
|
584 |
+
|
585 |
+
character_id = char_data.get('id')
|
586 |
+
if not character_id:
|
587 |
+
return "Invalid character data: No character ID found.", ""
|
588 |
+
|
589 |
+
# Sanitize and prepare conversation name
|
590 |
+
conversation_name = sanitize_conversation_name(
|
591 |
+
chat_media_name if chat_media_name.strip()
|
592 |
+
else f"Chat with {char_data.get('name', 'Unknown')} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
593 |
+
)
|
594 |
+
|
595 |
+
# Save to the database using your existing function
|
596 |
+
chat_id = save_chat_history_to_character_db(
|
597 |
+
character_id=character_id,
|
598 |
+
conversation_name=conversation_name,
|
599 |
+
chat_history=chat_history
|
600 |
+
)
|
601 |
+
|
602 |
+
if chat_id:
|
603 |
+
success_message = (
|
604 |
+
f"Chat saved successfully!\n"
|
605 |
+
f"ID: {chat_id}\n"
|
606 |
+
f"Name: {conversation_name}\n"
|
607 |
+
f"Messages: {len(chat_history)}"
|
608 |
+
)
|
609 |
+
return success_message, ""
|
610 |
+
else:
|
611 |
+
return "Failed to save chat to database.", "Database operation failed."
|
612 |
+
|
613 |
+
except Exception as e:
|
614 |
+
logging.error(f"Error saving chat history: {str(e)}", exc_info=True)
|
615 |
+
return f"Error saving chat: {str(e)}", "Please check the logs for more details."
|
616 |
|
617 |
def update_character_info(name):
|
618 |
return load_character_and_image(name, user_name.value)
|
|
|
976 |
auto_save_checkbox
|
977 |
],
|
978 |
outputs=[chat_history, save_status]
|
979 |
+
).then(
|
980 |
+
lambda history: approximate_token_count(history),
|
981 |
+
inputs=[chat_history],
|
982 |
+
outputs=[token_count_display]
|
983 |
)
|
984 |
|
985 |
continue_talking_button.click(
|
|
|
994 |
auto_save_checkbox
|
995 |
],
|
996 |
outputs=[chat_history, save_status]
|
997 |
+
).then(
|
998 |
+
lambda history: approximate_token_count(history),
|
999 |
+
inputs=[chat_history],
|
1000 |
+
outputs=[token_count_display]
|
1001 |
)
|
1002 |
|
1003 |
import_card_button.click(
|
|
|
1016 |
fn=clear_chat_history,
|
1017 |
inputs=[character_data, user_name_input],
|
1018 |
outputs=[chat_history, character_data]
|
1019 |
+
).then(
|
1020 |
+
lambda history: approximate_token_count(history),
|
1021 |
+
inputs=[chat_history],
|
1022 |
+
outputs=[token_count_display]
|
1023 |
)
|
1024 |
|
1025 |
character_dropdown.change(
|
|
|
1045 |
auto_save_checkbox
|
1046 |
],
|
1047 |
outputs=[chat_history, save_status]
|
1048 |
+
).then(
|
1049 |
+
lambda: "", outputs=user_input
|
1050 |
+
).then(
|
1051 |
+
lambda history: approximate_token_count(history),
|
1052 |
+
inputs=[chat_history],
|
1053 |
+
outputs=[token_count_display]
|
1054 |
+
)
|
1055 |
|
1056 |
regenerate_button.click(
|
1057 |
fn=regenerate_last_message,
|
|
|
1065 |
auto_save_checkbox
|
1066 |
],
|
1067 |
outputs=[chat_history, save_status]
|
1068 |
+
).then(
|
1069 |
+
lambda history: approximate_token_count(history),
|
1070 |
+
inputs=[chat_history],
|
1071 |
+
outputs=[token_count_display]
|
1072 |
)
|
1073 |
|
1074 |
import_chat_button.click(
|
|
|
1078 |
|
1079 |
chat_file_upload.change(
|
1080 |
fn=import_chat_history,
|
1081 |
+
inputs=[chat_file_upload, chat_history, character_data, user_name_input],
|
1082 |
outputs=[chat_history, character_data, save_status]
|
1083 |
+
).then(
|
1084 |
+
lambda history: approximate_token_count(history),
|
1085 |
+
inputs=[chat_history],
|
1086 |
+
outputs=[token_count_display]
|
1087 |
)
|
1088 |
|
1089 |
save_chat_history_to_db.click(
|
|
|
1140 |
fn=load_selected_chat_from_search,
|
1141 |
inputs=[chat_search_dropdown, user_name_input],
|
1142 |
outputs=[character_data, chat_history, character_image, save_status]
|
1143 |
+
).then(
|
1144 |
+
lambda history: approximate_token_count(history),
|
1145 |
+
inputs=[chat_history],
|
1146 |
+
outputs=[token_count_display]
|
1147 |
)
|
1148 |
|
1149 |
# Show Load Chat Button when a chat is selected
|
|
|
1158 |
|
1159 |
|
1160 |
def create_character_chat_mgmt_tab():
|
1161 |
+
with gr.TabItem("Character Chat Management", visible=True):
|
1162 |
+
gr.Markdown("# Character Chat Management")
|
1163 |
|
1164 |
with gr.Row():
|
1165 |
# Left Column: Character Import and Chat Management
|
|
|
1192 |
gr.Markdown("## Chat Management")
|
1193 |
select_chat = gr.Dropdown(label="Select Chat", choices=[], visible=False, interactive=True)
|
1194 |
load_chat_button = gr.Button("Load Selected Chat", visible=False)
|
1195 |
+
conversation_list = gr.Dropdown(label="Select Conversation", choices=[])
|
1196 |
conversation_mapping = gr.State({})
|
1197 |
|
1198 |
with gr.Tabs():
|
1199 |
with gr.TabItem("Edit", visible=True):
|
1200 |
chat_content = gr.TextArea(label="Chat/Character Content (JSON)", lines=20, max_lines=50)
|
1201 |
save_button = gr.Button("Save Changes")
|
1202 |
+
export_chat_button = gr.Button("Export Current Conversation", variant="secondary")
|
1203 |
+
export_all_chats_button = gr.Button("Export All Character Conversations", variant="secondary")
|
1204 |
+
export_file = gr.File(label="Downloaded File", visible=False)
|
1205 |
+
export_status = gr.Markdown("")
|
1206 |
delete_button = gr.Button("Delete Conversation/Character", variant="stop")
|
1207 |
|
1208 |
with gr.TabItem("Preview", visible=True):
|
|
|
1445 |
|
1446 |
return "Import results:\n" + "\n".join(results)
|
1447 |
|
1448 |
+
def export_current_conversation(selected_chat):
|
1449 |
+
if not selected_chat:
|
1450 |
+
return "Please select a conversation to export.", None
|
1451 |
+
|
1452 |
+
try:
|
1453 |
+
chat_id = int(selected_chat.split('(ID: ')[1].rstrip(')'))
|
1454 |
+
chat = get_character_chat_by_id(chat_id)
|
1455 |
+
|
1456 |
+
if not chat:
|
1457 |
+
return "Selected chat not found.", None
|
1458 |
+
|
1459 |
+
# Ensure chat_history is properly parsed
|
1460 |
+
chat_history = chat['chat_history']
|
1461 |
+
if isinstance(chat_history, str):
|
1462 |
+
chat_history = json.loads(chat_history)
|
1463 |
+
|
1464 |
+
export_data = {
|
1465 |
+
"conversation_id": chat['id'],
|
1466 |
+
"conversation_name": chat['conversation_name'],
|
1467 |
+
"character_id": chat['character_id'],
|
1468 |
+
"chat_history": chat_history,
|
1469 |
+
"exported_at": datetime.now().isoformat()
|
1470 |
+
}
|
1471 |
+
|
1472 |
+
# Convert to JSON string
|
1473 |
+
json_str = json.dumps(export_data, indent=2, ensure_ascii=False)
|
1474 |
+
|
1475 |
+
# Create file name
|
1476 |
+
file_name = f"conversation_{chat['id']}_{chat['conversation_name']}.json"
|
1477 |
+
|
1478 |
+
# Return file for download
|
1479 |
+
return "Conversation exported successfully!", (file_name, json_str, "application/json")
|
1480 |
+
|
1481 |
+
except Exception as e:
|
1482 |
+
logging.error(f"Error exporting conversation: {e}")
|
1483 |
+
return f"Error exporting conversation: {str(e)}", None
|
1484 |
+
|
1485 |
+
def export_all_character_conversations(character_selection):
|
1486 |
+
if not character_selection:
|
1487 |
+
return "Please select a character first.", None
|
1488 |
+
|
1489 |
+
try:
|
1490 |
+
character_id = int(character_selection.split('(ID: ')[1].rstrip(')'))
|
1491 |
+
character = get_character_card_by_id(character_id)
|
1492 |
+
chats = get_character_chats(character_id=character_id)
|
1493 |
+
|
1494 |
+
if not chats:
|
1495 |
+
return "No conversations found for this character.", None
|
1496 |
+
|
1497 |
+
# Process chat histories
|
1498 |
+
conversations = []
|
1499 |
+
for chat in chats:
|
1500 |
+
chat_history = chat['chat_history']
|
1501 |
+
if isinstance(chat_history, str):
|
1502 |
+
chat_history = json.loads(chat_history)
|
1503 |
+
|
1504 |
+
conversations.append({
|
1505 |
+
"conversation_id": chat['id'],
|
1506 |
+
"conversation_name": chat['conversation_name'],
|
1507 |
+
"chat_history": chat_history
|
1508 |
+
})
|
1509 |
+
|
1510 |
+
export_data = {
|
1511 |
+
"character": {
|
1512 |
+
"id": character['id'],
|
1513 |
+
"name": character['name']
|
1514 |
+
},
|
1515 |
+
"conversations": conversations,
|
1516 |
+
"exported_at": datetime.now().isoformat()
|
1517 |
+
}
|
1518 |
+
|
1519 |
+
# Convert to JSON string
|
1520 |
+
json_str = json.dumps(export_data, indent=2, ensure_ascii=False)
|
1521 |
+
|
1522 |
+
# Create file name
|
1523 |
+
file_name = f"all_conversations_{character['name']}_{character['id']}.json"
|
1524 |
+
|
1525 |
+
# Return file for download
|
1526 |
+
return "All conversations exported successfully!", (file_name, json_str, "application/json")
|
1527 |
+
|
1528 |
+
except Exception as e:
|
1529 |
+
logging.error(f"Error exporting all conversations: {e}")
|
1530 |
+
return f"Error exporting conversations: {str(e)}", None
|
1531 |
+
|
1532 |
# Register new callback for character import
|
1533 |
import_characters_button.click(
|
1534 |
fn=import_multiple_characters,
|
|
|
1591 |
outputs=select_character
|
1592 |
)
|
1593 |
|
1594 |
+
export_chat_button.click(
|
1595 |
+
fn=export_current_conversation,
|
1596 |
+
inputs=[select_chat],
|
1597 |
+
outputs=[export_status, export_file]
|
1598 |
+
)
|
1599 |
+
|
1600 |
+
export_all_chats_button.click(
|
1601 |
+
fn=export_all_character_conversations,
|
1602 |
+
inputs=[select_character],
|
1603 |
+
outputs=[export_status, export_file]
|
1604 |
+
)
|
1605 |
+
|
1606 |
return (
|
1607 |
character_files, import_characters_button, import_status,
|
1608 |
search_query, search_button, search_results, search_status,
|
App_Function_Libraries/Gradio_UI/Character_interaction_tab.py
CHANGED
@@ -17,9 +17,12 @@ import gradio as gr
|
|
17 |
from PIL import Image
|
18 |
#
|
19 |
# Local Imports
|
20 |
-
from App_Function_Libraries.Chat import chat, load_characters, save_chat_history_to_db_wrapper
|
21 |
from App_Function_Libraries.Gradio_UI.Chat_ui import chat_wrapper
|
22 |
from App_Function_Libraries.Gradio_UI.Writing_tab import generate_writing_feedback
|
|
|
|
|
|
|
23 |
#
|
24 |
########################################################################################################################
|
25 |
#
|
@@ -253,6 +256,16 @@ def character_interaction(character1: str, character2: str, api_endpoint: str, a
|
|
253 |
|
254 |
|
255 |
def create_multiple_character_chat_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
with gr.TabItem("Multi-Character Chat", visible=True):
|
257 |
characters, conversation, current_character, other_character = character_interaction_setup()
|
258 |
|
@@ -264,13 +277,12 @@ def create_multiple_character_chat_tab():
|
|
264 |
character_selectors = [gr.Dropdown(label=f"Character {i + 1}", choices=list(characters.keys())) for i in
|
265 |
range(4)]
|
266 |
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
value="HuggingFace")
|
274 |
api_key = gr.Textbox(label="API Key (if required)", type="password")
|
275 |
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
|
276 |
scenario = gr.Textbox(label="Scenario (optional)", lines=3)
|
@@ -393,17 +405,26 @@ def create_multiple_character_chat_tab():
|
|
393 |
|
394 |
# From `Fuzzlewumper` on Reddit.
|
395 |
def create_narrator_controlled_conversation_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
396 |
with gr.TabItem("Narrator-Controlled Conversation", visible=True):
|
397 |
gr.Markdown("# Narrator-Controlled Conversation")
|
398 |
|
399 |
with gr.Row():
|
400 |
with gr.Column(scale=1):
|
|
|
401 |
api_endpoint = gr.Dropdown(
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
"Custom-OpenAI-API"],
|
406 |
-
value="HuggingFace"
|
407 |
)
|
408 |
api_key = gr.Textbox(label="API Key (if required)", type="password")
|
409 |
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
|
|
|
17 |
from PIL import Image
|
18 |
#
|
19 |
# Local Imports
|
20 |
+
from App_Function_Libraries.Chat.Chat_Functions import chat, load_characters, save_chat_history_to_db_wrapper
|
21 |
from App_Function_Libraries.Gradio_UI.Chat_ui import chat_wrapper
|
22 |
from App_Function_Libraries.Gradio_UI.Writing_tab import generate_writing_feedback
|
23 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, format_api_name, global_api_endpoints
|
24 |
+
|
25 |
+
|
26 |
#
|
27 |
########################################################################################################################
|
28 |
#
|
|
|
256 |
|
257 |
|
258 |
def create_multiple_character_chat_tab():
|
259 |
+
try:
|
260 |
+
default_value = None
|
261 |
+
if default_api_endpoint:
|
262 |
+
if default_api_endpoint in global_api_endpoints:
|
263 |
+
default_value = format_api_name(default_api_endpoint)
|
264 |
+
else:
|
265 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
266 |
+
except Exception as e:
|
267 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
268 |
+
default_value = None
|
269 |
with gr.TabItem("Multi-Character Chat", visible=True):
|
270 |
characters, conversation, current_character, other_character = character_interaction_setup()
|
271 |
|
|
|
277 |
character_selectors = [gr.Dropdown(label=f"Character {i + 1}", choices=list(characters.keys())) for i in
|
278 |
range(4)]
|
279 |
|
280 |
+
# Refactored API selection dropdown
|
281 |
+
api_endpoint = gr.Dropdown(
|
282 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
283 |
+
value=default_value,
|
284 |
+
label="API for Interaction (Optional)"
|
285 |
+
)
|
|
|
286 |
api_key = gr.Textbox(label="API Key (if required)", type="password")
|
287 |
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
|
288 |
scenario = gr.Textbox(label="Scenario (optional)", lines=3)
|
|
|
405 |
|
406 |
# From `Fuzzlewumper` on Reddit.
|
407 |
def create_narrator_controlled_conversation_tab():
|
408 |
+
try:
|
409 |
+
default_value = None
|
410 |
+
if default_api_endpoint:
|
411 |
+
if default_api_endpoint in global_api_endpoints:
|
412 |
+
default_value = format_api_name(default_api_endpoint)
|
413 |
+
else:
|
414 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
415 |
+
except Exception as e:
|
416 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
417 |
+
default_value = None
|
418 |
with gr.TabItem("Narrator-Controlled Conversation", visible=True):
|
419 |
gr.Markdown("# Narrator-Controlled Conversation")
|
420 |
|
421 |
with gr.Row():
|
422 |
with gr.Column(scale=1):
|
423 |
+
# Refactored API selection dropdown
|
424 |
api_endpoint = gr.Dropdown(
|
425 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
426 |
+
value=default_value,
|
427 |
+
label="API for Chat Interaction (Optional)"
|
|
|
|
|
428 |
)
|
429 |
api_key = gr.Textbox(label="API Key (if required)", type="password")
|
430 |
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
|
App_Function_Libraries/Gradio_UI/Chat_ui.py
CHANGED
@@ -2,23 +2,25 @@
|
|
2 |
# Description: Chat interface functions for Gradio
|
3 |
#
|
4 |
# Imports
|
5 |
-
import html
|
6 |
-
import json
|
7 |
import logging
|
8 |
import os
|
9 |
import sqlite3
|
|
|
10 |
from datetime import datetime
|
11 |
#
|
12 |
# External Imports
|
13 |
import gradio as gr
|
14 |
#
|
15 |
# Local Imports
|
16 |
-
from App_Function_Libraries.Chat import chat, save_chat_history,
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
19 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_user_prompt
|
20 |
-
|
21 |
-
|
22 |
#
|
23 |
#
|
24 |
########################################################################################################################
|
@@ -91,10 +93,9 @@ def chat_wrapper(message, history, media_content, selected_parts, api_endpoint,
|
|
91 |
# Create a new conversation
|
92 |
media_id = media_content.get('id', None)
|
93 |
conversation_name = f"Chat about {media_content.get('title', 'Unknown Media')} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
94 |
-
conversation_id =
|
95 |
-
|
96 |
# Add user message to the database
|
97 |
-
user_message_id =
|
98 |
|
99 |
# Include the selected parts and custom_prompt only for the first message
|
100 |
if not history and selected_parts:
|
@@ -113,7 +114,7 @@ def chat_wrapper(message, history, media_content, selected_parts, api_endpoint,
|
|
113 |
|
114 |
if save_conversation:
|
115 |
# Add assistant message to the database
|
116 |
-
|
117 |
|
118 |
# Update history
|
119 |
new_history = history + [(message, bot_message)]
|
@@ -123,51 +124,57 @@ def chat_wrapper(message, history, media_content, selected_parts, api_endpoint,
|
|
123 |
logging.error(f"Error in chat wrapper: {str(e)}")
|
124 |
return "An error occurred.", history, conversation_id
|
125 |
|
|
|
126 |
def search_conversations(query):
|
|
|
127 |
try:
|
128 |
-
|
129 |
-
if
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
return gr.update(choices=[])
|
132 |
|
|
|
133 |
conversation_options = [
|
134 |
-
(f"{
|
135 |
-
for
|
136 |
]
|
137 |
-
|
138 |
return gr.update(choices=conversation_options)
|
139 |
except Exception as e:
|
140 |
-
|
141 |
return gr.update(choices=[])
|
142 |
|
143 |
|
144 |
def load_conversation(conversation_id):
|
|
|
145 |
if not conversation_id:
|
146 |
return [], None
|
147 |
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
for msg in messages
|
152 |
-
]
|
153 |
-
return history, conversation_id
|
154 |
-
|
155 |
-
|
156 |
-
def update_message_in_chat(message_id, new_text, history):
|
157 |
-
update_chat_message(message_id, new_text)
|
158 |
-
updated_history = [(msg1, msg2) if msg1[1] != message_id and msg2[1] != message_id
|
159 |
-
else ((new_text, msg1[1]) if msg1[1] == message_id else (new_text, msg2[1]))
|
160 |
-
for msg1, msg2 in history]
|
161 |
-
return updated_history
|
162 |
|
|
|
|
|
|
|
|
|
|
|
163 |
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
|
169 |
|
170 |
-
def regenerate_last_message(history, media_content, selected_parts, api_endpoint, api_key, custom_prompt, temperature,
|
|
|
171 |
if not history:
|
172 |
return history, "No messages to regenerate."
|
173 |
|
@@ -200,7 +207,56 @@ def regenerate_last_message(history, media_content, selected_parts, api_endpoint
|
|
200 |
|
201 |
return new_history, "Last message regenerated successfully."
|
202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
def create_chat_interface():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
custom_css = """
|
205 |
.chatbot-container .message-wrap .message {
|
206 |
font-size: 14px !important;
|
@@ -215,9 +271,19 @@ def create_chat_interface():
|
|
215 |
|
216 |
with gr.Row():
|
217 |
with gr.Column(scale=1):
|
218 |
-
search_query_input = gr.Textbox(
|
219 |
-
|
220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
search_button = gr.Button("Search")
|
222 |
items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
|
223 |
item_mapping = gr.State({})
|
@@ -237,53 +303,60 @@ def create_chat_interface():
|
|
237 |
with gr.Row():
|
238 |
load_conversations_btn = gr.Button("Load Selected Conversation")
|
239 |
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
|
|
245 |
api_key = gr.Textbox(label="API Key (if required)", type="password")
|
|
|
|
|
|
|
|
|
|
|
246 |
custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
|
247 |
value=False,
|
248 |
visible=True)
|
249 |
preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
|
250 |
value=False,
|
251 |
visible=True)
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
with gr.Column(scale=2):
|
264 |
-
chatbot = gr.Chatbot(height=
|
265 |
msg = gr.Textbox(label="Enter your message")
|
266 |
submit = gr.Button("Submit")
|
267 |
regenerate_button = gr.Button("Regenerate Last Message")
|
|
|
268 |
clear_chat_button = gr.Button("Clear Chat")
|
269 |
|
270 |
-
edit_message_id = gr.Number(label="Message ID to Edit", visible=False)
|
271 |
-
edit_message_text = gr.Textbox(label="Edit Message", visible=False)
|
272 |
-
update_message_button = gr.Button("Update Message", visible=False)
|
273 |
-
|
274 |
-
delete_message_id = gr.Number(label="Message ID to Delete", visible=False)
|
275 |
-
delete_message_button = gr.Button("Delete Message", visible=False)
|
276 |
-
|
277 |
chat_media_name = gr.Textbox(label="Custom Chat Name(optional)")
|
278 |
save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
|
|
|
279 |
save_chat_history_as_file = gr.Button("Save Chat History as File")
|
280 |
download_file = gr.File(label="Download Chat History")
|
281 |
-
save_status = gr.Textbox(label="Save Status", interactive=False)
|
282 |
|
283 |
# Restore original functionality
|
284 |
search_button.click(
|
285 |
-
fn=
|
286 |
-
inputs=[search_query_input, search_type_input],
|
287 |
outputs=[items_output, item_mapping]
|
288 |
)
|
289 |
|
@@ -314,21 +387,72 @@ def create_chat_interface():
|
|
314 |
clear_chat,
|
315 |
outputs=[chatbot, conversation_id]
|
316 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
317 |
preset_prompt.change(
|
318 |
update_prompts,
|
319 |
-
inputs=preset_prompt,
|
320 |
outputs=[user_prompt, system_prompt_input]
|
321 |
)
|
|
|
322 |
custom_prompt_checkbox.change(
|
323 |
fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
324 |
inputs=[custom_prompt_checkbox],
|
325 |
outputs=[user_prompt, system_prompt_input]
|
326 |
)
|
327 |
-
|
328 |
-
fn=lambda x: gr.update(visible=x),
|
329 |
-
inputs=[preset_prompt_checkbox],
|
330 |
-
outputs=[preset_prompt]
|
331 |
-
)
|
332 |
submit.click(
|
333 |
chat_wrapper,
|
334 |
inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, conversation_id,
|
@@ -341,6 +465,10 @@ def create_chat_interface():
|
|
341 |
).then( # Clear the user prompt after the first message
|
342 |
lambda: (gr.update(value=""), gr.update(value="")),
|
343 |
outputs=[user_prompt, system_prompt_input]
|
|
|
|
|
|
|
|
|
344 |
)
|
345 |
|
346 |
items_output.change(
|
@@ -348,6 +476,7 @@ def create_chat_interface():
|
|
348 |
inputs=[items_output, use_content, use_summary, use_prompt, item_mapping],
|
349 |
outputs=[media_content, selected_parts]
|
350 |
)
|
|
|
351 |
use_content.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
|
352 |
outputs=[selected_parts])
|
353 |
use_summary.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
|
@@ -377,18 +506,6 @@ def create_chat_interface():
|
|
377 |
outputs=[chat_history]
|
378 |
)
|
379 |
|
380 |
-
update_message_button.click(
|
381 |
-
update_message_in_chat,
|
382 |
-
inputs=[edit_message_id, edit_message_text, chat_history],
|
383 |
-
outputs=[chatbot]
|
384 |
-
)
|
385 |
-
|
386 |
-
delete_message_button.click(
|
387 |
-
delete_message_from_chat,
|
388 |
-
inputs=[delete_message_id, chat_history],
|
389 |
-
outputs=[chatbot]
|
390 |
-
)
|
391 |
-
|
392 |
save_chat_history_as_file.click(
|
393 |
save_chat_history,
|
394 |
inputs=[chatbot, conversation_id],
|
@@ -403,15 +520,28 @@ def create_chat_interface():
|
|
403 |
|
404 |
regenerate_button.click(
|
405 |
regenerate_last_message,
|
406 |
-
inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temperature,
|
|
|
407 |
outputs=[chatbot, save_status]
|
|
|
|
|
|
|
|
|
408 |
)
|
409 |
|
410 |
-
chatbot.select(show_edit_message, None, [edit_message_text, edit_message_id, update_message_button])
|
411 |
-
chatbot.select(show_delete_message, None, [delete_message_id, delete_message_button])
|
412 |
-
|
413 |
|
414 |
def create_chat_interface_stacked():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
415 |
custom_css = """
|
416 |
.chatbot-container .message-wrap .message {
|
417 |
font-size: 14px !important;
|
@@ -426,9 +556,19 @@ def create_chat_interface_stacked():
|
|
426 |
|
427 |
with gr.Row():
|
428 |
with gr.Column():
|
429 |
-
search_query_input = gr.Textbox(
|
430 |
-
|
431 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
432 |
search_button = gr.Button("Search")
|
433 |
items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
|
434 |
item_mapping = gr.State({})
|
@@ -446,45 +586,165 @@ def create_chat_interface_stacked():
|
|
446 |
search_conversations_btn = gr.Button("Search Conversations")
|
447 |
load_conversations_btn = gr.Button("Load Selected Conversation")
|
448 |
with gr.Column():
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
|
|
|
|
453 |
api_key = gr.Textbox(label="API Key (if required)", type="password")
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
465 |
gr.Markdown("Scroll down for the chat window...")
|
466 |
with gr.Row():
|
467 |
with gr.Column(scale=1):
|
468 |
-
chatbot = gr.Chatbot(height=
|
469 |
msg = gr.Textbox(label="Enter your message")
|
470 |
with gr.Row():
|
471 |
with gr.Column():
|
472 |
submit = gr.Button("Submit")
|
473 |
regenerate_button = gr.Button("Regenerate Last Message")
|
|
|
474 |
clear_chat_button = gr.Button("Clear Chat")
|
475 |
chat_media_name = gr.Textbox(label="Custom Chat Name(optional)", visible=True)
|
476 |
save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
|
|
|
477 |
save_chat_history_as_file = gr.Button("Save Chat History as File")
|
478 |
with gr.Column():
|
479 |
download_file = gr.File(label="Download Chat History")
|
480 |
|
481 |
# Restore original functionality
|
482 |
search_button.click(
|
483 |
-
fn=
|
484 |
-
inputs=[search_query_input, search_type_input],
|
485 |
outputs=[items_output, item_mapping]
|
486 |
)
|
487 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
488 |
def update_prompts(preset_name):
|
489 |
prompts = update_user_prompt(preset_name)
|
490 |
return (
|
@@ -492,13 +752,85 @@ def create_chat_interface_stacked():
|
|
492 |
gr.update(value=prompts["system_prompt"], visible=True)
|
493 |
)
|
494 |
|
|
|
|
|
|
|
495 |
clear_chat_button.click(
|
496 |
clear_chat,
|
497 |
-
outputs=[chatbot, conversation_id]
|
498 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
499 |
preset_prompt.change(
|
500 |
update_prompts,
|
501 |
-
inputs=preset_prompt,
|
502 |
outputs=[user_prompt, system_prompt]
|
503 |
)
|
504 |
|
@@ -507,13 +839,14 @@ def create_chat_interface_stacked():
|
|
507 |
inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt,
|
508 |
conversation_id, save_conversation, temp, system_prompt],
|
509 |
outputs=[msg, chatbot, conversation_id]
|
510 |
-
).then(
|
511 |
lambda x: gr.update(value=""),
|
512 |
inputs=[chatbot],
|
513 |
outputs=[msg]
|
514 |
-
).then(
|
515 |
-
lambda:
|
516 |
-
|
|
|
517 |
)
|
518 |
|
519 |
items_output.change(
|
@@ -559,18 +892,31 @@ def create_chat_interface_stacked():
|
|
559 |
save_chat_history_to_db.click(
|
560 |
save_chat_history_to_db_wrapper,
|
561 |
inputs=[chatbot, conversation_id, media_content, chat_media_name],
|
562 |
-
outputs=[conversation_id,
|
563 |
)
|
564 |
|
565 |
regenerate_button.click(
|
566 |
regenerate_last_message,
|
567 |
inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temp, system_prompt],
|
568 |
outputs=[chatbot, gr.Textbox(label="Regenerate Status")]
|
|
|
|
|
|
|
|
|
569 |
)
|
570 |
|
571 |
|
572 |
-
# FIXME - System prompts
|
573 |
def create_chat_interface_multi_api():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
574 |
custom_css = """
|
575 |
.chatbot-container .message-wrap .message {
|
576 |
font-size: 14px !important;
|
@@ -596,9 +942,31 @@ def create_chat_interface_multi_api():
|
|
596 |
use_summary = gr.Checkbox(label="Use Summary")
|
597 |
use_prompt = gr.Checkbox(label="Use Prompt")
|
598 |
with gr.Column():
|
599 |
-
|
600 |
-
|
601 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
602 |
|
603 |
with gr.Row():
|
604 |
chatbots = []
|
@@ -606,17 +974,23 @@ def create_chat_interface_multi_api():
|
|
606 |
api_keys = []
|
607 |
temperatures = []
|
608 |
regenerate_buttons = []
|
|
|
609 |
for i in range(3):
|
610 |
with gr.Column():
|
611 |
gr.Markdown(f"### Chat Window {i + 1}")
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
|
|
|
|
|
616 |
api_key = gr.Textbox(label=f"API Key {i + 1} (if required)", type="password")
|
617 |
temperature = gr.Slider(label=f"Temperature {i + 1}", minimum=0.0, maximum=1.0, step=0.05,
|
618 |
value=0.7)
|
619 |
chatbot = gr.Chatbot(height=800, elem_classes="chat-window")
|
|
|
|
|
|
|
620 |
regenerate_button = gr.Button(f"Regenerate Last Message {i + 1}")
|
621 |
chatbots.append(chatbot)
|
622 |
api_endpoints.append(api_endpoint)
|
@@ -642,16 +1016,103 @@ def create_chat_interface_multi_api():
|
|
642 |
outputs=[items_output, item_mapping]
|
643 |
)
|
644 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
645 |
preset_prompt.change(update_user_prompt, inputs=preset_prompt, outputs=user_prompt)
|
646 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
647 |
|
648 |
def clear_all_chats():
|
649 |
-
return [[]] * 3 + [[]] * 3
|
650 |
|
651 |
clear_chat_button.click(
|
652 |
clear_all_chats,
|
653 |
-
outputs=chatbots + chat_history
|
654 |
)
|
|
|
655 |
def chat_wrapper_multi(message, custom_prompt, system_prompt, *args):
|
656 |
chat_histories = args[:3]
|
657 |
chatbots = args[3:6]
|
@@ -681,6 +1142,11 @@ def create_chat_interface_multi_api():
|
|
681 |
|
682 |
return [gr.update(value="")] + new_chatbots + new_chat_histories
|
683 |
|
|
|
|
|
|
|
|
|
|
|
684 |
|
685 |
def regenerate_last_message(chat_history, chatbot, media_content, selected_parts, api_endpoint, api_key, custom_prompt, temperature, system_prompt):
|
686 |
if not chat_history:
|
@@ -717,8 +1183,13 @@ def create_chat_interface_multi_api():
|
|
717 |
for i in range(3):
|
718 |
regenerate_buttons[i].click(
|
719 |
regenerate_last_message,
|
720 |
-
inputs=[chat_history[i], chatbots[i], media_content, selected_parts, api_endpoints[i], api_keys[i],
|
|
|
721 |
outputs=[chatbots[i], chat_history[i], gr.Textbox(label=f"Regenerate Status {i + 1}")]
|
|
|
|
|
|
|
|
|
722 |
)
|
723 |
|
724 |
# In the create_chat_interface_multi_api function:
|
@@ -731,6 +1202,10 @@ def create_chat_interface_multi_api():
|
|
731 |
).then(
|
732 |
lambda: (gr.update(value=""), gr.update(value="")),
|
733 |
outputs=[msg, user_prompt]
|
|
|
|
|
|
|
|
|
734 |
)
|
735 |
|
736 |
items_output.change(
|
@@ -747,8 +1222,17 @@ def create_chat_interface_multi_api():
|
|
747 |
)
|
748 |
|
749 |
|
750 |
-
|
751 |
def create_chat_interface_four():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
752 |
custom_css = """
|
753 |
.chatbot-container .message-wrap .message {
|
754 |
font-size: 14px !important;
|
@@ -762,17 +1246,32 @@ def create_chat_interface_four():
|
|
762 |
with gr.TabItem("Four Independent API Chats", visible=True):
|
763 |
gr.Markdown("# Four Independent API Chat Interfaces")
|
764 |
|
|
|
|
|
|
|
|
|
|
|
|
|
765 |
with gr.Row():
|
766 |
with gr.Column():
|
767 |
preset_prompt = gr.Dropdown(
|
768 |
-
label="Select Preset Prompt",
|
769 |
-
choices=
|
770 |
visible=True
|
771 |
)
|
|
|
|
|
|
|
772 |
user_prompt = gr.Textbox(
|
773 |
-
label="Modify Prompt",
|
774 |
lines=3
|
775 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
776 |
with gr.Column():
|
777 |
gr.Markdown("Scroll down for the chat windows...")
|
778 |
|
@@ -781,13 +1280,11 @@ def create_chat_interface_four():
|
|
781 |
def create_single_chat_interface(index, user_prompt_component):
|
782 |
with gr.Column():
|
783 |
gr.Markdown(f"### Chat Window {index + 1}")
|
|
|
784 |
api_endpoint = gr.Dropdown(
|
785 |
-
|
786 |
-
|
787 |
-
|
788 |
-
"DeepSeek", "Mistral", "OpenRouter", "Llama.cpp", "Kobold",
|
789 |
-
"Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"
|
790 |
-
]
|
791 |
)
|
792 |
api_key = gr.Textbox(
|
793 |
label=f"API Key {index + 1} (if required)",
|
@@ -804,6 +1301,8 @@ def create_chat_interface_four():
|
|
804 |
msg = gr.Textbox(label=f"Enter your message for Chat {index + 1}")
|
805 |
submit = gr.Button(f"Submit to Chat {index + 1}")
|
806 |
regenerate_button = gr.Button(f"Regenerate Last Message {index + 1}")
|
|
|
|
|
807 |
clear_chat_button = gr.Button(f"Clear Chat {index + 1}")
|
808 |
|
809 |
# State to maintain chat history
|
@@ -819,7 +1318,8 @@ def create_chat_interface_four():
|
|
819 |
'submit': submit,
|
820 |
'regenerate_button': regenerate_button,
|
821 |
'clear_chat_button': clear_chat_button,
|
822 |
-
'chat_history': chat_history
|
|
|
823 |
})
|
824 |
|
825 |
# Create four chat interfaces arranged in a 2x2 grid
|
@@ -830,10 +1330,47 @@ def create_chat_interface_four():
|
|
830 |
create_single_chat_interface(i * 2 + j, user_prompt)
|
831 |
|
832 |
# Update user_prompt based on preset_prompt selection
|
|
|
|
|
|
|
|
|
833 |
preset_prompt.change(
|
834 |
-
fn=
|
835 |
-
inputs=preset_prompt,
|
836 |
-
outputs=user_prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
837 |
)
|
838 |
|
839 |
def chat_wrapper_single(message, chat_history, api_endpoint, api_key, temperature, user_prompt):
|
@@ -913,6 +1450,10 @@ def create_chat_interface_four():
|
|
913 |
interface['chatbot'],
|
914 |
interface['chat_history']
|
915 |
]
|
|
|
|
|
|
|
|
|
916 |
)
|
917 |
|
918 |
interface['regenerate_button'].click(
|
@@ -929,12 +1470,18 @@ def create_chat_interface_four():
|
|
929 |
interface['chat_history'],
|
930 |
gr.Textbox(label="Regenerate Status")
|
931 |
]
|
|
|
|
|
|
|
|
|
932 |
)
|
933 |
|
|
|
|
|
|
|
934 |
interface['clear_chat_button'].click(
|
935 |
clear_chat_single,
|
936 |
-
|
937 |
-
outputs=[interface['chatbot'], interface['chat_history']]
|
938 |
)
|
939 |
|
940 |
|
@@ -953,233 +1500,11 @@ def chat_wrapper_single(message, chat_history, chatbot, api_endpoint, api_key, t
|
|
953 |
|
954 |
return new_msg, updated_chatbot, new_history, new_conv_id
|
955 |
|
956 |
-
|
957 |
-
# FIXME - Finish implementing functions + testing/valdidation
|
958 |
-
def create_chat_management_tab():
|
959 |
-
with gr.TabItem("Chat Management", visible=True):
|
960 |
-
gr.Markdown("# Chat Management")
|
961 |
-
|
962 |
-
with gr.Row():
|
963 |
-
search_query = gr.Textbox(label="Search Conversations")
|
964 |
-
search_button = gr.Button("Search")
|
965 |
-
|
966 |
-
conversation_list = gr.Dropdown(label="Select Conversation", choices=[])
|
967 |
-
conversation_mapping = gr.State({})
|
968 |
-
|
969 |
-
with gr.Tabs():
|
970 |
-
with gr.TabItem("Edit", visible=True):
|
971 |
-
chat_content = gr.TextArea(label="Chat Content (JSON)", lines=20, max_lines=50)
|
972 |
-
save_button = gr.Button("Save Changes")
|
973 |
-
delete_button = gr.Button("Delete Conversation", variant="stop")
|
974 |
-
|
975 |
-
with gr.TabItem("Preview", visible=True):
|
976 |
-
chat_preview = gr.HTML(label="Chat Preview")
|
977 |
-
result_message = gr.Markdown("")
|
978 |
-
|
979 |
-
def search_conversations(query):
|
980 |
-
conversations = search_chat_conversations(query)
|
981 |
-
choices = [f"{conv['conversation_name']} (Media: {conv['media_title']}, ID: {conv['id']})" for conv in
|
982 |
-
conversations]
|
983 |
-
mapping = {choice: conv['id'] for choice, conv in zip(choices, conversations)}
|
984 |
-
return gr.update(choices=choices), mapping
|
985 |
-
|
986 |
-
def load_conversations(selected, conversation_mapping):
|
987 |
-
logging.info(f"Selected: {selected}")
|
988 |
-
logging.info(f"Conversation mapping: {conversation_mapping}")
|
989 |
-
|
990 |
-
try:
|
991 |
-
if selected and selected in conversation_mapping:
|
992 |
-
conversation_id = conversation_mapping[selected]
|
993 |
-
messages = get_chat_messages(conversation_id)
|
994 |
-
conversation_data = {
|
995 |
-
"conversation_id": conversation_id,
|
996 |
-
"messages": messages
|
997 |
-
}
|
998 |
-
json_content = json.dumps(conversation_data, indent=2)
|
999 |
-
|
1000 |
-
# Create HTML preview
|
1001 |
-
html_preview = "<div style='max-height: 500px; overflow-y: auto;'>"
|
1002 |
-
for msg in messages:
|
1003 |
-
sender_style = "background-color: #e6f3ff;" if msg[
|
1004 |
-
'sender'] == 'user' else "background-color: #f0f0f0;"
|
1005 |
-
html_preview += f"<div style='margin-bottom: 10px; padding: 10px; border-radius: 5px; {sender_style}'>"
|
1006 |
-
html_preview += f"<strong>{msg['sender']}:</strong> {html.escape(msg['message'])}<br>"
|
1007 |
-
html_preview += f"<small>Timestamp: {msg['timestamp']}</small>"
|
1008 |
-
html_preview += "</div>"
|
1009 |
-
html_preview += "</div>"
|
1010 |
-
|
1011 |
-
logging.info("Returning json_content and html_preview")
|
1012 |
-
return json_content, html_preview
|
1013 |
-
else:
|
1014 |
-
logging.warning("No conversation selected or not in mapping")
|
1015 |
-
return "", "<p>No conversation selected</p>"
|
1016 |
-
except Exception as e:
|
1017 |
-
logging.error(f"Error in load_conversations: {str(e)}")
|
1018 |
-
return f"Error: {str(e)}", "<p>Error loading conversation</p>"
|
1019 |
-
|
1020 |
-
def validate_conversation_json(content):
|
1021 |
-
try:
|
1022 |
-
data = json.loads(content)
|
1023 |
-
if not isinstance(data, dict):
|
1024 |
-
return False, "Invalid JSON structure: root should be an object"
|
1025 |
-
if "conversation_id" not in data or not isinstance(data["conversation_id"], int):
|
1026 |
-
return False, "Missing or invalid conversation_id"
|
1027 |
-
if "messages" not in data or not isinstance(data["messages"], list):
|
1028 |
-
return False, "Missing or invalid messages array"
|
1029 |
-
for msg in data["messages"]:
|
1030 |
-
if not all(key in msg for key in ["sender", "message"]):
|
1031 |
-
return False, "Invalid message structure: missing required fields"
|
1032 |
-
return True, data
|
1033 |
-
except json.JSONDecodeError as e:
|
1034 |
-
return False, f"Invalid JSON: {str(e)}"
|
1035 |
-
|
1036 |
-
def save_conversation(selected, conversation_mapping, content):
|
1037 |
-
if not selected or selected not in conversation_mapping:
|
1038 |
-
return "Please select a conversation before saving.", "<p>No changes made</p>"
|
1039 |
-
|
1040 |
-
conversation_id = conversation_mapping[selected]
|
1041 |
-
is_valid, result = validate_conversation_json(content)
|
1042 |
-
|
1043 |
-
if not is_valid:
|
1044 |
-
return f"Error: {result}", "<p>No changes made due to error</p>"
|
1045 |
-
|
1046 |
-
conversation_data = result
|
1047 |
-
if conversation_data["conversation_id"] != conversation_id:
|
1048 |
-
return "Error: Conversation ID mismatch.", "<p>No changes made due to ID mismatch</p>"
|
1049 |
-
|
1050 |
-
try:
|
1051 |
-
with db.get_connection() as conn:
|
1052 |
-
conn.execute("BEGIN TRANSACTION")
|
1053 |
-
cursor = conn.cursor()
|
1054 |
-
|
1055 |
-
# Backup original conversation
|
1056 |
-
cursor.execute("SELECT * FROM ChatMessages WHERE conversation_id = ?", (conversation_id,))
|
1057 |
-
original_messages = cursor.fetchall()
|
1058 |
-
backup_data = json.dumps({"conversation_id": conversation_id, "messages": original_messages})
|
1059 |
-
|
1060 |
-
# You might want to save this backup_data somewhere
|
1061 |
-
|
1062 |
-
# Delete existing messages
|
1063 |
-
cursor.execute("DELETE FROM ChatMessages WHERE conversation_id = ?", (conversation_id,))
|
1064 |
-
|
1065 |
-
# Insert updated messages
|
1066 |
-
for message in conversation_data["messages"]:
|
1067 |
-
cursor.execute('''
|
1068 |
-
INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
|
1069 |
-
VALUES (?, ?, ?, COALESCE(?, CURRENT_TIMESTAMP))
|
1070 |
-
''', (conversation_id, message["sender"], message["message"], message.get("timestamp")))
|
1071 |
-
|
1072 |
-
conn.commit()
|
1073 |
-
|
1074 |
-
# Create updated HTML preview
|
1075 |
-
html_preview = "<div style='max-height: 500px; overflow-y: auto;'>"
|
1076 |
-
for msg in conversation_data["messages"]:
|
1077 |
-
sender_style = "background-color: #e6f3ff;" if msg[
|
1078 |
-
'sender'] == 'user' else "background-color: #f0f0f0;"
|
1079 |
-
html_preview += f"<div style='margin-bottom: 10px; padding: 10px; border-radius: 5px; {sender_style}'>"
|
1080 |
-
html_preview += f"<strong>{msg['sender']}:</strong> {html.escape(msg['message'])}<br>"
|
1081 |
-
html_preview += f"<small>Timestamp: {msg.get('timestamp', 'N/A')}</small>"
|
1082 |
-
html_preview += "</div>"
|
1083 |
-
html_preview += "</div>"
|
1084 |
-
|
1085 |
-
return "Conversation updated successfully.", html_preview
|
1086 |
-
except sqlite3.Error as e:
|
1087 |
-
conn.rollback()
|
1088 |
-
logging.error(f"Database error in save_conversation: {e}")
|
1089 |
-
return f"Error updating conversation: {str(e)}", "<p>Error occurred while saving</p>"
|
1090 |
-
except Exception as e:
|
1091 |
-
conn.rollback()
|
1092 |
-
logging.error(f"Unexpected error in save_conversation: {e}")
|
1093 |
-
return f"Unexpected error: {str(e)}", "<p>Unexpected error occurred</p>"
|
1094 |
-
|
1095 |
-
def delete_conversation(selected, conversation_mapping):
    """Delete the selected conversation and all of its messages.

    Args:
        selected: Dropdown label of the conversation to delete.
        conversation_mapping: Dict mapping dropdown labels to conversation ids.

    Returns:
        A 3-tuple ``(status_message, preview_html, dropdown_update)``.
    """
    if not selected or selected not in conversation_mapping:
        return "Please select a conversation before deleting.", "<p>No changes made</p>", gr.update(choices=[])

    conversation_id = conversation_mapping[selected]

    try:
        with db.get_connection() as conn:
            try:
                cursor = conn.cursor()

                # Delete messages associated with the conversation
                cursor.execute("DELETE FROM ChatMessages WHERE conversation_id = ?", (conversation_id,))

                # Delete the conversation itself
                cursor.execute("DELETE FROM ChatConversations WHERE id = ?", (conversation_id,))

                conn.commit()
            except Exception:
                # Roll back here, where `conn` is guaranteed to be bound and
                # still open. Rolling back in the outer handlers would raise
                # a NameError whenever get_connection() itself had failed,
                # hiding the real error.
                conn.rollback()
                raise

        # Update the conversation list shown in the dropdown
        remaining_conversations = [choice for choice in conversation_mapping if choice != selected]

        return "Conversation deleted successfully.", "<p>Conversation deleted</p>", gr.update(choices=remaining_conversations)
    except sqlite3.Error as e:
        logging.error(f"Database error in delete_conversation: {e}")
        return f"Error deleting conversation: {str(e)}", "<p>Error occurred while deleting</p>", gr.update()
    except Exception as e:
        logging.error(f"Unexpected error in delete_conversation: {e}")
        return f"Unexpected error: {str(e)}", "<p>Unexpected error occurred</p>", gr.update()
|
1126 |
-
|
1127 |
-
def parse_formatted_content(formatted_content):
    """Parse the editable text form of a conversation into a JSON string.

    The text is read as: line 0 ``<label>: <conversation id>``, line 1
    ``<label>: <timestamp>``, line 2 ignored, then repeated ``Role: <role>`` /
    ``Content: <text>`` pairs. Lines that follow a ``Content:`` line and start
    with neither marker are treated as continuation lines of that content
    (assumes multi-line messages are written out verbatim -- confirm against
    the formatter that produces this text).

    Args:
        formatted_content: The text to parse.

    Returns:
        A JSON string (indent=2) with keys ``conversation_id`` (int),
        ``timestamp`` (str) and ``history``, a list of
        ``{"role": ..., "content": ["", text_or_None]}`` entries.

    Raises:
        IndexError, ValueError: If the two header lines are missing or the
            conversation id is not an integer.
    """
    def _entry(role, content_lines):
        # Trailing blank separator lines are not part of the message itself.
        content = None if content_lines is None else '\n'.join(content_lines).rstrip('\n')
        return {"role": role, "content": ["", content]}

    lines = formatted_content.split('\n')
    # maxsplit=1 keeps any later ": " inside the value instead of truncating it.
    conversation_id = int(lines[0].split(': ', 1)[1])
    timestamp = lines[1].split(': ', 1)[1]

    history = []
    current_role = None
    content_lines = None
    for line in lines[3:]:
        if line.startswith("Role: "):
            if current_role is not None:
                history.append(_entry(current_role, content_lines))
            current_role = line.split(': ', 1)[1]
            # Reset so a role without a Content line does not inherit the
            # previous message's text.
            content_lines = None
        elif line.startswith("Content: "):
            content_lines = [line.split(': ', 1)[1]]
        elif content_lines is not None:
            content_lines.append(line)

    if current_role is not None:
        history.append(_entry(current_role, content_lines))

    return json.dumps({
        "conversation_id": conversation_id,
        "timestamp": timestamp,
        "history": history
    }, indent=2)
|
1148 |
-
|
1149 |
-
search_button.click(
|
1150 |
-
search_conversations,
|
1151 |
-
inputs=[search_query],
|
1152 |
-
outputs=[conversation_list, conversation_mapping]
|
1153 |
-
)
|
1154 |
-
|
1155 |
-
conversation_list.change(
|
1156 |
-
load_conversations,
|
1157 |
-
inputs=[conversation_list, conversation_mapping],
|
1158 |
-
outputs=[chat_content, chat_preview]
|
1159 |
-
)
|
1160 |
-
|
1161 |
-
save_button.click(
|
1162 |
-
save_conversation,
|
1163 |
-
inputs=[conversation_list, conversation_mapping, chat_content],
|
1164 |
-
outputs=[result_message, chat_preview]
|
1165 |
-
)
|
1166 |
-
|
1167 |
-
delete_button.click(
|
1168 |
-
delete_conversation,
|
1169 |
-
inputs=[conversation_list, conversation_mapping],
|
1170 |
-
outputs=[result_message, chat_preview, conversation_list]
|
1171 |
-
)
|
1172 |
-
|
1173 |
-
return search_query, search_button, conversation_list, conversation_mapping, chat_content, save_button, delete_button, result_message, chat_preview
|
1174 |
-
|
1175 |
-
|
1176 |
-
|
1177 |
# Mock function to simulate LLM processing
|
1178 |
def process_with_llm(workflow, context, prompt, api_endpoint, api_key):
    """Return a placeholder string describing a simulated LLM call.

    No request is made; the arguments are only echoed back, with ``context``
    and ``prompt`` cut to their first 30 characters.

    Args:
        workflow: Name of the workflow being simulated.
        context: Context text; ``None`` is treated as an empty string.
        prompt: Prompt text; ``None`` is treated as an empty string.
        api_endpoint: Name of the API endpoint to mention in the output.
        api_key: API key; only its first five characters are echoed.

    Returns:
        The formatted description string.
    """
    # NOTE(review): this echoes the first five characters of the key; keys of
    # five characters or fewer are therefore shown in full.
    api_key_snippet = f"{api_key[:5]}..." if api_key else "Not provided"
    # Guard against None so an empty field cannot raise TypeError on slicing.
    context_preview = (context or "")[:30]
    prompt_preview = (prompt or "")[:30]
    return (
        f"LLM output using {api_endpoint} (API Key: {api_key_snippet}) for {workflow} "
        f"with context: {context_preview}... and prompt: {prompt_preview}..."
    )
|
1181 |
|
1182 |
-
|
1183 |
#
|
1184 |
# End of Chat_ui.py
|
1185 |
#######################################################################################################################
|
|
|
2 |
# Description: Chat interface functions for Gradio
|
3 |
#
|
4 |
# Imports
|
|
|
|
|
5 |
import logging
|
6 |
import os
|
7 |
import sqlite3
|
8 |
+
import time
|
9 |
from datetime import datetime
|
10 |
#
|
11 |
# External Imports
|
12 |
import gradio as gr
|
13 |
#
|
14 |
# Local Imports
|
15 |
+
from App_Function_Libraries.Chat.Chat_Functions import approximate_token_count, chat, save_chat_history, \
|
16 |
+
update_chat_content, save_chat_history_to_db_wrapper
|
17 |
+
from App_Function_Libraries.DB.DB_Manager import db, load_chat_history, start_new_conversation, \
|
18 |
+
save_message, search_conversations_by_keywords, \
|
19 |
+
get_all_conversations, delete_messages_in_conversation, search_media_db, list_prompts
|
20 |
+
from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_db_connection
|
21 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_user_prompt
|
22 |
+
from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
|
23 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, format_api_name, global_api_endpoints
|
24 |
#
|
25 |
#
|
26 |
########################################################################################################################
|
|
|
93 |
# Create a new conversation
|
94 |
media_id = media_content.get('id', None)
|
95 |
conversation_name = f"Chat about {media_content.get('title', 'Unknown Media')} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
96 |
+
conversation_id = start_new_conversation(title=conversation_name, media_id=media_id)
|
|
|
97 |
# Add user message to the database
|
98 |
+
user_message_id = save_message(conversation_id, role="user", content=message)
|
99 |
|
100 |
# Include the selected parts and custom_prompt only for the first message
|
101 |
if not history and selected_parts:
|
|
|
114 |
|
115 |
if save_conversation:
|
116 |
# Add assistant message to the database
|
117 |
+
save_message(conversation_id, role="assistant", content=bot_message)
|
118 |
|
119 |
# Update history
|
120 |
new_history = history + [(message, bot_message)]
|
|
|
124 |
logging.error(f"Error in chat wrapper: {str(e)}")
|
125 |
return "An error occurred.", history, conversation_id
|
126 |
|
127 |
+
|
128 |
def search_conversations(query):
    """Build the conversation dropdown choices for a title search.

    A non-blank ``query`` is passed (stripped) to
    ``search_conversations_by_keywords`` as ``title_query``; otherwise all
    conversations are fetched with ``get_all_conversations``. Each choice is
    a ``(label, conversation_id)`` pair whose label shows the title and the
    first eight characters of the id.

    Returns:
        ``gr.update(choices=...)``; the choices are empty when nothing is
        found or when any exception is raised (the exception is logged).
    """
    try:
        title_filter = query.strip() if query else ""
        if title_filter:
            # Search by title when the user typed something
            found, _, _ = search_conversations_by_keywords(title_query=title_filter)
        else:
            # No query: list every conversation
            found, _, _ = get_all_conversations()

        if not found:
            return gr.update(choices=[])

        # Format choices to match the existing UI format
        choices = []
        for conversation in found:
            conv_id = conversation['conversation_id']
            label = f"{conversation['title']} (ID: {conv_id[:8]})"
            choices.append((label, conv_id))

        return gr.update(choices=choices)
    except Exception as e:
        logging.error(f"Error searching conversations: {str(e)}")
        return gr.update(choices=[])
|
153 |
|
154 |
|
155 |
def load_conversation(conversation_id):
    """Load a stored conversation as chatbot history.

    Each ``(role, content)`` pair from ``load_chat_history`` becomes one
    history row: ``(content, None)`` for role ``'user'``, ``(None, content)``
    for any other role.

    Returns:
        ``(history, conversation_id)``, or ``([], None)`` when no id is given
        or loading fails (the exception is logged).
    """
    if not conversation_id:
        return [], None

    try:
        stored_messages, _, _ = load_chat_history(conversation_id)

        # Convert to chatbot history format, one row per stored message
        rows = []
        for role, content in stored_messages:
            if role == 'user':
                rows.append((content, None))
            else:
                rows.append((None, content))

        return rows, conversation_id
    except Exception as e:
        logging.error(f"Error loading conversation: {str(e)}")
        return [], None
|
174 |
|
175 |
|
176 |
+
def regenerate_last_message(history, media_content, selected_parts, api_endpoint, api_key, custom_prompt, temperature,
|
177 |
+
system_prompt):
|
178 |
if not history:
|
179 |
return history, "No messages to regenerate."
|
180 |
|
|
|
207 |
|
208 |
return new_history, "Last message regenerated successfully."
|
209 |
|
210 |
+
|
211 |
+
def update_dropdown_multiple(query, search_type, keywords=""):
    """Search the media database and build dropdown choices from the results.

    Args:
        query: Text to search for.
        search_type: Field to search; ``"keyword"`` (case-insensitive) searches
            title, content and author together, any other value is lowercased
            and used as the single search field.
        keywords: Keyword filter forwarded to ``search_media_db``.

    Returns:
        ``(gr.update(choices=labels), item_map)`` where ``item_map`` maps each
        label to its media id. On any exception the error is logged and
        ``(gr.update(choices=[]), {})`` is returned.
    """
    try:
        if search_type.lower() == "keyword":
            # When searching by keyword, search across multiple fields
            search_fields = ["title", "content", "author"]
        else:
            # Otherwise use the specific field
            search_fields = [search_type.lower()]

        results = search_media_db(
            search_query=query,
            search_fields=search_fields,
            keywords=keywords,
            page=1,
            results_per_page=50  # Adjust as needed
        )

        item_map = {}
        formatted_results = []

        for row in results:
            # Full unpack keeps the nine-column row-shape check; the name
            # `media_id` avoids shadowing the builtin `id`.
            media_id, _url, title, _type, _content, author, date, _prompt, _summary = row
            display_text = f"{title} - {author or 'Unknown'} ({date})"
            formatted_results.append(display_text)
            # NOTE(review): rows with identical title/author/date share a
            # label, so the later row's id overwrites the earlier one.
            item_map[display_text] = media_id

        return gr.update(choices=formatted_results), item_map
    except Exception as e:
        logging.error(f"Error in update_dropdown_multiple: {str(e)}")
        return gr.update(choices=[]), {}
|
247 |
+
|
248 |
+
|
249 |
def create_chat_interface():
|
250 |
+
try:
|
251 |
+
default_value = None
|
252 |
+
if default_api_endpoint:
|
253 |
+
if default_api_endpoint in global_api_endpoints:
|
254 |
+
default_value = format_api_name(default_api_endpoint)
|
255 |
+
else:
|
256 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
257 |
+
except Exception as e:
|
258 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
259 |
+
default_value = None
|
260 |
custom_css = """
|
261 |
.chatbot-container .message-wrap .message {
|
262 |
font-size: 14px !important;
|
|
|
271 |
|
272 |
with gr.Row():
|
273 |
with gr.Column(scale=1):
|
274 |
+
search_query_input = gr.Textbox(
|
275 |
+
label="Search Query",
|
276 |
+
placeholder="Enter your search query here..."
|
277 |
+
)
|
278 |
+
search_type_input = gr.Radio(
|
279 |
+
choices=["Title", "Content", "Author", "Keyword"],
|
280 |
+
value="Keyword",
|
281 |
+
label="Search By"
|
282 |
+
)
|
283 |
+
keyword_filter_input = gr.Textbox(
|
284 |
+
label="Filter by Keywords (comma-separated)",
|
285 |
+
placeholder="ml, ai, python, etc..."
|
286 |
+
)
|
287 |
search_button = gr.Button("Search")
|
288 |
items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
|
289 |
item_mapping = gr.State({})
|
|
|
303 |
with gr.Row():
|
304 |
load_conversations_btn = gr.Button("Load Selected Conversation")
|
305 |
|
306 |
+
# Refactored API selection dropdown
|
307 |
+
api_endpoint = gr.Dropdown(
|
308 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
309 |
+
value=default_value,
|
310 |
+
label="API for Chat Interaction (Optional)"
|
311 |
+
)
|
312 |
api_key = gr.Textbox(label="API Key (if required)", type="password")
|
313 |
+
|
314 |
+
# Initialize state variables for pagination
|
315 |
+
current_page_state = gr.State(value=1)
|
316 |
+
total_pages_state = gr.State(value=1)
|
317 |
+
|
318 |
custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
|
319 |
value=False,
|
320 |
visible=True)
|
321 |
preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
|
322 |
value=False,
|
323 |
visible=True)
|
324 |
+
with gr.Row():
|
325 |
+
# Add pagination controls
|
326 |
+
preset_prompt = gr.Dropdown(label="Select Preset Prompt",
|
327 |
+
choices=[],
|
328 |
+
visible=False)
|
329 |
+
with gr.Row():
|
330 |
+
prev_page_button = gr.Button("Previous Page", visible=False)
|
331 |
+
page_display = gr.Markdown("Page 1 of X", visible=False)
|
332 |
+
next_page_button = gr.Button("Next Page", visible=False)
|
333 |
+
system_prompt_input = gr.Textbox(label="System Prompt",
|
334 |
+
value="You are a helpful AI assistant",
|
335 |
+
lines=3,
|
336 |
+
visible=False)
|
337 |
+
with gr.Row():
|
338 |
+
user_prompt = gr.Textbox(label="Custom Prompt",
|
339 |
+
placeholder="Enter custom prompt here",
|
340 |
+
lines=3,
|
341 |
+
visible=False)
|
342 |
with gr.Column(scale=2):
|
343 |
+
chatbot = gr.Chatbot(height=800, elem_classes="chatbot-container")
|
344 |
msg = gr.Textbox(label="Enter your message")
|
345 |
submit = gr.Button("Submit")
|
346 |
regenerate_button = gr.Button("Regenerate Last Message")
|
347 |
+
token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False)
|
348 |
clear_chat_button = gr.Button("Clear Chat")
|
349 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
chat_media_name = gr.Textbox(label="Custom Chat Name(optional)")
|
351 |
save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
|
352 |
+
save_status = gr.Textbox(label="Save Status", interactive=False)
|
353 |
save_chat_history_as_file = gr.Button("Save Chat History as File")
|
354 |
download_file = gr.File(label="Download Chat History")
|
|
|
355 |
|
356 |
# Restore original functionality
|
357 |
search_button.click(
|
358 |
+
fn=update_dropdown_multiple,
|
359 |
+
inputs=[search_query_input, search_type_input, keyword_filter_input],
|
360 |
outputs=[items_output, item_mapping]
|
361 |
)
|
362 |
|
|
|
387 |
clear_chat,
|
388 |
outputs=[chatbot, conversation_id]
|
389 |
)
|
390 |
+
|
391 |
+
# Function to handle preset prompt checkbox change
|
392 |
+
def on_preset_prompt_checkbox_change(is_checked):
    """Show or hide the preset-prompt dropdown and its pagination controls.

    Returns six values, in order: updates for the preset dropdown, the
    previous-page button, the next-page button and the page label, followed
    by the current page number and the total page count. When checked, the
    first page (20 per page) is loaded via ``list_prompts``; when unchecked,
    everything is hidden and both page values are reset to 1.
    """
    if not is_checked:
        return (
            gr.update(visible=False, interactive=False),  # preset_prompt
            *(gr.update(visible=False) for _ in range(3)),  # prev, next, page label
            1,  # current_page_state
            1,  # total_pages_state
        )

    prompt_choices, page_count, page_number = list_prompts(page=1, per_page=20)
    return (
        gr.update(visible=True, interactive=True, choices=prompt_choices),  # preset_prompt
        gr.update(visible=True),  # prev_page_button
        gr.update(visible=True),  # next_page_button
        gr.update(value=f"Page {page_number} of {page_count}", visible=True),  # page_display
        page_number,  # current_page_state
        page_count,  # total_pages_state
    )
|
413 |
+
|
414 |
+
preset_prompt_checkbox.change(
|
415 |
+
fn=on_preset_prompt_checkbox_change,
|
416 |
+
inputs=[preset_prompt_checkbox],
|
417 |
+
outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
|
418 |
+
)
|
419 |
+
|
420 |
+
def on_prev_page_click(current_page, total_pages):
    """Step the preset-prompt list back one page, never below page 1.

    Returns ``(dropdown_update, page_label_update, current_page)`` using the
    page number and total reported by ``list_prompts``. The incoming
    ``total_pages`` is not used.
    """
    prompt_choices, page_count, page_number = list_prompts(
        page=max(current_page - 1, 1),
        per_page=20,
    )
    label = f"Page {page_number} of {page_count}"
    return gr.update(choices=prompt_choices), gr.update(value=label), page_number
|
425 |
+
|
426 |
+
prev_page_button.click(
|
427 |
+
fn=on_prev_page_click,
|
428 |
+
inputs=[current_page_state, total_pages_state],
|
429 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
430 |
+
)
|
431 |
+
|
432 |
+
def on_next_page_click(current_page, total_pages):
    """Step the preset-prompt list forward one page, capped at ``total_pages``.

    Returns ``(dropdown_update, page_label_update, current_page)`` using the
    page number and total reported by ``list_prompts``.
    """
    prompt_choices, page_count, page_number = list_prompts(
        page=min(current_page + 1, total_pages),
        per_page=20,
    )
    label = f"Page {page_number} of {page_count}"
    return gr.update(choices=prompt_choices), gr.update(value=label), page_number
|
437 |
+
|
438 |
+
next_page_button.click(
|
439 |
+
fn=on_next_page_click,
|
440 |
+
inputs=[current_page_state, total_pages_state],
|
441 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
442 |
+
)
|
443 |
+
|
444 |
preset_prompt.change(
|
445 |
update_prompts,
|
446 |
+
inputs=[preset_prompt],
|
447 |
outputs=[user_prompt, system_prompt_input]
|
448 |
)
|
449 |
+
|
450 |
custom_prompt_checkbox.change(
|
451 |
fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
452 |
inputs=[custom_prompt_checkbox],
|
453 |
outputs=[user_prompt, system_prompt_input]
|
454 |
)
|
455 |
+
|
|
|
|
|
|
|
|
|
456 |
submit.click(
|
457 |
chat_wrapper,
|
458 |
inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, conversation_id,
|
|
|
465 |
).then( # Clear the user prompt after the first message
|
466 |
lambda: (gr.update(value=""), gr.update(value="")),
|
467 |
outputs=[user_prompt, system_prompt_input]
|
468 |
+
).then(
|
469 |
+
lambda history: approximate_token_count(history),
|
470 |
+
inputs=[chatbot],
|
471 |
+
outputs=[token_count_display]
|
472 |
)
|
473 |
|
474 |
items_output.change(
|
|
|
476 |
inputs=[items_output, use_content, use_summary, use_prompt, item_mapping],
|
477 |
outputs=[media_content, selected_parts]
|
478 |
)
|
479 |
+
|
480 |
use_content.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
|
481 |
outputs=[selected_parts])
|
482 |
use_summary.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
|
|
|
506 |
outputs=[chat_history]
|
507 |
)
|
508 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
509 |
save_chat_history_as_file.click(
|
510 |
save_chat_history,
|
511 |
inputs=[chatbot, conversation_id],
|
|
|
520 |
|
521 |
regenerate_button.click(
|
522 |
regenerate_last_message,
|
523 |
+
inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temperature,
|
524 |
+
system_prompt_input],
|
525 |
outputs=[chatbot, save_status]
|
526 |
+
).then(
|
527 |
+
lambda history: approximate_token_count(history),
|
528 |
+
inputs=[chatbot],
|
529 |
+
outputs=[token_count_display]
|
530 |
)
|
531 |
|
|
|
|
|
|
|
532 |
|
533 |
def create_chat_interface_stacked():
|
534 |
+
try:
|
535 |
+
default_value = None
|
536 |
+
if default_api_endpoint:
|
537 |
+
if default_api_endpoint in global_api_endpoints:
|
538 |
+
default_value = format_api_name(default_api_endpoint)
|
539 |
+
else:
|
540 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
541 |
+
except Exception as e:
|
542 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
543 |
+
default_value = None
|
544 |
+
|
545 |
custom_css = """
|
546 |
.chatbot-container .message-wrap .message {
|
547 |
font-size: 14px !important;
|
|
|
556 |
|
557 |
with gr.Row():
|
558 |
with gr.Column():
|
559 |
+
search_query_input = gr.Textbox(
|
560 |
+
label="Search Query",
|
561 |
+
placeholder="Enter your search query here..."
|
562 |
+
)
|
563 |
+
search_type_input = gr.Radio(
|
564 |
+
choices=["Title", "Content", "Author", "Keyword"],
|
565 |
+
value="Keyword",
|
566 |
+
label="Search By"
|
567 |
+
)
|
568 |
+
keyword_filter_input = gr.Textbox(
|
569 |
+
label="Filter by Keywords (comma-separated)",
|
570 |
+
placeholder="ml, ai, python, etc..."
|
571 |
+
)
|
572 |
search_button = gr.Button("Search")
|
573 |
items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
|
574 |
item_mapping = gr.State({})
|
|
|
586 |
search_conversations_btn = gr.Button("Search Conversations")
|
587 |
load_conversations_btn = gr.Button("Load Selected Conversation")
|
588 |
with gr.Column():
|
589 |
+
# Refactored API selection dropdown
|
590 |
+
api_endpoint = gr.Dropdown(
|
591 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
592 |
+
value=default_value,
|
593 |
+
label="API for Chat Interaction (Optional)"
|
594 |
+
)
|
595 |
api_key = gr.Textbox(label="API Key (if required)", type="password")
|
596 |
+
|
597 |
+
# Initialize state variables for pagination
|
598 |
+
current_page_state = gr.State(value=1)
|
599 |
+
total_pages_state = gr.State(value=1)
|
600 |
+
|
601 |
+
custom_prompt_checkbox = gr.Checkbox(
|
602 |
+
label="Use a Custom Prompt",
|
603 |
+
value=False,
|
604 |
+
visible=True
|
605 |
+
)
|
606 |
+
preset_prompt_checkbox = gr.Checkbox(
|
607 |
+
label="Use a pre-set Prompt",
|
608 |
+
value=False,
|
609 |
+
visible=True
|
610 |
+
)
|
611 |
+
|
612 |
+
with gr.Row():
|
613 |
+
preset_prompt = gr.Dropdown(
|
614 |
+
label="Select Preset Prompt",
|
615 |
+
choices=[],
|
616 |
+
visible=False
|
617 |
+
)
|
618 |
+
with gr.Row():
|
619 |
+
prev_page_button = gr.Button("Previous Page", visible=False)
|
620 |
+
page_display = gr.Markdown("Page 1 of X", visible=False)
|
621 |
+
next_page_button = gr.Button("Next Page", visible=False)
|
622 |
+
|
623 |
+
system_prompt = gr.Textbox(
|
624 |
+
label="System Prompt",
|
625 |
+
value="You are a helpful AI assistant.",
|
626 |
+
lines=4,
|
627 |
+
visible=False
|
628 |
+
)
|
629 |
+
user_prompt = gr.Textbox(
|
630 |
+
label="Custom User Prompt",
|
631 |
+
placeholder="Enter custom prompt here",
|
632 |
+
lines=4,
|
633 |
+
visible=False
|
634 |
+
)
|
635 |
gr.Markdown("Scroll down for the chat window...")
|
636 |
with gr.Row():
|
637 |
with gr.Column(scale=1):
|
638 |
+
chatbot = gr.Chatbot(height=800, elem_classes="chatbot-container")
|
639 |
msg = gr.Textbox(label="Enter your message")
|
640 |
with gr.Row():
|
641 |
with gr.Column():
|
642 |
submit = gr.Button("Submit")
|
643 |
regenerate_button = gr.Button("Regenerate Last Message")
|
644 |
+
token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False)
|
645 |
clear_chat_button = gr.Button("Clear Chat")
|
646 |
chat_media_name = gr.Textbox(label="Custom Chat Name(optional)", visible=True)
|
647 |
save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
|
648 |
+
save_status = gr.Textbox(label="Save Status", interactive=False)
|
649 |
save_chat_history_as_file = gr.Button("Save Chat History as File")
|
650 |
with gr.Column():
|
651 |
download_file = gr.File(label="Download Chat History")
|
652 |
|
653 |
# Restore original functionality
|
654 |
search_button.click(
|
655 |
+
fn=update_dropdown_multiple,
|
656 |
+
inputs=[search_query_input, search_type_input, keyword_filter_input],
|
657 |
outputs=[items_output, item_mapping]
|
658 |
)
|
659 |
|
660 |
+
def search_conversations(query):
    """Return a dropdown update listing conversations that match ``query``.

    A non-blank query is searched by title through
    ``search_conversations_by_keywords``; a blank or missing one lists all
    conversations. Choices are ``(label, conversation_id)`` pairs. Any
    exception is logged and produces an empty choice list.
    """
    try:
        title_filter = query.strip() if query else ""
        if title_filter:
            matches, _, _ = search_conversations_by_keywords(title_query=title_filter)
        else:
            matches, _, _ = get_all_conversations()

        if not matches:
            return gr.update(choices=[])

        # Label shows the title plus the first eight characters of the id
        options = []
        for item in matches:
            item_id = item['conversation_id']
            options.append((f"{item['title']} (ID: {item_id[:8]})", item_id))

        return gr.update(choices=options)
    except Exception as e:
        logging.error(f"Error searching conversations: {str(e)}")
        return gr.update(choices=[])
|
681 |
+
|
682 |
+
def load_conversation(conversation_id):
    """Fetch a conversation and convert it to chatbot history rows.

    Messages with role ``'user'`` fill the first slot of a row and all other
    roles the second; the unused slot is ``None``.

    Returns:
        ``(history, conversation_id)`` on success, ``([], None)`` when the id
        is empty or loading raises (the exception is logged).
    """
    if not conversation_id:
        return [], None

    try:
        loaded, _, _ = load_chat_history(conversation_id)

        chat_rows = []
        for role, content in loaded:
            row = (content, None) if role == 'user' else (None, content)
            chat_rows.append(row)

        return chat_rows, conversation_id
    except Exception as e:
        logging.error(f"Error loading conversation: {str(e)}")
        return [], None
|
700 |
+
|
701 |
+
def save_chat_history_to_db_wrapper(chatbot, conversation_id, media_content, chat_name=None):
    """Persist the current chatbot history to the database.

    Steps: run ``SELECT 1`` to confirm the database is reachable, create a
    conversation if ``conversation_id`` is empty (titled ``chat_name`` or
    "Untitled Conversation"), delete the conversation's stored messages, then
    save every non-empty user and assistant message from ``chatbot``.

    Args:
        chatbot: Iterable of ``(user_msg, assistant_msg)`` pairs.
        conversation_id: Existing conversation id, or a falsy value to create one.
        media_content: Only its type is logged.
        chat_name: Optional title for a newly created conversation.

    Returns:
        ``(conversation_id, status_update)`` where ``status_update`` is a
        ``gr.update`` carrying a success or error message.
    """
    log_counter("save_chat_history_to_db_attempt")
    started_at = time.time()
    logging.info(f"Attempting to save chat history. Media content type: {type(media_content)}")

    try:
        # First check that the database can be opened and queried at all
        try:
            with get_db_connection() as conn:
                conn.cursor().execute("SELECT 1")
        except sqlite3.DatabaseError as db_error:
            logging.error(f"Database is corrupted or inaccessible: {str(db_error)}")
            return conversation_id, gr.update(
                value="Database error: The database file appears to be corrupted. Please contact support.")

        # Same path for both new and existing conversations
        try:
            if not conversation_id:
                conversation_id = start_new_conversation(title=chat_name or "Untitled Conversation")
                logging.info(f"Created new conversation with ID: {conversation_id}")

            # Replace the stored messages with the current history
            delete_messages_in_conversation(conversation_id)
            for user_msg, assistant_msg in chatbot:
                for role, text in (("user", user_msg), ("assistant", assistant_msg)):
                    if text:
                        save_message(conversation_id, role, text)
        except sqlite3.DatabaseError as db_error:
            logging.error(f"Database error during message save: {str(db_error)}")
            return conversation_id, gr.update(
                value="Database error: Unable to save messages. Please try again or contact support.")

        log_histogram("save_chat_history_to_db_duration", time.time() - started_at)
        log_counter("save_chat_history_to_db_success")

        return conversation_id, gr.update(value="Chat history saved successfully!")

    except Exception as e:
        log_counter("save_chat_history_to_db_error", labels={"error": str(e)})
        error_message = f"Failed to save chat history: {str(e)}"
        logging.error(error_message, exc_info=True)
        return conversation_id, gr.update(value=error_message)
|
747 |
+
|
748 |
def update_prompts(preset_name):
|
749 |
prompts = update_user_prompt(preset_name)
|
750 |
return (
|
|
|
752 |
gr.update(value=prompts["system_prompt"], visible=True)
|
753 |
)
|
754 |
|
755 |
+
def clear_chat():
    """Return the reset values for the chat: empty history, no conversation id, zero tokens."""
    empty_history = []
    cleared_conversation_id = None
    token_count = 0
    return empty_history, cleared_conversation_id, token_count
|
757 |
+
|
758 |
clear_chat_button.click(
|
759 |
clear_chat,
|
760 |
+
outputs=[chatbot, conversation_id, token_count_display]
|
761 |
)
|
762 |
+
|
763 |
+
# Handle custom prompt checkbox change
|
764 |
+
def on_custom_prompt_checkbox_change(is_checked):
    """Return two visibility updates, both set to the checkbox state."""
    first_update = gr.update(visible=is_checked)
    second_update = gr.update(visible=is_checked)
    return first_update, second_update
|
769 |
+
|
770 |
+
custom_prompt_checkbox.change(
|
771 |
+
fn=on_custom_prompt_checkbox_change,
|
772 |
+
inputs=[custom_prompt_checkbox],
|
773 |
+
outputs=[user_prompt, system_prompt]
|
774 |
+
)
|
775 |
+
|
776 |
+
# Handle preset prompt checkbox change
|
777 |
+
def on_preset_prompt_checkbox_change(is_checked):
    """Toggle the preset-prompt dropdown together with its pagination widgets.

    Returns a 6-tuple: updates for the dropdown, previous-page button,
    next-page button and page label, then the current page and the total page
    count. Checking the box loads page 1 (20 prompts per page) from
    ``list_prompts``; unchecking hides the widgets and resets both page
    values to 1.
    """
    if not is_checked:
        hide_dropdown = gr.update(visible=False, interactive=False)  # preset_prompt
        hide_prev = gr.update(visible=False)  # prev_page_button
        hide_next = gr.update(visible=False)  # next_page_button
        hide_label = gr.update(visible=False)  # page_display
        return hide_dropdown, hide_prev, hide_next, hide_label, 1, 1

    available_prompts, page_total, page_now = list_prompts(page=1, per_page=20)
    show_dropdown = gr.update(visible=True, interactive=True, choices=available_prompts)  # preset_prompt
    show_prev = gr.update(visible=True)  # prev_page_button
    show_next = gr.update(visible=True)  # next_page_button
    show_label = gr.update(value=f"Page {page_now} of {page_total}", visible=True)  # page_display
    return show_dropdown, show_prev, show_next, show_label, page_now, page_total
|
798 |
+
|
799 |
+
preset_prompt_checkbox.change(
|
800 |
+
fn=on_preset_prompt_checkbox_change,
|
801 |
+
inputs=[preset_prompt_checkbox],
|
802 |
+
outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
|
803 |
+
)
|
804 |
+
|
805 |
+
# Pagination button functions
|
806 |
+
def on_prev_page_click(current_page, total_pages):
    """Load the previous page of preset prompts, clamped at page 1.

    Returns ``(dropdown_update, page_label_update, current_page)`` based on
    what ``list_prompts`` reports. The incoming ``total_pages`` is not used.
    """
    target_page = max(current_page - 1, 1)
    available_prompts, page_total, page_now = list_prompts(page=target_page, per_page=20)
    dropdown_update = gr.update(choices=available_prompts)
    label_update = gr.update(value=f"Page {page_now} of {page_total}")
    return dropdown_update, label_update, page_now
|
811 |
+
|
812 |
+
prev_page_button.click(
|
813 |
+
fn=on_prev_page_click,
|
814 |
+
inputs=[current_page_state, total_pages_state],
|
815 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
816 |
+
)
|
817 |
+
|
818 |
+
def on_next_page_click(current_page, total_pages):
    """Load the next page of preset prompts, clamped at ``total_pages``.

    Returns ``(dropdown_update, page_label_update, current_page)`` based on
    what ``list_prompts`` reports.
    """
    target_page = min(current_page + 1, total_pages)
    available_prompts, page_total, page_now = list_prompts(page=target_page, per_page=20)
    dropdown_update = gr.update(choices=available_prompts)
    label_update = gr.update(value=f"Page {page_now} of {page_total}")
    return dropdown_update, label_update, page_now
|
823 |
+
|
824 |
+
next_page_button.click(
|
825 |
+
fn=on_next_page_click,
|
826 |
+
inputs=[current_page_state, total_pages_state],
|
827 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
828 |
+
)
|
829 |
+
|
830 |
+
# Update prompts when a preset is selected
|
831 |
preset_prompt.change(
|
832 |
update_prompts,
|
833 |
+
inputs=[preset_prompt],
|
834 |
outputs=[user_prompt, system_prompt]
|
835 |
)
|
836 |
|
|
|
839 |
inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt,
|
840 |
conversation_id, save_conversation, temp, system_prompt],
|
841 |
outputs=[msg, chatbot, conversation_id]
|
842 |
+
).then(
|
843 |
lambda x: gr.update(value=""),
|
844 |
inputs=[chatbot],
|
845 |
outputs=[msg]
|
846 |
+
).then(
|
847 |
+
lambda history: approximate_token_count(history),
|
848 |
+
inputs=[chatbot],
|
849 |
+
outputs=[token_count_display]
|
850 |
)
|
851 |
|
852 |
items_output.change(
|
|
|
892 |
save_chat_history_to_db.click(
|
893 |
save_chat_history_to_db_wrapper,
|
894 |
inputs=[chatbot, conversation_id, media_content, chat_media_name],
|
895 |
+
outputs=[conversation_id, save_status]
|
896 |
)
|
897 |
|
898 |
regenerate_button.click(
|
899 |
regenerate_last_message,
|
900 |
inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temp, system_prompt],
|
901 |
outputs=[chatbot, gr.Textbox(label="Regenerate Status")]
|
902 |
+
).then(
|
903 |
+
lambda history: approximate_token_count(history),
|
904 |
+
inputs=[chatbot],
|
905 |
+
outputs=[token_count_display]
|
906 |
)
|
907 |
|
908 |
|
|
|
909 |
def create_chat_interface_multi_api():
|
910 |
+
try:
|
911 |
+
default_value = None
|
912 |
+
if default_api_endpoint:
|
913 |
+
if default_api_endpoint in global_api_endpoints:
|
914 |
+
default_value = format_api_name(default_api_endpoint)
|
915 |
+
else:
|
916 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
917 |
+
except Exception as e:
|
918 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
919 |
+
default_value = None
|
920 |
custom_css = """
|
921 |
.chatbot-container .message-wrap .message {
|
922 |
font-size: 14px !important;
|
|
|
942 |
use_summary = gr.Checkbox(label="Use Summary")
|
943 |
use_prompt = gr.Checkbox(label="Use Prompt")
|
944 |
with gr.Column():
|
945 |
+
# Initialize state variables for pagination
|
946 |
+
current_page_state = gr.State(value=1)
|
947 |
+
total_pages_state = gr.State(value=1)
|
948 |
+
|
949 |
+
custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
|
950 |
+
value=False,
|
951 |
+
visible=True)
|
952 |
+
preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
|
953 |
+
value=False,
|
954 |
+
visible=True)
|
955 |
+
with gr.Row():
|
956 |
+
# Add pagination controls
|
957 |
+
preset_prompt = gr.Dropdown(label="Select Preset Prompt",
|
958 |
+
choices=[],
|
959 |
+
visible=False)
|
960 |
+
with gr.Row():
|
961 |
+
prev_page_button = gr.Button("Previous Page", visible=False)
|
962 |
+
page_display = gr.Markdown("Page 1 of X", visible=False)
|
963 |
+
next_page_button = gr.Button("Next Page", visible=False)
|
964 |
+
system_prompt = gr.Textbox(label="System Prompt",
|
965 |
+
value="You are a helpful AI assistant.",
|
966 |
+
lines=5,
|
967 |
+
visible=True)
|
968 |
+
user_prompt = gr.Textbox(label="Modify Prompt (Prefixed to your message every time)", lines=5,
|
969 |
+
value="", visible=True)
|
970 |
|
971 |
with gr.Row():
|
972 |
chatbots = []
|
|
|
974 |
api_keys = []
|
975 |
temperatures = []
|
976 |
regenerate_buttons = []
|
977 |
+
token_count_displays = []
|
978 |
for i in range(3):
|
979 |
with gr.Column():
|
980 |
gr.Markdown(f"### Chat Window {i + 1}")
|
981 |
+
# Refactored API selection dropdown
|
982 |
+
api_endpoint = gr.Dropdown(
|
983 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
984 |
+
value=default_value,
|
985 |
+
label="API for Chat Interaction (Optional)"
|
986 |
+
)
|
987 |
api_key = gr.Textbox(label=f"API Key {i + 1} (if required)", type="password")
|
988 |
temperature = gr.Slider(label=f"Temperature {i + 1}", minimum=0.0, maximum=1.0, step=0.05,
|
989 |
value=0.7)
|
990 |
chatbot = gr.Chatbot(height=800, elem_classes="chat-window")
|
991 |
+
token_count_display = gr.Number(label=f"Approximate Token Count {i + 1}", value=0,
|
992 |
+
interactive=False)
|
993 |
+
token_count_displays.append(token_count_display)
|
994 |
regenerate_button = gr.Button(f"Regenerate Last Message {i + 1}")
|
995 |
chatbots.append(chatbot)
|
996 |
api_endpoints.append(api_endpoint)
|
|
|
1016 |
outputs=[items_output, item_mapping]
|
1017 |
)
|
1018 |
|
1019 |
+
def update_prompts(preset_name):
    """Load a preset and push its texts into the user and system prompt boxes.

    Args:
        preset_name: Name of the preset chosen in the dropdown.

    Returns:
        A 2-tuple of updates (user prompt, system prompt); both set the new
        value and make the textbox visible.
    """
    preset = update_user_prompt(preset_name)
    user_box_update = gr.update(value=preset["user_prompt"], visible=True)
    system_box_update = gr.update(value=preset["system_prompt"], visible=True)
    return user_box_update, system_box_update
|
1025 |
+
|
1026 |
+
def on_custom_prompt_checkbox_change(is_checked):
    """Show or hide the two prompt textboxes to match the checkbox.

    Args:
        is_checked: Current state of the "Use a Custom Prompt" checkbox.

    Returns:
        A 2-tuple of visibility updates, one per wired output
        (user prompt, system prompt).
    """
    user_box_update = gr.update(visible=is_checked)
    system_box_update = gr.update(visible=is_checked)
    return user_box_update, system_box_update
|
1031 |
+
|
1032 |
+
custom_prompt_checkbox.change(
|
1033 |
+
fn=on_custom_prompt_checkbox_change,
|
1034 |
+
inputs=[custom_prompt_checkbox],
|
1035 |
+
outputs=[user_prompt, system_prompt]
|
1036 |
+
)
|
1037 |
+
|
1038 |
+
def clear_all_chats():
    """Return reset values for the three chat windows.

    Returns:
        A list of nine items matching ``chatbots + chat_history +
        token_count_displays``: six empty lists (three chatbots, three
        histories) followed by three zero token counts.
    """
    # Build six *distinct* lists. ``[[]] * 3`` would repeat one list object
    # three times, so a later in-place mutation of one history could leak
    # into the other two.
    return [[] for _ in range(6)] + [0] * 3
|
1040 |
+
|
1041 |
+
clear_chat_button.click(
|
1042 |
+
clear_all_chats,
|
1043 |
+
outputs=chatbots + chat_history + token_count_displays
|
1044 |
+
)
|
1045 |
+
|
1046 |
+
def on_preset_prompt_checkbox_change(is_checked):
    """Toggle the preset-prompt dropdown and its pagination controls.

    Args:
        is_checked: Current state of the "Use a pre-set Prompt" checkbox.

    Returns:
        A 6-tuple in the order of the wired outputs: preset_prompt,
        prev_page_button, next_page_button, page_display,
        current_page_state, total_pages_state.
    """
    if not is_checked:
        # Hide every control and reset the pagination state to page 1 of 1.
        return (
            gr.update(visible=False, interactive=False),  # preset_prompt
            gr.update(visible=False),  # prev_page_button
            gr.update(visible=False),  # next_page_button
            gr.update(visible=False),  # page_display
            1,  # current_page_state
            1,  # total_pages_state
        )

    # Checked: load the first page of prompts and reveal the controls.
    prompt_choices, page_count, page_number = list_prompts(page=1, per_page=10)
    label = f"Page {page_number} of {page_count}"
    dropdown_update = gr.update(visible=True, interactive=True, choices=prompt_choices)
    prev_button_update = gr.update(visible=True)
    next_button_update = gr.update(visible=True)
    label_update = gr.update(value=label, visible=True)
    return (
        dropdown_update,
        prev_button_update,
        next_button_update,
        label_update,
        page_number,
        page_count,
    )
|
1067 |
+
|
1068 |
preset_prompt.change(update_user_prompt, inputs=preset_prompt, outputs=user_prompt)
|
1069 |
|
1070 |
+
preset_prompt_checkbox.change(
|
1071 |
+
fn=on_preset_prompt_checkbox_change,
|
1072 |
+
inputs=[preset_prompt_checkbox],
|
1073 |
+
outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state,
|
1074 |
+
total_pages_state]
|
1075 |
+
)
|
1076 |
+
|
1077 |
+
def on_prev_page_click(current_page, total_pages):
    """Step the preset-prompt list back one page, never below page 1.

    Args:
        current_page: Page number currently held in the pagination state.
        total_pages: Received from the wired inputs; it is overwritten by the
            value ``list_prompts`` returns before it is used.

    Returns:
        A 3-tuple: dropdown choices update, "Page X of Y" label update, and
        the page number reported by ``list_prompts``.
    """
    target_page = max(current_page - 1, 1)
    prompt_choices, page_count, page_number = list_prompts(page=target_page, per_page=10)
    label = f"Page {page_number} of {page_count}"
    dropdown_update = gr.update(choices=prompt_choices)
    label_update = gr.update(value=label)
    return dropdown_update, label_update, page_number
|
1082 |
+
|
1083 |
+
prev_page_button.click(
|
1084 |
+
fn=on_prev_page_click,
|
1085 |
+
inputs=[current_page_state, total_pages_state],
|
1086 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
1087 |
+
)
|
1088 |
+
|
1089 |
+
def on_next_page_click(current_page, total_pages):
    """Advance the preset-prompt list by one page, clamped to the last page.

    Args:
        current_page: Page number currently held in the pagination state.
        total_pages: Highest page number that may be requested.

    Returns:
        A 3-tuple: dropdown choices update, "Page X of Y" label update, and
        the page number reported by ``list_prompts``.
    """
    target_page = min(current_page + 1, total_pages)
    prompt_choices, page_count, page_number = list_prompts(page=target_page, per_page=10)
    label = f"Page {page_number} of {page_count}"
    dropdown_update = gr.update(choices=prompt_choices)
    label_update = gr.update(value=label)
    return dropdown_update, label_update, page_number
|
1094 |
+
|
1095 |
+
next_page_button.click(
|
1096 |
+
fn=on_next_page_click,
|
1097 |
+
inputs=[current_page_state, total_pages_state],
|
1098 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
1099 |
+
)
|
1100 |
+
|
1101 |
+
# Update prompts when a preset is selected
|
1102 |
+
preset_prompt.change(
|
1103 |
+
update_prompts,
|
1104 |
+
inputs=[preset_prompt],
|
1105 |
+
outputs=[user_prompt, system_prompt]
|
1106 |
+
)
|
1107 |
|
1108 |
def clear_all_chats():
    """Return reset values for the three chat windows.

    Returns:
        A list of nine items matching ``chatbots + chat_history +
        token_count_displays``: six empty lists (three chatbots, three
        histories) followed by three zero token counts.
    """
    # NOTE(review): a function with this same name is also defined earlier in
    # this view; confirm whether both definitions are intended.
    # Build six *distinct* lists. ``[[]] * 3`` would repeat one list object
    # three times, so a later in-place mutation of one history could leak
    # into the other two.
    return [[] for _ in range(6)] + [0] * 3
|
1110 |
|
1111 |
clear_chat_button.click(
|
1112 |
clear_all_chats,
|
1113 |
+
outputs=chatbots + chat_history + token_count_displays
|
1114 |
)
|
1115 |
+
|
1116 |
def chat_wrapper_multi(message, custom_prompt, system_prompt, *args):
|
1117 |
chat_histories = args[:3]
|
1118 |
chatbots = args[3:6]
|
|
|
1142 |
|
1143 |
return [gr.update(value="")] + new_chatbots + new_chat_histories
|
1144 |
|
1145 |
+
def update_token_counts(*histories):
    """Compute an approximate token count for each chat history.

    Args:
        *histories: One chat history per chat window, in display order.

    Returns:
        A list with one ``approximate_token_count`` result per history, in
        the same order as the arguments.
    """
    return [approximate_token_count(chat_log) for chat_log in histories]
|
1150 |
|
1151 |
def regenerate_last_message(chat_history, chatbot, media_content, selected_parts, api_endpoint, api_key, custom_prompt, temperature, system_prompt):
|
1152 |
if not chat_history:
|
|
|
1183 |
for i in range(3):
|
1184 |
regenerate_buttons[i].click(
|
1185 |
regenerate_last_message,
|
1186 |
+
inputs=[chat_history[i], chatbots[i], media_content, selected_parts, api_endpoints[i], api_keys[i],
|
1187 |
+
user_prompt, temperatures[i], system_prompt],
|
1188 |
outputs=[chatbots[i], chat_history[i], gr.Textbox(label=f"Regenerate Status {i + 1}")]
|
1189 |
+
).then(
|
1190 |
+
lambda history: approximate_token_count(history),
|
1191 |
+
inputs=[chat_history[i]],
|
1192 |
+
outputs=[token_count_displays[i]]
|
1193 |
)
|
1194 |
|
1195 |
# In the create_chat_interface_multi_api function:
|
|
|
1202 |
).then(
|
1203 |
lambda: (gr.update(value=""), gr.update(value="")),
|
1204 |
outputs=[msg, user_prompt]
|
1205 |
+
).then(
|
1206 |
+
update_token_counts,
|
1207 |
+
inputs=chat_history,
|
1208 |
+
outputs=token_count_displays
|
1209 |
)
|
1210 |
|
1211 |
items_output.change(
|
|
|
1222 |
)
|
1223 |
|
1224 |
|
|
|
1225 |
def create_chat_interface_four():
|
1226 |
+
try:
|
1227 |
+
default_value = None
|
1228 |
+
if default_api_endpoint:
|
1229 |
+
if default_api_endpoint in global_api_endpoints:
|
1230 |
+
default_value = format_api_name(default_api_endpoint)
|
1231 |
+
else:
|
1232 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
1233 |
+
except Exception as e:
|
1234 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
1235 |
+
default_value = None
|
1236 |
custom_css = """
|
1237 |
.chatbot-container .message-wrap .message {
|
1238 |
font-size: 14px !important;
|
|
|
1246 |
with gr.TabItem("Four Independent API Chats", visible=True):
|
1247 |
gr.Markdown("# Four Independent API Chat Interfaces")
|
1248 |
|
1249 |
+
# Initialize prompts during component creation
|
1250 |
+
prompts, total_pages, current_page = list_prompts(page=1, per_page=10)
|
1251 |
+
current_page_state = gr.State(value=current_page)
|
1252 |
+
total_pages_state = gr.State(value=total_pages)
|
1253 |
+
page_display_text = f"Page {current_page} of {total_pages}"
|
1254 |
+
|
1255 |
with gr.Row():
|
1256 |
with gr.Column():
|
1257 |
preset_prompt = gr.Dropdown(
|
1258 |
+
label="Select Preset Prompt (This will be prefixed to your messages, recommend copy/pasting and then clearing the User Prompt box)",
|
1259 |
+
choices=prompts,
|
1260 |
visible=True
|
1261 |
)
|
1262 |
+
prev_page_button = gr.Button("Previous Page", visible=True)
|
1263 |
+
page_display = gr.Markdown(page_display_text, visible=True)
|
1264 |
+
next_page_button = gr.Button("Next Page", visible=True)
|
1265 |
user_prompt = gr.Textbox(
|
1266 |
+
label="Modify User Prompt",
|
1267 |
lines=3
|
1268 |
)
|
1269 |
+
system_prompt = gr.Textbox(
|
1270 |
+
label="System Prompt",
|
1271 |
+
value="You are a helpful AI assistant.",
|
1272 |
+
lines=3
|
1273 |
+
)
|
1274 |
+
|
1275 |
with gr.Column():
|
1276 |
gr.Markdown("Scroll down for the chat windows...")
|
1277 |
|
|
|
1280 |
def create_single_chat_interface(index, user_prompt_component):
|
1281 |
with gr.Column():
|
1282 |
gr.Markdown(f"### Chat Window {index + 1}")
|
1283 |
+
# Refactored API selection dropdown
|
1284 |
api_endpoint = gr.Dropdown(
|
1285 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
1286 |
+
value=default_value,
|
1287 |
+
label="API for Chat Interaction (Optional)"
|
|
|
|
|
|
|
1288 |
)
|
1289 |
api_key = gr.Textbox(
|
1290 |
label=f"API Key {index + 1} (if required)",
|
|
|
1301 |
msg = gr.Textbox(label=f"Enter your message for Chat {index + 1}")
|
1302 |
submit = gr.Button(f"Submit to Chat {index + 1}")
|
1303 |
regenerate_button = gr.Button(f"Regenerate Last Message {index + 1}")
|
1304 |
+
token_count_display = gr.Number(label=f"Approximate Token Count {index + 1}", value=0,
|
1305 |
+
interactive=False)
|
1306 |
clear_chat_button = gr.Button(f"Clear Chat {index + 1}")
|
1307 |
|
1308 |
# State to maintain chat history
|
|
|
1318 |
'submit': submit,
|
1319 |
'regenerate_button': regenerate_button,
|
1320 |
'clear_chat_button': clear_chat_button,
|
1321 |
+
'chat_history': chat_history,
|
1322 |
+
'token_count_display': token_count_display
|
1323 |
})
|
1324 |
|
1325 |
# Create four chat interfaces arranged in a 2x2 grid
|
|
|
1330 |
create_single_chat_interface(i * 2 + j, user_prompt)
|
1331 |
|
1332 |
# Update user_prompt based on preset_prompt selection
|
1333 |
+
def update_prompts(preset_name):
    """Load a preset and push its texts into the user and system prompt boxes.

    Args:
        preset_name: Name of the preset chosen in the dropdown.

    Returns:
        A 2-tuple of value updates (user prompt, system prompt).
    """
    preset = update_user_prompt(preset_name)
    user_box_update = gr.update(value=preset["user_prompt"])
    system_box_update = gr.update(value=preset["system_prompt"])
    return user_box_update, system_box_update
|
1336 |
+
|
1337 |
preset_prompt.change(
|
1338 |
+
fn=update_prompts,
|
1339 |
+
inputs=[preset_prompt],
|
1340 |
+
outputs=[user_prompt, system_prompt]
|
1341 |
+
)
|
1342 |
+
|
1343 |
+
# Pagination button functions
|
1344 |
+
def on_prev_page_click(current_page, total_pages):
    """Step the preset-prompt list back one page, never below page 1.

    Args:
        current_page: Page number currently held in the pagination state.
        total_pages: Received from the wired inputs; it is overwritten by the
            value ``list_prompts`` returns before it is used.

    Returns:
        A 3-tuple: dropdown choices update, "Page X of Y" label update, and
        the page number reported by ``list_prompts``.
    """
    target_page = max(current_page - 1, 1)
    prompt_choices, page_count, page_number = list_prompts(page=target_page, per_page=10)
    label = f"Page {page_number} of {page_count}"
    dropdown_update = gr.update(choices=prompt_choices)
    label_update = gr.update(value=label)
    return dropdown_update, label_update, page_number
|
1353 |
+
|
1354 |
+
prev_page_button.click(
|
1355 |
+
fn=on_prev_page_click,
|
1356 |
+
inputs=[current_page_state, total_pages_state],
|
1357 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
1358 |
+
)
|
1359 |
+
|
1360 |
+
def on_next_page_click(current_page, total_pages):
    """Advance the preset-prompt list by one page, clamped to the last page.

    Args:
        current_page: Page number currently held in the pagination state.
        total_pages: Highest page number that may be requested.

    Returns:
        A 3-tuple: dropdown choices update, "Page X of Y" label update, and
        the page number reported by ``list_prompts``.
    """
    target_page = min(current_page + 1, total_pages)
    prompt_choices, page_count, page_number = list_prompts(page=target_page, per_page=10)
    label = f"Page {page_number} of {page_count}"
    dropdown_update = gr.update(choices=prompt_choices)
    label_update = gr.update(value=label)
    return dropdown_update, label_update, page_number
|
1369 |
+
|
1370 |
+
next_page_button.click(
|
1371 |
+
fn=on_next_page_click,
|
1372 |
+
inputs=[current_page_state, total_pages_state],
|
1373 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
1374 |
)
|
1375 |
|
1376 |
def chat_wrapper_single(message, chat_history, api_endpoint, api_key, temperature, user_prompt):
|
|
|
1450 |
interface['chatbot'],
|
1451 |
interface['chat_history']
|
1452 |
]
|
1453 |
+
).then(
|
1454 |
+
lambda history: approximate_token_count(history),
|
1455 |
+
inputs=[interface['chat_history']],
|
1456 |
+
outputs=[interface['token_count_display']]
|
1457 |
)
|
1458 |
|
1459 |
interface['regenerate_button'].click(
|
|
|
1470 |
interface['chat_history'],
|
1471 |
gr.Textbox(label="Regenerate Status")
|
1472 |
]
|
1473 |
+
).then(
|
1474 |
+
lambda history: approximate_token_count(history),
|
1475 |
+
inputs=[interface['chat_history']],
|
1476 |
+
outputs=[interface['token_count_display']]
|
1477 |
)
|
1478 |
|
1479 |
+
def clear_chat_single():
    """Return reset values for a single chat window.

    Returns:
        A 3-tuple matching the wired outputs: empty chatbot contents, empty
        chat history, and a token count of 0.
    """
    empty_chatbot = []
    empty_history = []
    return empty_chatbot, empty_history, 0
|
1481 |
+
|
1482 |
interface['clear_chat_button'].click(
|
1483 |
clear_chat_single,
|
1484 |
+
outputs=[interface['chatbot'], interface['chat_history'], interface['token_count_display']]
|
|
|
1485 |
)
|
1486 |
|
1487 |
|
|
|
1500 |
|
1501 |
return new_msg, updated_chatbot, new_history, new_conv_id
|
1502 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1503 |
# Mock function to simulate LLM processing
|
1504 |
def process_with_llm(workflow, context, prompt, api_endpoint, api_key):
    """Return a placeholder string describing a simulated LLM call.

    No request is sent anywhere; the function only formats its arguments.

    Args:
        workflow: Workflow name, interpolated as-is.
        context: Context text; only its first 30 characters are shown.
            ``None`` is treated as empty text.
        prompt: Prompt text; only its first 30 characters are shown.
            ``None`` is treated as empty text.
        api_endpoint: Endpoint name, interpolated as-is.
        api_key: API key; only its first 5 characters are shown, or
            "Not provided" when it is empty or ``None``.

    Returns:
        The formatted description string.
    """
    # NOTE(review): the first 5 characters of the key end up in the returned
    # text; confirm that is acceptable wherever this string is displayed.
    api_key_snippet = api_key[:5] + "..." if api_key else "Not provided"
    # Slicing None would raise TypeError, so fall back to empty text.
    context_preview = (context or "")[:30]
    prompt_preview = (prompt or "")[:30]
    return (
        f"LLM output using {api_endpoint} (API Key: {api_key_snippet}) for {workflow} "
        f"with context: {context_preview}... and prompt: {prompt_preview}..."
    )
|
1507 |
|
|
|
1508 |
#
|
1509 |
# End of Chat_ui.py
|
1510 |
#######################################################################################################################
|
App_Function_Libraries/Gradio_UI/Embeddings_tab.py
CHANGED
@@ -4,6 +4,7 @@
|
|
4 |
# Imports
|
5 |
import json
|
6 |
import logging
|
|
|
7 |
#
|
8 |
# External Imports
|
9 |
import gradio as gr
|
@@ -11,26 +12,58 @@ import numpy as np
|
|
11 |
from tqdm import tqdm
|
12 |
#
|
13 |
# Local Imports
|
14 |
-
from App_Function_Libraries.DB.DB_Manager import get_all_content_from_database
|
|
|
|
|
15 |
from App_Function_Libraries.RAG.ChromaDB_Library import chroma_client, \
|
16 |
store_in_chroma, situate_context
|
17 |
from App_Function_Libraries.RAG.Embeddings_Create import create_embedding, create_embeddings_batch
|
18 |
from App_Function_Libraries.Chunk_Lib import improved_chunking_process, chunk_for_embedding
|
|
|
|
|
|
|
19 |
#
|
20 |
########################################################################################################################
|
21 |
#
|
22 |
# Functions:
|
23 |
|
24 |
def create_embeddings_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
with gr.TabItem("Create Embeddings", visible=True):
|
26 |
gr.Markdown("# Create Embeddings for All Content")
|
27 |
|
28 |
with gr.Row():
|
29 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
embedding_provider = gr.Radio(
|
31 |
choices=["huggingface", "local", "openai"],
|
32 |
label="Select Embedding Provider",
|
33 |
-
value="huggingface"
|
34 |
)
|
35 |
gr.Markdown("Note: Local provider requires a running Llama.cpp/llamafile server.")
|
36 |
gr.Markdown("OpenAI provider requires a valid API key.")
|
@@ -65,22 +98,24 @@ def create_embeddings_tab():
|
|
65 |
|
66 |
embedding_api_url = gr.Textbox(
|
67 |
label="API URL (for local provider)",
|
68 |
-
value=
|
69 |
visible=False
|
70 |
)
|
71 |
|
72 |
-
# Add chunking options
|
73 |
chunking_method = gr.Dropdown(
|
74 |
choices=["words", "sentences", "paragraphs", "tokens", "semantic"],
|
75 |
label="Chunking Method",
|
76 |
value="words"
|
77 |
)
|
78 |
max_chunk_size = gr.Slider(
|
79 |
-
minimum=1, maximum=8000, step=1,
|
|
|
80 |
label="Max Chunk Size"
|
81 |
)
|
82 |
chunk_overlap = gr.Slider(
|
83 |
-
minimum=0, maximum=4000, step=1,
|
|
|
84 |
label="Chunk Overlap"
|
85 |
)
|
86 |
adaptive_chunking = gr.Checkbox(
|
@@ -92,6 +127,7 @@ def create_embeddings_tab():
|
|
92 |
|
93 |
with gr.Column():
|
94 |
status_output = gr.Textbox(label="Status", lines=10)
|
|
|
95 |
|
96 |
def update_provider_options(provider):
|
97 |
if provider == "huggingface":
|
@@ -107,23 +143,54 @@ def create_embeddings_tab():
|
|
107 |
else:
|
108 |
return gr.update(visible=False)
|
109 |
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
fn=update_huggingface_options,
|
118 |
-
inputs=[huggingface_model],
|
119 |
-
outputs=[custom_embedding_model]
|
120 |
-
)
|
121 |
|
122 |
-
def create_all_embeddings(provider, hf_model, openai_model, custom_model, api_url, method,
|
|
|
123 |
try:
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
if not all_content:
|
126 |
-
return "No content found in the database."
|
127 |
|
128 |
chunk_options = {
|
129 |
'method': method,
|
@@ -132,7 +199,7 @@ def create_embeddings_tab():
|
|
132 |
'adaptive': adaptive
|
133 |
}
|
134 |
|
135 |
-
collection_name = "
|
136 |
collection = chroma_client.get_or_create_collection(name=collection_name)
|
137 |
|
138 |
# Determine the model to use
|
@@ -141,55 +208,113 @@ def create_embeddings_tab():
|
|
141 |
elif provider == "openai":
|
142 |
model = openai_model
|
143 |
else:
|
144 |
-
model =
|
|
|
|
|
|
|
|
|
145 |
|
146 |
-
|
147 |
-
media_id = item['id']
|
148 |
text = item['content']
|
149 |
|
150 |
chunks = improved_chunking_process(text, chunk_options)
|
151 |
-
for
|
152 |
chunk_text = chunk['text']
|
153 |
-
chunk_id = f"
|
154 |
-
|
155 |
-
|
156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
continue
|
158 |
|
159 |
-
|
160 |
-
metadata = {
|
161 |
-
"media_id": str(media_id),
|
162 |
-
"chunk_index": i,
|
163 |
-
"total_chunks": len(chunks),
|
164 |
-
"chunking_method": method,
|
165 |
-
"max_chunk_size": max_size,
|
166 |
-
"chunk_overlap": overlap,
|
167 |
-
"adaptive_chunking": adaptive,
|
168 |
-
"embedding_model": model,
|
169 |
-
"embedding_provider": provider,
|
170 |
-
**chunk['metadata']
|
171 |
-
}
|
172 |
-
store_in_chroma(collection_name, [chunk_text], [embedding], [chunk_id], [metadata])
|
173 |
-
|
174 |
-
return "Embeddings created and stored successfully for all content."
|
175 |
except Exception as e:
|
176 |
logging.error(f"Error during embedding creation: {str(e)}")
|
177 |
return f"Error: {str(e)}"
|
178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
create_button.click(
|
180 |
fn=create_all_embeddings,
|
181 |
-
inputs=[
|
182 |
-
|
|
|
|
|
|
|
183 |
outputs=status_output
|
184 |
)
|
185 |
|
186 |
|
187 |
def create_view_embeddings_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
with gr.TabItem("View/Update Embeddings", visible=True):
|
189 |
gr.Markdown("# View and Update Embeddings")
|
190 |
-
item_mapping
|
|
|
|
|
191 |
with gr.Row():
|
192 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
item_dropdown = gr.Dropdown(label="Select Item", choices=[], interactive=True)
|
194 |
refresh_button = gr.Button("Refresh Item List")
|
195 |
embedding_status = gr.Textbox(label="Embedding Status", interactive=False)
|
@@ -236,9 +361,10 @@ def create_view_embeddings_tab():
|
|
236 |
|
237 |
embedding_api_url = gr.Textbox(
|
238 |
label="API URL (for local provider)",
|
239 |
-
value=
|
240 |
visible=False
|
241 |
)
|
|
|
242 |
chunking_method = gr.Dropdown(
|
243 |
choices=["words", "sentences", "paragraphs", "tokens", "semantic"],
|
244 |
label="Chunking Method",
|
@@ -267,15 +393,45 @@ def create_view_embeddings_tab():
|
|
267 |
)
|
268 |
contextual_api_key = gr.Textbox(label="API Key", lines=1)
|
269 |
|
270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
try:
|
272 |
-
items
|
273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
choices = []
|
275 |
new_item_mapping = {}
|
276 |
for item in items:
|
277 |
try:
|
278 |
-
|
|
|
279 |
embedding_exists = result is not None and result.get('ids') and len(result['ids']) > 0
|
280 |
status = "Embedding exists" if embedding_exists else "No embedding"
|
281 |
except Exception as e:
|
@@ -303,40 +459,62 @@ def create_view_embeddings_tab():
|
|
303 |
else:
|
304 |
return gr.update(visible=False)
|
305 |
|
306 |
-
def check_embedding_status(selected_item, item_mapping):
|
307 |
if not selected_item:
|
308 |
return "Please select an item", "", ""
|
309 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
310 |
try:
|
311 |
item_id = item_mapping.get(selected_item)
|
312 |
if item_id is None:
|
313 |
return f"Invalid item selected: {selected_item}", "", ""
|
314 |
|
315 |
item_title = selected_item.rsplit(' (', 1)[0]
|
316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
317 |
|
318 |
-
result
|
319 |
-
|
|
|
320 |
|
321 |
-
if
|
|
|
322 |
return f"No embedding found for item '{item_title}' (ID: {item_id})", "", ""
|
323 |
|
324 |
-
if
|
|
|
325 |
return f"Embedding data missing for item '{item_title}' (ID: {item_id})", "", ""
|
326 |
|
327 |
embedding = result['embeddings'][0]
|
328 |
-
metadata = result
|
329 |
embedding_preview = str(embedding[:50])
|
330 |
status = f"Embedding exists for item '{item_title}' (ID: {item_id})"
|
331 |
return status, f"First 50 elements of embedding:\n{embedding_preview}", json.dumps(metadata, indent=2)
|
332 |
|
333 |
except Exception as e:
|
334 |
-
logging.error(f"Error in check_embedding_status: {str(e)}")
|
335 |
return f"Error processing item: {selected_item}. Details: {str(e)}", "", ""
|
336 |
|
337 |
-
def
|
338 |
-
|
339 |
-
|
|
|
|
|
|
|
|
|
340 |
if not selected_item:
|
341 |
return "Please select an item", "", ""
|
342 |
|
@@ -345,8 +523,26 @@ def create_view_embeddings_tab():
|
|
345 |
if item_id is None:
|
346 |
return f"Invalid item selected: {selected_item}", "", ""
|
347 |
|
348 |
-
|
349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
if not item:
|
351 |
return f"Item not found: {item_id}", "", ""
|
352 |
|
@@ -359,11 +555,11 @@ def create_view_embeddings_tab():
|
|
359 |
|
360 |
logging.info(f"Chunking content for item: {item['title']} (ID: {item_id})")
|
361 |
chunks = chunk_for_embedding(item['content'], item['title'], chunk_options)
|
362 |
-
collection_name = "
|
363 |
collection = chroma_client.get_or_create_collection(name=collection_name)
|
364 |
|
365 |
# Delete existing embeddings for this item
|
366 |
-
existing_ids = [f"
|
367 |
collection.delete(ids=existing_ids)
|
368 |
logging.info(f"Deleted {len(existing_ids)} existing embeddings for item {item_id}")
|
369 |
|
@@ -381,7 +577,7 @@ def create_view_embeddings_tab():
|
|
381 |
contextualized_text = chunk_text
|
382 |
context = None
|
383 |
|
384 |
-
chunk_id = f"
|
385 |
|
386 |
# Determine the model to use
|
387 |
if provider == "huggingface":
|
@@ -392,7 +588,7 @@ def create_view_embeddings_tab():
|
|
392 |
model = custom_model
|
393 |
|
394 |
metadata = {
|
395 |
-
"
|
396 |
"chunk_index": i,
|
397 |
"total_chunks": len(chunks),
|
398 |
"chunking_method": method,
|
@@ -441,15 +637,25 @@ def create_view_embeddings_tab():
|
|
441 |
logging.error(f"Error in create_new_embedding_for_item: {str(e)}", exc_info=True)
|
442 |
return f"Error creating embedding: {str(e)}", "", ""
|
443 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
444 |
refresh_button.click(
|
445 |
get_items_with_embedding_status,
|
|
|
446 |
outputs=[item_dropdown, item_mapping]
|
447 |
)
|
|
|
448 |
item_dropdown.change(
|
449 |
check_embedding_status,
|
450 |
-
inputs=[item_dropdown, item_mapping],
|
451 |
outputs=[embedding_status, embedding_preview, embedding_metadata]
|
452 |
)
|
|
|
453 |
create_new_embedding_button.click(
|
454 |
create_new_embedding_for_item,
|
455 |
inputs=[item_dropdown, embedding_provider, huggingface_model, openai_model, custom_embedding_model, embedding_api_url,
|
@@ -469,9 +675,10 @@ def create_view_embeddings_tab():
|
|
469 |
)
|
470 |
|
471 |
return (item_dropdown, refresh_button, embedding_status, embedding_preview, embedding_metadata,
|
472 |
-
create_new_embedding_button, embedding_provider, huggingface_model, openai_model,
|
473 |
-
|
474 |
-
|
|
|
475 |
|
476 |
|
477 |
def create_purge_embeddings_tab():
|
|
|
4 |
# Imports
|
5 |
import json
|
6 |
import logging
|
7 |
+
import os
|
8 |
#
|
9 |
# External Imports
|
10 |
import gradio as gr
|
|
|
12 |
from tqdm import tqdm
|
13 |
#
|
14 |
# Local Imports
|
15 |
+
from App_Function_Libraries.DB.DB_Manager import get_all_content_from_database, get_all_conversations, \
|
16 |
+
get_conversation_text, get_note_by_id
|
17 |
+
from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_all_notes
|
18 |
from App_Function_Libraries.RAG.ChromaDB_Library import chroma_client, \
|
19 |
store_in_chroma, situate_context
|
20 |
from App_Function_Libraries.RAG.Embeddings_Create import create_embedding, create_embeddings_batch
|
21 |
from App_Function_Libraries.Chunk_Lib import improved_chunking_process, chunk_for_embedding
|
22 |
+
from App_Function_Libraries.Utils.Utils import load_and_log_configs
|
23 |
+
|
24 |
+
|
25 |
#
|
26 |
########################################################################################################################
|
27 |
#
|
28 |
# Functions:
|
29 |
|
30 |
def create_embeddings_tab():
|
31 |
+
# Load configuration first
|
32 |
+
config = load_and_log_configs()
|
33 |
+
if not config:
|
34 |
+
raise ValueError("Could not load configuration")
|
35 |
+
|
36 |
+
# Get database paths from config
|
37 |
+
db_config = config['db_config']
|
38 |
+
media_db_path = db_config['sqlite_path']
|
39 |
+
rag_qa_db_path = os.path.join(os.path.dirname(media_db_path), "rag_qa.db")
|
40 |
+
character_chat_db_path = os.path.join(os.path.dirname(media_db_path), "chatDB.db")
|
41 |
+
chroma_db_path = db_config['chroma_db_path']
|
42 |
+
|
43 |
with gr.TabItem("Create Embeddings", visible=True):
|
44 |
gr.Markdown("# Create Embeddings for All Content")
|
45 |
|
46 |
with gr.Row():
|
47 |
with gr.Column():
|
48 |
+
# Database selection at the top
|
49 |
+
database_selection = gr.Radio(
|
50 |
+
choices=["Media DB", "RAG Chat", "Character Chat"],
|
51 |
+
label="Select Content Source",
|
52 |
+
value="Media DB",
|
53 |
+
info="Choose which database to create embeddings from"
|
54 |
+
)
|
55 |
+
|
56 |
+
# Add database path display
|
57 |
+
current_db_path = gr.Textbox(
|
58 |
+
label="Current Database Path",
|
59 |
+
value=media_db_path,
|
60 |
+
interactive=False
|
61 |
+
)
|
62 |
+
|
63 |
embedding_provider = gr.Radio(
|
64 |
choices=["huggingface", "local", "openai"],
|
65 |
label="Select Embedding Provider",
|
66 |
+
value=config['embedding_config']['embedding_provider'] or "huggingface"
|
67 |
)
|
68 |
gr.Markdown("Note: Local provider requires a running Llama.cpp/llamafile server.")
|
69 |
gr.Markdown("OpenAI provider requires a valid API key.")
|
|
|
98 |
|
99 |
embedding_api_url = gr.Textbox(
|
100 |
label="API URL (for local provider)",
|
101 |
+
value=config['embedding_config']['embedding_api_url'],
|
102 |
visible=False
|
103 |
)
|
104 |
|
105 |
+
# Add chunking options with config defaults
|
106 |
chunking_method = gr.Dropdown(
|
107 |
choices=["words", "sentences", "paragraphs", "tokens", "semantic"],
|
108 |
label="Chunking Method",
|
109 |
value="words"
|
110 |
)
|
111 |
max_chunk_size = gr.Slider(
|
112 |
+
minimum=1, maximum=8000, step=1,
|
113 |
+
value=config['embedding_config']['chunk_size'],
|
114 |
label="Max Chunk Size"
|
115 |
)
|
116 |
chunk_overlap = gr.Slider(
|
117 |
+
minimum=0, maximum=4000, step=1,
|
118 |
+
value=config['embedding_config']['overlap'],
|
119 |
label="Chunk Overlap"
|
120 |
)
|
121 |
adaptive_chunking = gr.Checkbox(
|
|
|
127 |
|
128 |
with gr.Column():
|
129 |
status_output = gr.Textbox(label="Status", lines=10)
|
130 |
+
progress = gr.Progress()
|
131 |
|
132 |
def update_provider_options(provider):
|
133 |
if provider == "huggingface":
|
|
|
143 |
else:
|
144 |
return gr.update(visible=False)
|
145 |
|
146 |
+
def update_database_path(database_type):
    """Map the selected content source to its database path.

    Args:
        database_type: Label chosen in the content-source selector.

    Returns:
        ``media_db_path`` for "Media DB", ``rag_qa_db_path`` for "RAG Chat",
        and ``character_chat_db_path`` for any other value.
    """
    return (
        media_db_path
        if database_type == "Media DB"
        else rag_qa_db_path
        if database_type == "RAG Chat"
        else character_chat_db_path  # Character Chat (and any other value)
    )
|
|
|
|
|
|
|
|
|
153 |
|
154 |
+
def create_all_embeddings(provider, hf_model, openai_model, custom_model, api_url, method,
|
155 |
+
max_size, overlap, adaptive, database_type, progress=gr.Progress()):
|
156 |
try:
|
157 |
+
# Initialize content based on database selection
|
158 |
+
if database_type == "Media DB":
|
159 |
+
all_content = get_all_content_from_database()
|
160 |
+
content_type = "media"
|
161 |
+
elif database_type == "RAG Chat":
|
162 |
+
all_content = []
|
163 |
+
page = 1
|
164 |
+
while True:
|
165 |
+
conversations, total_pages, _ = get_all_conversations(page=page)
|
166 |
+
if not conversations:
|
167 |
+
break
|
168 |
+
all_content.extend([{
|
169 |
+
'id': conv['conversation_id'],
|
170 |
+
'content': get_conversation_text(conv['conversation_id']),
|
171 |
+
'title': conv['title'],
|
172 |
+
'type': 'conversation'
|
173 |
+
} for conv in conversations])
|
174 |
+
progress(page / total_pages, desc=f"Loading conversations... Page {page}/{total_pages}")
|
175 |
+
page += 1
|
176 |
+
else: # Character Chat
|
177 |
+
all_content = []
|
178 |
+
page = 1
|
179 |
+
while True:
|
180 |
+
notes, total_pages, _ = get_all_notes(page=page)
|
181 |
+
if not notes:
|
182 |
+
break
|
183 |
+
all_content.extend([{
|
184 |
+
'id': note['id'],
|
185 |
+
'content': f"{note['title']}\n\n{note['content']}",
|
186 |
+
'conversation_id': note['conversation_id'],
|
187 |
+
'type': 'note'
|
188 |
+
} for note in notes])
|
189 |
+
progress(page / total_pages, desc=f"Loading notes... Page {page}/{total_pages}")
|
190 |
+
page += 1
|
191 |
+
|
192 |
if not all_content:
|
193 |
+
return "No content found in the selected database."
|
194 |
|
195 |
chunk_options = {
|
196 |
'method': method,
|
|
|
199 |
'adaptive': adaptive
|
200 |
}
|
201 |
|
202 |
+
collection_name = f"{database_type.lower().replace(' ', '_')}_embeddings"
|
203 |
collection = chroma_client.get_or_create_collection(name=collection_name)
|
204 |
|
205 |
# Determine the model to use
|
|
|
208 |
elif provider == "openai":
|
209 |
model = openai_model
|
210 |
else:
|
211 |
+
model = api_url
|
212 |
+
|
213 |
+
total_items = len(all_content)
|
214 |
+
for idx, item in enumerate(all_content):
|
215 |
+
progress((idx + 1) / total_items, desc=f"Processing item {idx + 1} of {total_items}")
|
216 |
|
217 |
+
content_id = item['id']
|
|
|
218 |
text = item['content']
|
219 |
|
220 |
chunks = improved_chunking_process(text, chunk_options)
|
221 |
+
for chunk_idx, chunk in enumerate(chunks):
|
222 |
chunk_text = chunk['text']
|
223 |
+
chunk_id = f"{database_type.lower()}_{content_id}_chunk_{chunk_idx}"
|
224 |
+
|
225 |
+
try:
|
226 |
+
embedding = create_embedding(chunk_text, provider, model, api_url)
|
227 |
+
metadata = {
|
228 |
+
'content_id': str(content_id),
|
229 |
+
'chunk_index': int(chunk_idx),
|
230 |
+
'total_chunks': int(len(chunks)),
|
231 |
+
'chunking_method': method,
|
232 |
+
'max_chunk_size': int(max_size),
|
233 |
+
'chunk_overlap': int(overlap),
|
234 |
+
'adaptive_chunking': bool(adaptive),
|
235 |
+
'embedding_model': model,
|
236 |
+
'embedding_provider': provider,
|
237 |
+
'content_type': item.get('type', 'media'),
|
238 |
+
'conversation_id': item.get('conversation_id'),
|
239 |
+
**{k: (int(v) if isinstance(v, str) and v.isdigit() else v)
|
240 |
+
for k, v in chunk['metadata'].items()}
|
241 |
+
}
|
242 |
+
store_in_chroma(collection_name, [chunk_text], [embedding], [chunk_id], [metadata])
|
243 |
+
|
244 |
+
except Exception as e:
|
245 |
+
logging.error(f"Error processing chunk {chunk_id}: {str(e)}")
|
246 |
continue
|
247 |
|
248 |
+
return f"Embeddings created and stored successfully for all {database_type} content."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
except Exception as e:
|
250 |
logging.error(f"Error during embedding creation: {str(e)}")
|
251 |
return f"Error: {str(e)}"
|
252 |
|
253 |
+
# Event handlers
|
254 |
+
embedding_provider.change(
|
255 |
+
fn=update_provider_options,
|
256 |
+
inputs=[embedding_provider],
|
257 |
+
outputs=[huggingface_model, openai_model, custom_embedding_model, embedding_api_url]
|
258 |
+
)
|
259 |
+
|
260 |
+
huggingface_model.change(
|
261 |
+
fn=update_huggingface_options,
|
262 |
+
inputs=[huggingface_model],
|
263 |
+
outputs=[custom_embedding_model]
|
264 |
+
)
|
265 |
+
|
266 |
+
database_selection.change(
|
267 |
+
fn=update_database_path,
|
268 |
+
inputs=[database_selection],
|
269 |
+
outputs=[current_db_path]
|
270 |
+
)
|
271 |
+
|
272 |
create_button.click(
|
273 |
fn=create_all_embeddings,
|
274 |
+
inputs=[
|
275 |
+
embedding_provider, huggingface_model, openai_model, custom_embedding_model,
|
276 |
+
embedding_api_url, chunking_method, max_chunk_size, chunk_overlap,
|
277 |
+
adaptive_chunking, database_selection
|
278 |
+
],
|
279 |
outputs=status_output
|
280 |
)
|
281 |
|
282 |
|
283 |
def create_view_embeddings_tab():
|
284 |
+
# Load configuration first
|
285 |
+
config = load_and_log_configs()
|
286 |
+
if not config:
|
287 |
+
raise ValueError("Could not load configuration")
|
288 |
+
|
289 |
+
# Get database paths from config
|
290 |
+
db_config = config['db_config']
|
291 |
+
media_db_path = db_config['sqlite_path']
|
292 |
+
rag_qa_db_path = os.path.join(os.path.dirname(media_db_path), "rag_chat.db")
|
293 |
+
character_chat_db_path = os.path.join(os.path.dirname(media_db_path), "character_chat.db")
|
294 |
+
chroma_db_path = db_config['chroma_db_path']
|
295 |
+
|
296 |
with gr.TabItem("View/Update Embeddings", visible=True):
|
297 |
gr.Markdown("# View and Update Embeddings")
|
298 |
+
# Initialize item_mapping as a Gradio State
|
299 |
+
|
300 |
+
|
301 |
with gr.Row():
|
302 |
with gr.Column():
|
303 |
+
# Add database selection
|
304 |
+
database_selection = gr.Radio(
|
305 |
+
choices=["Media DB", "RAG Chat", "Character Chat"],
|
306 |
+
label="Select Content Source",
|
307 |
+
value="Media DB",
|
308 |
+
info="Choose which database to view embeddings from"
|
309 |
+
)
|
310 |
+
|
311 |
+
# Add database path display
|
312 |
+
current_db_path = gr.Textbox(
|
313 |
+
label="Current Database Path",
|
314 |
+
value=media_db_path,
|
315 |
+
interactive=False
|
316 |
+
)
|
317 |
+
|
318 |
item_dropdown = gr.Dropdown(label="Select Item", choices=[], interactive=True)
|
319 |
refresh_button = gr.Button("Refresh Item List")
|
320 |
embedding_status = gr.Textbox(label="Embedding Status", interactive=False)
|
|
|
361 |
|
362 |
embedding_api_url = gr.Textbox(
|
363 |
label="API URL (for local provider)",
|
364 |
+
value=config['embedding_config']['embedding_api_url'],
|
365 |
visible=False
|
366 |
)
|
367 |
+
|
368 |
chunking_method = gr.Dropdown(
|
369 |
choices=["words", "sentences", "paragraphs", "tokens", "semantic"],
|
370 |
label="Chunking Method",
|
|
|
393 |
)
|
394 |
contextual_api_key = gr.Textbox(label="API Key", lines=1)
|
395 |
|
396 |
+
item_mapping = gr.State(value={})
|
397 |
+
|
398 |
+
def update_database_path(database_type):
    """Map the selected content source to the database path shown in the UI.

    Args:
        database_type: Label chosen in the database selector.

    Returns:
        ``media_db_path`` for "Media DB", ``rag_qa_db_path`` for "RAG Chat",
        and ``character_chat_db_path`` for any other value.
    """
    # Conditional expression keeps the lookups lazy: only the path for the
    # matching source is read from the enclosing scope.
    selected_path = (
        media_db_path if database_type == "Media DB"
        else rag_qa_db_path if database_type == "RAG Chat"
        else character_chat_db_path  # Character Chat (and anything else)
    )
    return selected_path
|
405 |
+
|
406 |
+
def get_items_with_embedding_status(database_type):
|
407 |
try:
|
408 |
+
# Get items based on database selection
|
409 |
+
if database_type == "Media DB":
|
410 |
+
items = get_all_content_from_database()
|
411 |
+
elif database_type == "RAG Chat":
|
412 |
+
conversations, _, _ = get_all_conversations(page=1)
|
413 |
+
items = [{
|
414 |
+
'id': conv['conversation_id'],
|
415 |
+
'title': conv['title'],
|
416 |
+
'type': 'conversation'
|
417 |
+
} for conv in conversations]
|
418 |
+
else: # Character Chat
|
419 |
+
notes, _, _ = get_all_notes(page=1)
|
420 |
+
items = [{
|
421 |
+
'id': note['id'],
|
422 |
+
'title': note['title'],
|
423 |
+
'type': 'note'
|
424 |
+
} for note in notes]
|
425 |
+
|
426 |
+
collection_name = f"{database_type.lower().replace(' ', '_')}_embeddings"
|
427 |
+
collection = chroma_client.get_or_create_collection(name=collection_name)
|
428 |
+
|
429 |
choices = []
|
430 |
new_item_mapping = {}
|
431 |
for item in items:
|
432 |
try:
|
433 |
+
chunk_id = f"{database_type.lower()}_{item['id']}_chunk_0"
|
434 |
+
result = collection.get(ids=[chunk_id])
|
435 |
embedding_exists = result is not None and result.get('ids') and len(result['ids']) > 0
|
436 |
status = "Embedding exists" if embedding_exists else "No embedding"
|
437 |
except Exception as e:
|
|
|
459 |
else:
|
460 |
return gr.update(visible=False)
|
461 |
|
462 |
+
def check_embedding_status(selected_item, database_type, item_mapping):
    """Report whether the first chunk of the selected item has a stored embedding.

    Args:
        selected_item: Dropdown label of the item. The text before the last
            " (" is used as the item's title in the returned messages.
        database_type: Content source label (e.g. "Media DB"). It determines
            both the Chroma collection name and the chunk-id prefix.
        item_mapping: Dict mapping dropdown labels to item ids. When ``None``
            it is rebuilt through ``get_items_with_embedding_status``.

    Returns:
        A 3-tuple ``(status_message, embedding_preview, metadata_json)``.
        The last two elements are empty strings whenever there is nothing to
        show. Failures are reported in the status message rather than raised.
    """
    if not selected_item:
        return "Please select an item", "", ""

    if item_mapping is None:
        # If mapping is None, try to refresh it
        try:
            _, item_mapping = get_items_with_embedding_status(database_type)
        except Exception as e:
            return f"Error initializing item mapping: {str(e)}", "", ""

    try:
        item_id = item_mapping.get(selected_item)
        if item_id is None:
            return f"Invalid item selected: {selected_item}", "", ""

        item_title = selected_item.rsplit(' (', 1)[0]
        collection_name = f"{database_type.lower().replace(' ', '_')}_embeddings"
        collection = chroma_client.get_or_create_collection(name=collection_name)
        # Only chunk 0 is probed; its presence stands in for the whole item.
        chunk_id = f"{database_type.lower()}_{item_id}_chunk_0"

        try:
            result = collection.get(ids=[chunk_id], include=["embeddings", "metadatas"])
        except Exception as e:
            logging.error(f"ChromaDB get error: {str(e)}")
            return f"Error retrieving embedding for '{item_title}': {str(e)}", "", ""

        not_found = f"No embedding found for item '{item_title}' (ID: {item_id})"
        data_missing = f"Embedding data missing for item '{item_title}' (ID: {item_id})"

        # Check if result exists and has the expected structure
        if not result or not isinstance(result, dict):
            return not_found, "", ""

        # Check if we have any results
        if not result.get('ids'):
            return not_found, "", ""

        # Check if embeddings exist. Use explicit None/len() tests instead of
        # truthiness: when the embeddings come back as NumPy arrays, `not array`
        # raises "truth value of an array is ambiguous" and a valid embedding
        # would be reported as an error.
        embeddings = result.get('embeddings')
        if embeddings is None or len(embeddings) == 0:
            return data_missing, "", ""

        embedding = embeddings[0]
        if embedding is None or len(embedding) == 0:
            return data_missing, "", ""

        metadatas = result.get('metadatas')
        metadata = metadatas[0] if metadatas else {}
        embedding_preview = str(embedding[:50])
        status = f"Embedding exists for item '{item_title}' (ID: {item_id})"
        return status, f"First 50 elements of embedding:\n{embedding_preview}", json.dumps(metadata, indent=2)

    except Exception as e:
        logging.error(f"Error in check_embedding_status: {str(e)}", exc_info=True)
        return f"Error processing item: {selected_item}. Details: {str(e)}", "", ""
|
510 |
|
511 |
+
def refresh_and_update(database_type):
    """Rebuild the item list for the given content source.

    Thin wrapper around ``get_items_with_embedding_status``.

    Args:
        database_type: Content source label passed straight through.

    Returns:
        The two values produced by ``get_items_with_embedding_status``, in
        the same order (the dropdown update first, the item mapping second).
    """
    dropdown_update, refreshed_mapping = get_items_with_embedding_status(database_type)
    return dropdown_update, refreshed_mapping
|
514 |
+
|
515 |
+
def create_new_embedding_for_item(selected_item, database_type, provider, hf_model, openai_model,
|
516 |
+
custom_model, api_url, method, max_size, overlap, adaptive,
|
517 |
+
item_mapping, use_contextual, contextual_api_choice=None):
|
518 |
if not selected_item:
|
519 |
return "Please select an item", "", ""
|
520 |
|
|
|
523 |
if item_id is None:
|
524 |
return f"Invalid item selected: {selected_item}", "", ""
|
525 |
|
526 |
+
# Get item content based on database type
|
527 |
+
if database_type == "Media DB":
|
528 |
+
items = get_all_content_from_database()
|
529 |
+
item = next((item for item in items if item['id'] == item_id), None)
|
530 |
+
elif database_type == "RAG Chat":
|
531 |
+
item = {
|
532 |
+
'id': item_id,
|
533 |
+
'content': get_conversation_text(item_id),
|
534 |
+
'title': selected_item.rsplit(' (', 1)[0],
|
535 |
+
'type': 'conversation'
|
536 |
+
}
|
537 |
+
else: # Character Chat
|
538 |
+
note = get_note_by_id(item_id)
|
539 |
+
item = {
|
540 |
+
'id': item_id,
|
541 |
+
'content': f"{note['title']}\n\n{note['content']}",
|
542 |
+
'title': note['title'],
|
543 |
+
'type': 'note'
|
544 |
+
}
|
545 |
+
|
546 |
if not item:
|
547 |
return f"Item not found: {item_id}", "", ""
|
548 |
|
|
|
555 |
|
556 |
logging.info(f"Chunking content for item: {item['title']} (ID: {item_id})")
|
557 |
chunks = chunk_for_embedding(item['content'], item['title'], chunk_options)
|
558 |
+
collection_name = f"{database_type.lower().replace(' ', '_')}_embeddings"
|
559 |
collection = chroma_client.get_or_create_collection(name=collection_name)
|
560 |
|
561 |
# Delete existing embeddings for this item
|
562 |
+
existing_ids = [f"{database_type.lower()}_{item_id}_chunk_{i}" for i in range(len(chunks))]
|
563 |
collection.delete(ids=existing_ids)
|
564 |
logging.info(f"Deleted {len(existing_ids)} existing embeddings for item {item_id}")
|
565 |
|
|
|
577 |
contextualized_text = chunk_text
|
578 |
context = None
|
579 |
|
580 |
+
chunk_id = f"{database_type.lower()}_{item_id}_chunk_{i}"
|
581 |
|
582 |
# Determine the model to use
|
583 |
if provider == "huggingface":
|
|
|
588 |
model = custom_model
|
589 |
|
590 |
metadata = {
|
591 |
+
"content_id": str(item_id),
|
592 |
"chunk_index": i,
|
593 |
"total_chunks": len(chunks),
|
594 |
"chunking_method": method,
|
|
|
637 |
logging.error(f"Error in create_new_embedding_for_item: {str(e)}", exc_info=True)
|
638 |
return f"Error creating embedding: {str(e)}", "", ""
|
639 |
|
640 |
+
# Wire up all the event handlers
|
641 |
+
database_selection.change(
|
642 |
+
update_database_path,
|
643 |
+
inputs=[database_selection],
|
644 |
+
outputs=[current_db_path]
|
645 |
+
)
|
646 |
+
|
647 |
refresh_button.click(
|
648 |
get_items_with_embedding_status,
|
649 |
+
inputs=[database_selection],
|
650 |
outputs=[item_dropdown, item_mapping]
|
651 |
)
|
652 |
+
|
653 |
item_dropdown.change(
|
654 |
check_embedding_status,
|
655 |
+
inputs=[item_dropdown, database_selection, item_mapping],
|
656 |
outputs=[embedding_status, embedding_preview, embedding_metadata]
|
657 |
)
|
658 |
+
|
659 |
create_new_embedding_button.click(
|
660 |
create_new_embedding_for_item,
|
661 |
inputs=[item_dropdown, embedding_provider, huggingface_model, openai_model, custom_embedding_model, embedding_api_url,
|
|
|
675 |
)
|
676 |
|
677 |
return (item_dropdown, refresh_button, embedding_status, embedding_preview, embedding_metadata,
|
678 |
+
create_new_embedding_button, embedding_provider, huggingface_model, openai_model,
|
679 |
+
custom_embedding_model, embedding_api_url, chunking_method, max_chunk_size,
|
680 |
+
chunk_overlap, adaptive_chunking, use_contextual_embeddings,
|
681 |
+
contextual_api_choice, contextual_api_key)
|
682 |
|
683 |
|
684 |
def create_purge_embeddings_tab():
|
App_Function_Libraries/Gradio_UI/Evaluations_Benchmarks_tab.py
CHANGED
@@ -1,9 +1,12 @@
|
|
1 |
###################################################################################################
|
2 |
# Evaluations_Benchmarks_tab.py - Gradio code for G-Eval testing
|
3 |
# We will use the G-Eval API to evaluate the quality of the generated summaries.
|
|
|
4 |
|
5 |
import gradio as gr
|
6 |
from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
|
|
|
|
|
7 |
|
8 |
def create_geval_tab():
|
9 |
with gr.Tab("G-Eval", visible=True):
|
@@ -31,13 +34,25 @@ def create_geval_tab():
|
|
31 |
|
32 |
|
33 |
def create_infinite_bench_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
with gr.Tab("Infinite Bench", visible=True):
|
35 |
gr.Markdown("# Infinite Bench Evaluation (Coming Soon)")
|
36 |
with gr.Row():
|
37 |
with gr.Column():
|
|
|
38 |
api_name_input = gr.Dropdown(
|
39 |
-
choices=["
|
40 |
-
|
|
|
41 |
)
|
42 |
api_key_input = gr.Textbox(label="API Key (if required)", type="password")
|
43 |
evaluate_button = gr.Button("Evaluate Summary")
|
|
|
1 |
###################################################################################################
|
2 |
# Evaluations_Benchmarks_tab.py - Gradio code for G-Eval testing
|
3 |
# We will use the G-Eval API to evaluate the quality of the generated summaries.
|
4 |
+
import logging
|
5 |
|
6 |
import gradio as gr
|
7 |
from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
|
8 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
|
9 |
+
|
10 |
|
11 |
def create_geval_tab():
|
12 |
with gr.Tab("G-Eval", visible=True):
|
|
|
34 |
|
35 |
|
36 |
def create_infinite_bench_tab():
|
37 |
+
try:
|
38 |
+
default_value = None
|
39 |
+
if default_api_endpoint:
|
40 |
+
if default_api_endpoint in global_api_endpoints:
|
41 |
+
default_value = format_api_name(default_api_endpoint)
|
42 |
+
else:
|
43 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
44 |
+
except Exception as e:
|
45 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
46 |
+
default_value = None
|
47 |
with gr.Tab("Infinite Bench", visible=True):
|
48 |
gr.Markdown("# Infinite Bench Evaluation (Coming Soon)")
|
49 |
with gr.Row():
|
50 |
with gr.Column():
|
51 |
+
# Refactored API selection dropdown
|
52 |
api_name_input = gr.Dropdown(
|
53 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
54 |
+
value=default_value,
|
55 |
+
label="API for Summarization (Optional)"
|
56 |
)
|
57 |
api_key_input = gr.Textbox(label="API Key (if required)", type="password")
|
58 |
evaluate_button = gr.Button("Evaluate Summary")
|
App_Function_Libraries/Gradio_UI/Explain_summarize_tab.py
CHANGED
@@ -7,7 +7,7 @@ import logging
|
|
7 |
# External Imports
|
8 |
import gradio as gr
|
9 |
|
10 |
-
from App_Function_Libraries.DB.DB_Manager import
|
11 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_user_prompt
|
12 |
#
|
13 |
# Local Imports
|
@@ -17,6 +17,9 @@ from App_Function_Libraries.Summarization.Local_Summarization_Lib import summari
|
|
17 |
from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, \
|
18 |
summarize_with_cohere, summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, \
|
19 |
summarize_with_huggingface
|
|
|
|
|
|
|
20 |
#
|
21 |
#
|
22 |
############################################################################################################
|
@@ -24,32 +27,62 @@ from App_Function_Libraries.Summarization.Summarization_General_Lib import summa
|
|
24 |
# Functions:
|
25 |
|
26 |
def create_summarize_explain_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
with gr.TabItem("Analyze Text", visible=True):
|
28 |
gr.Markdown("# Analyze / Explain / Summarize Text without ingesting it into the DB")
|
|
|
|
|
|
|
|
|
|
|
29 |
with gr.Row():
|
30 |
with gr.Column():
|
31 |
with gr.Row():
|
32 |
-
text_to_work_input = gr.Textbox(
|
33 |
-
|
34 |
-
|
|
|
|
|
35 |
with gr.Row():
|
36 |
explanation_checkbox = gr.Checkbox(label="Explain Text", value=True)
|
37 |
summarization_checkbox = gr.Checkbox(label="Summarize Text", value=True)
|
38 |
-
custom_prompt_checkbox = gr.Checkbox(
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
44 |
with gr.Row():
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
with gr.Row():
|
49 |
-
custom_prompt_input = gr.Textbox(
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
53 |
with gr.Row():
|
54 |
system_prompt_input = gr.Textbox(label="System Prompt",
|
55 |
value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
@@ -69,19 +102,21 @@ def create_summarize_explain_tab():
|
|
69 |
- Ensure adherence to specified format
|
70 |
- Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
|
71 |
""",
|
72 |
-
lines=
|
73 |
visible=False,
|
74 |
interactive=True)
|
|
|
75 |
api_endpoint = gr.Dropdown(
|
76 |
-
choices=[None
|
77 |
-
|
78 |
-
|
79 |
-
value=None,
|
80 |
-
label="API to be used for request (Mandatory)"
|
81 |
)
|
82 |
with gr.Row():
|
83 |
-
api_key_input = gr.Textbox(
|
84 |
-
|
|
|
|
|
|
|
85 |
with gr.Row():
|
86 |
explain_summarize_button = gr.Button("Explain/Summarize")
|
87 |
|
@@ -90,17 +125,83 @@ def create_summarize_explain_tab():
|
|
90 |
explanation_output = gr.Textbox(label="Explanation:", lines=20)
|
91 |
custom_prompt_output = gr.Textbox(label="Custom Prompt:", lines=20, visible=True)
|
92 |
|
|
|
93 |
custom_prompt_checkbox.change(
|
94 |
fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
95 |
inputs=[custom_prompt_checkbox],
|
96 |
outputs=[custom_prompt_input, system_prompt_input]
|
97 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
preset_prompt_checkbox.change(
|
99 |
-
fn=
|
100 |
inputs=[preset_prompt_checkbox],
|
101 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
)
|
103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
def update_prompts(preset_name):
|
105 |
prompts = update_user_prompt(preset_name)
|
106 |
return (
|
@@ -109,18 +210,27 @@ def create_summarize_explain_tab():
|
|
109 |
)
|
110 |
|
111 |
preset_prompt.change(
|
112 |
-
update_prompts,
|
113 |
-
inputs=preset_prompt,
|
114 |
outputs=[custom_prompt_input, system_prompt_input]
|
115 |
)
|
116 |
|
117 |
explain_summarize_button.click(
|
118 |
fn=summarize_explain_text,
|
119 |
-
inputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
outputs=[summarization_output, explanation_output, custom_prompt_output]
|
121 |
)
|
122 |
|
123 |
|
|
|
124 |
def summarize_explain_text(message, api_endpoint, api_key, summarization, explanation, custom_prompt, custom_system_prompt,):
|
125 |
global custom_prompt_output
|
126 |
summarization_response = None
|
|
|
7 |
# External Imports
|
8 |
import gradio as gr
|
9 |
|
10 |
+
from App_Function_Libraries.DB.DB_Manager import list_prompts
|
11 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_user_prompt
|
12 |
#
|
13 |
# Local Imports
|
|
|
17 |
from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, \
|
18 |
summarize_with_cohere, summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, \
|
19 |
summarize_with_huggingface
|
20 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
|
21 |
+
|
22 |
+
|
23 |
#
|
24 |
#
|
25 |
############################################################################################################
|
|
|
27 |
# Functions:
|
28 |
|
29 |
def create_summarize_explain_tab():
|
30 |
+
try:
|
31 |
+
default_value = None
|
32 |
+
if default_api_endpoint:
|
33 |
+
if default_api_endpoint in global_api_endpoints:
|
34 |
+
default_value = format_api_name(default_api_endpoint)
|
35 |
+
else:
|
36 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
37 |
+
except Exception as e:
|
38 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
39 |
+
default_value = None
|
40 |
+
|
41 |
with gr.TabItem("Analyze Text", visible=True):
|
42 |
gr.Markdown("# Analyze / Explain / Summarize Text without ingesting it into the DB")
|
43 |
+
|
44 |
+
# Initialize state variables for pagination
|
45 |
+
current_page_state = gr.State(value=1)
|
46 |
+
total_pages_state = gr.State(value=1)
|
47 |
+
|
48 |
with gr.Row():
|
49 |
with gr.Column():
|
50 |
with gr.Row():
|
51 |
+
text_to_work_input = gr.Textbox(
|
52 |
+
label="Text to be Explained or Summarized",
|
53 |
+
placeholder="Enter the text you want explained or summarized here",
|
54 |
+
lines=20
|
55 |
+
)
|
56 |
with gr.Row():
|
57 |
explanation_checkbox = gr.Checkbox(label="Explain Text", value=True)
|
58 |
summarization_checkbox = gr.Checkbox(label="Summarize Text", value=True)
|
59 |
+
custom_prompt_checkbox = gr.Checkbox(
|
60 |
+
label="Use a Custom Prompt",
|
61 |
+
value=False,
|
62 |
+
visible=True
|
63 |
+
)
|
64 |
+
preset_prompt_checkbox = gr.Checkbox(
|
65 |
+
label="Use a pre-set Prompt",
|
66 |
+
value=False,
|
67 |
+
visible=True
|
68 |
+
)
|
69 |
with gr.Row():
|
70 |
+
# Add pagination controls
|
71 |
+
preset_prompt = gr.Dropdown(
|
72 |
+
label="Select Preset Prompt",
|
73 |
+
choices=[],
|
74 |
+
visible=False
|
75 |
+
)
|
76 |
+
prev_page_button = gr.Button("Previous Page", visible=False)
|
77 |
+
page_display = gr.Markdown("Page 1 of X", visible=False)
|
78 |
+
next_page_button = gr.Button("Next Page", visible=False)
|
79 |
with gr.Row():
|
80 |
+
custom_prompt_input = gr.Textbox(
|
81 |
+
label="Custom Prompt",
|
82 |
+
placeholder="Enter custom prompt here",
|
83 |
+
lines=10,
|
84 |
+
visible=False
|
85 |
+
)
|
86 |
with gr.Row():
|
87 |
system_prompt_input = gr.Textbox(label="System Prompt",
|
88 |
value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
|
|
102 |
- Ensure adherence to specified format
|
103 |
- Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
|
104 |
""",
|
105 |
+
lines=10,
|
106 |
visible=False,
|
107 |
interactive=True)
|
108 |
+
# Refactored API selection dropdown
|
109 |
api_endpoint = gr.Dropdown(
|
110 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
111 |
+
value=default_value,
|
112 |
+
label="API for Summarization/Analysis (Optional)"
|
|
|
|
|
113 |
)
|
114 |
with gr.Row():
|
115 |
+
api_key_input = gr.Textbox(
|
116 |
+
label="API Key (if required)",
|
117 |
+
placeholder="Enter your API key here",
|
118 |
+
type="password"
|
119 |
+
)
|
120 |
with gr.Row():
|
121 |
explain_summarize_button = gr.Button("Explain/Summarize")
|
122 |
|
|
|
125 |
explanation_output = gr.Textbox(label="Explanation:", lines=20)
|
126 |
custom_prompt_output = gr.Textbox(label="Custom Prompt:", lines=20, visible=True)
|
127 |
|
128 |
+
# Handle custom prompt checkbox change
|
129 |
custom_prompt_checkbox.change(
|
130 |
fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
131 |
inputs=[custom_prompt_checkbox],
|
132 |
outputs=[custom_prompt_input, system_prompt_input]
|
133 |
)
|
134 |
+
|
135 |
+
# Handle preset prompt checkbox change
|
136 |
+
def on_preset_prompt_checkbox_change(is_checked):
    """Show or hide the preset-prompt picker and its pagination controls.

    Args:
        is_checked: Current value of the "Use a pre-set Prompt" checkbox.

    Returns:
        A 6-tuple, in this order: update for the preset dropdown, update for
        the previous-page button, update for the next-page button, update
        for the page label, the current page number, and the total page
        count. When unchecked, the controls are hidden and both page values
        reset to 1.
    """
    if not is_checked:
        # Hide everything and reset the pagination state.
        hidden = (
            gr.update(visible=False, interactive=False),  # preset_prompt
            gr.update(visible=False),  # prev_page_button
            gr.update(visible=False),  # next_page_button
            gr.update(visible=False),  # page_display
            1,  # current_page_state
            1,  # total_pages_state
        )
        return hidden

    # Checked: load the first page of prompts (20 per page).
    prompt_choices, page_count, shown_page = list_prompts(page=1, per_page=20)
    label = f"Page {shown_page} of {page_count}"
    return (
        gr.update(visible=True, interactive=True, choices=prompt_choices),  # preset_prompt
        gr.update(visible=True),  # prev_page_button
        gr.update(visible=True),  # next_page_button
        gr.update(value=label, visible=True),  # page_display
        shown_page,  # current_page_state
        page_count,  # total_pages_state
    )
|
157 |
+
|
158 |
preset_prompt_checkbox.change(
|
159 |
+
fn=on_preset_prompt_checkbox_change,
|
160 |
inputs=[preset_prompt_checkbox],
|
161 |
+
outputs=[
|
162 |
+
preset_prompt,
|
163 |
+
prev_page_button,
|
164 |
+
next_page_button,
|
165 |
+
page_display,
|
166 |
+
current_page_state,
|
167 |
+
total_pages_state
|
168 |
+
]
|
169 |
+
)
|
170 |
+
|
171 |
+
# Pagination button functions
|
172 |
+
def on_prev_page_click(current_page, total_pages):
    """Load the previous page of preset prompts, never going below page 1.

    Args:
        current_page: Page number currently shown.
        total_pages: Stored page count; it is not read here, because the
            count used in the label comes back from ``list_prompts``.

    Returns:
        A 3-tuple: update for the preset dropdown choices, update for the
        page label, and the page number reported by ``list_prompts``.
    """
    target_page = current_page - 1
    if target_page < 1:
        target_page = 1

    prompt_choices, page_count, shown_page = list_prompts(page=target_page, per_page=20)
    label = f"Page {shown_page} of {page_count}"
    return (
        gr.update(choices=prompt_choices),
        gr.update(value=label),
        shown_page,
    )
|
181 |
+
|
182 |
+
prev_page_button.click(
|
183 |
+
fn=on_prev_page_click,
|
184 |
+
inputs=[current_page_state, total_pages_state],
|
185 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
186 |
)
|
187 |
|
188 |
+
def on_next_page_click(current_page, total_pages):
    """Load the next page of preset prompts, capped at the last page.

    Args:
        current_page: Page number currently shown.
        total_pages: Stored page count, used as the upper bound.

    Returns:
        A 3-tuple: update for the preset dropdown choices, update for the
        page label, and the page number reported by ``list_prompts``.
    """
    target_page = current_page + 1
    if total_pages < target_page:
        target_page = total_pages

    prompt_choices, page_count, shown_page = list_prompts(page=target_page, per_page=20)
    label = f"Page {shown_page} of {page_count}"
    return (
        gr.update(choices=prompt_choices),
        gr.update(value=label),
        shown_page,
    )
|
197 |
+
|
198 |
+
next_page_button.click(
|
199 |
+
fn=on_next_page_click,
|
200 |
+
inputs=[current_page_state, total_pages_state],
|
201 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
202 |
+
)
|
203 |
+
|
204 |
+
# Update prompts when a preset is selected
|
205 |
def update_prompts(preset_name):
|
206 |
prompts = update_user_prompt(preset_name)
|
207 |
return (
|
|
|
210 |
)
|
211 |
|
212 |
preset_prompt.change(
|
213 |
+
fn=update_prompts,
|
214 |
+
inputs=[preset_prompt],
|
215 |
outputs=[custom_prompt_input, system_prompt_input]
|
216 |
)
|
217 |
|
218 |
explain_summarize_button.click(
|
219 |
fn=summarize_explain_text,
|
220 |
+
inputs=[
|
221 |
+
text_to_work_input,
|
222 |
+
api_endpoint,
|
223 |
+
api_key_input,
|
224 |
+
summarization_checkbox,
|
225 |
+
explanation_checkbox,
|
226 |
+
custom_prompt_input,
|
227 |
+
system_prompt_input
|
228 |
+
],
|
229 |
outputs=[summarization_output, explanation_output, custom_prompt_output]
|
230 |
)
|
231 |
|
232 |
|
233 |
+
|
234 |
def summarize_explain_text(message, api_endpoint, api_key, summarization, explanation, custom_prompt, custom_system_prompt,):
|
235 |
global custom_prompt_output
|
236 |
summarization_response = None
|
App_Function_Libraries/Gradio_UI/Export_Functionality.py
CHANGED
@@ -6,9 +6,11 @@ import math
|
|
6 |
import logging
|
7 |
import shutil
|
8 |
import tempfile
|
9 |
-
from typing import List, Dict, Optional, Tuple
|
10 |
import gradio as gr
|
11 |
-
from App_Function_Libraries.DB.DB_Manager import DatabaseError
|
|
|
|
|
12 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import fetch_item_details, fetch_items_by_keyword, browse_items
|
13 |
|
14 |
logger = logging.getLogger(__name__)
|
@@ -36,7 +38,7 @@ def export_items_by_keyword(keyword: str) -> str:
|
|
36 |
items = fetch_items_by_keyword(keyword)
|
37 |
if not items:
|
38 |
logger.warning(f"No items found for keyword: {keyword}")
|
39 |
-
return
|
40 |
|
41 |
# Create a temporary directory to store individual markdown files
|
42 |
with tempfile.TemporaryDirectory() as temp_dir:
|
@@ -66,7 +68,7 @@ def export_items_by_keyword(keyword: str) -> str:
|
|
66 |
return final_zip_path
|
67 |
except Exception as e:
|
68 |
logger.error(f"Error exporting items for keyword '{keyword}': {str(e)}")
|
69 |
-
return
|
70 |
|
71 |
|
72 |
def export_selected_items(selected_items: List[Dict]) -> Tuple[Optional[str], str]:
|
@@ -146,121 +148,747 @@ def display_search_results_export_tab(search_query: str, search_type: str, page:
|
|
146 |
logger.error(error_message)
|
147 |
return [], error_message, 1, 1
|
148 |
|
|
|
|
|
|
|
149 |
|
150 |
-
def create_export_tab():
|
151 |
-
with gr.Tab("Search and Export"):
|
152 |
-
with gr.Row():
|
153 |
-
with gr.Column():
|
154 |
-
gr.Markdown("# Search and Export Items")
|
155 |
-
gr.Markdown("Search for items and export them as markdown files")
|
156 |
-
gr.Markdown("You can also export items by keyword")
|
157 |
-
search_query = gr.Textbox(label="Search Query")
|
158 |
-
search_type = gr.Radio(["Title", "URL", "Keyword", "Content"], label="Search By")
|
159 |
-
search_button = gr.Button("Search")
|
160 |
-
|
161 |
-
with gr.Column():
|
162 |
-
prev_button = gr.Button("Previous Page")
|
163 |
-
next_button = gr.Button("Next Page")
|
164 |
-
|
165 |
-
current_page = gr.State(1)
|
166 |
-
total_pages = gr.State(1)
|
167 |
-
|
168 |
-
search_results = gr.CheckboxGroup(label="Search Results", choices=[])
|
169 |
-
export_selected_button = gr.Button("Export Selected Items")
|
170 |
-
|
171 |
-
keyword_input = gr.Textbox(label="Enter keyword for export")
|
172 |
-
export_by_keyword_button = gr.Button("Export items by keyword")
|
173 |
-
|
174 |
-
export_output = gr.File(label="Download Exported File")
|
175 |
-
error_output = gr.Textbox(label="Status/Error Messages", interactive=False)
|
176 |
-
|
177 |
-
def search_and_update(query, search_type, page):
|
178 |
-
results, message, current, total = display_search_results_export_tab(query, search_type, page)
|
179 |
-
logger.debug(f"search_and_update results: {results}")
|
180 |
-
return results, message, current, total, gr.update(choices=results)
|
181 |
-
|
182 |
-
search_button.click(
|
183 |
-
fn=search_and_update,
|
184 |
-
inputs=[search_query, search_type, current_page],
|
185 |
-
outputs=[search_results, error_output, current_page, total_pages, search_results],
|
186 |
-
show_progress="full"
|
187 |
-
)
|
188 |
-
|
189 |
-
|
190 |
-
def update_page(current, total, direction):
|
191 |
-
new_page = max(1, min(total, current + direction))
|
192 |
-
return new_page
|
193 |
-
|
194 |
-
prev_button.click(
|
195 |
-
fn=update_page,
|
196 |
-
inputs=[current_page, total_pages, gr.State(-1)],
|
197 |
-
outputs=[current_page]
|
198 |
-
).then(
|
199 |
-
fn=search_and_update,
|
200 |
-
inputs=[search_query, search_type, current_page],
|
201 |
-
outputs=[search_results, error_output, current_page, total_pages],
|
202 |
-
show_progress=True
|
203 |
-
)
|
204 |
-
|
205 |
-
next_button.click(
|
206 |
-
fn=update_page,
|
207 |
-
inputs=[current_page, total_pages, gr.State(1)],
|
208 |
-
outputs=[current_page]
|
209 |
-
).then(
|
210 |
-
fn=search_and_update,
|
211 |
-
inputs=[search_query, search_type, current_page],
|
212 |
-
outputs=[search_results, error_output, current_page, total_pages],
|
213 |
-
show_progress=True
|
214 |
-
)
|
215 |
-
|
216 |
-
def handle_export_selected(selected_items):
|
217 |
-
logger.debug(f"Exporting selected items: {selected_items}")
|
218 |
-
return export_selected_items(selected_items)
|
219 |
-
|
220 |
-
export_selected_button.click(
|
221 |
-
fn=handle_export_selected,
|
222 |
-
inputs=[search_results],
|
223 |
-
outputs=[export_output, error_output],
|
224 |
-
show_progress="full"
|
225 |
-
)
|
226 |
-
|
227 |
-
export_by_keyword_button.click(
|
228 |
-
fn=export_items_by_keyword,
|
229 |
-
inputs=[keyword_input],
|
230 |
-
outputs=[export_output, error_output],
|
231 |
-
show_progress="full"
|
232 |
-
)
|
233 |
-
|
234 |
-
def handle_item_selection(selected_items):
|
235 |
-
logger.debug(f"Selected items: {selected_items}")
|
236 |
-
if not selected_items:
|
237 |
-
return None, "No item selected"
|
238 |
-
|
239 |
-
try:
|
240 |
-
# Assuming selected_items is a list of dictionaries
|
241 |
-
selected_item = selected_items[0]
|
242 |
-
logger.debug(f"First selected item: {selected_item}")
|
243 |
-
|
244 |
-
# Check if 'value' is a string (JSON) or already a dictionary
|
245 |
-
if isinstance(selected_item['value'], str):
|
246 |
-
item_data = json.loads(selected_item['value'])
|
247 |
-
else:
|
248 |
-
item_data = selected_item['value']
|
249 |
-
|
250 |
-
logger.debug(f"Item data: {item_data}")
|
251 |
-
|
252 |
-
item_id = item_data['id']
|
253 |
-
return export_item_as_markdown(item_id)
|
254 |
-
except Exception as e:
|
255 |
-
error_message = f"Error processing selected item: {str(e)}"
|
256 |
-
logger.error(error_message)
|
257 |
-
return None, error_message
|
258 |
-
|
259 |
-
search_results.select(
|
260 |
-
fn=handle_item_selection,
|
261 |
-
inputs=[search_results],
|
262 |
-
outputs=[export_output, error_output],
|
263 |
-
show_progress="full"
|
264 |
-
)
|
265 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
|
|
|
6 |
import logging
|
7 |
import shutil
|
8 |
import tempfile
|
9 |
+
from typing import List, Dict, Optional, Tuple, Any
|
10 |
import gradio as gr
|
11 |
+
from App_Function_Libraries.DB.DB_Manager import DatabaseError, fetch_all_notes, fetch_all_conversations, \
|
12 |
+
get_keywords_for_note, fetch_notes_by_ids, fetch_conversations_by_ids
|
13 |
+
from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_keywords_for_conversation
|
14 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import fetch_item_details, fetch_items_by_keyword, browse_items
|
15 |
|
16 |
logger = logging.getLogger(__name__)
|
|
|
38 |
items = fetch_items_by_keyword(keyword)
|
39 |
if not items:
|
40 |
logger.warning(f"No items found for keyword: {keyword}")
|
41 |
+
return f"No items found for keyword: {keyword}"
|
42 |
|
43 |
# Create a temporary directory to store individual markdown files
|
44 |
with tempfile.TemporaryDirectory() as temp_dir:
|
|
|
68 |
return final_zip_path
|
69 |
except Exception as e:
|
70 |
logger.error(f"Error exporting items for keyword '{keyword}': {str(e)}")
|
71 |
+
return f"Error exporting items for keyword '{keyword}': {str(e)}"
|
72 |
|
73 |
|
74 |
def export_selected_items(selected_items: List[Dict]) -> Tuple[Optional[str], str]:
|
|
|
148 |
logger.error(error_message)
|
149 |
return [], error_message, 1, 1
|
150 |
|
151 |
+
#
|
152 |
+
# End of Media DB Export functionality
|
153 |
+
################################################################
|
154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
+
################################################################
|
157 |
+
#
|
158 |
+
# Functions for RAG Chat DB Export functionality
|
159 |
+
|
160 |
+
|
161 |
+
def export_rag_conversations_as_json(
|
162 |
+
selected_conversations: Optional[List[Dict[str, Any]]] = None
|
163 |
+
) -> Tuple[Optional[str], str]:
|
164 |
+
"""
|
165 |
+
Export conversations to a JSON file.
|
166 |
+
|
167 |
+
Args:
|
168 |
+
selected_conversations: Optional list of conversation dictionaries
|
169 |
+
|
170 |
+
Returns:
|
171 |
+
Tuple of (filename or None, status message)
|
172 |
+
"""
|
173 |
+
try:
|
174 |
+
if selected_conversations:
|
175 |
+
# Extract conversation IDs from selected items
|
176 |
+
conversation_ids = []
|
177 |
+
for item in selected_conversations:
|
178 |
+
if isinstance(item, str):
|
179 |
+
item_data = json.loads(item)
|
180 |
+
elif isinstance(item, dict) and 'value' in item:
|
181 |
+
item_data = item['value'] if isinstance(item['value'], dict) else json.loads(item['value'])
|
182 |
+
else:
|
183 |
+
item_data = item
|
184 |
+
conversation_ids.append(item_data['conversation_id'])
|
185 |
+
|
186 |
+
conversations = fetch_conversations_by_ids(conversation_ids)
|
187 |
+
else:
|
188 |
+
conversations = fetch_all_conversations()
|
189 |
+
|
190 |
+
export_data = []
|
191 |
+
for conversation_id, title, messages in conversations:
|
192 |
+
# Get keywords for the conversation
|
193 |
+
keywords = get_keywords_for_conversation(conversation_id)
|
194 |
+
|
195 |
+
conversation_data = {
|
196 |
+
"conversation_id": conversation_id,
|
197 |
+
"title": title,
|
198 |
+
"keywords": keywords,
|
199 |
+
"messages": [
|
200 |
+
{"role": role, "content": content}
|
201 |
+
for role, content in messages
|
202 |
+
]
|
203 |
+
}
|
204 |
+
export_data.append(conversation_data)
|
205 |
+
|
206 |
+
filename = "rag_conversations_export.json"
|
207 |
+
with open(filename, "w", encoding='utf-8') as f:
|
208 |
+
json.dump(export_data, f, indent=2, ensure_ascii=False)
|
209 |
+
|
210 |
+
logger.info(f"Successfully exported {len(export_data)} conversations to {filename}")
|
211 |
+
return filename, f"Successfully exported {len(export_data)} conversations to {filename}"
|
212 |
+
except Exception as e:
|
213 |
+
error_message = f"Error exporting conversations: {str(e)}"
|
214 |
+
logger.error(error_message)
|
215 |
+
return None, error_message
|
216 |
+
|
217 |
+
|
218 |
+
def export_rag_notes_as_json(
|
219 |
+
selected_notes: Optional[List[Dict[str, Any]]] = None
|
220 |
+
) -> Tuple[Optional[str], str]:
|
221 |
+
"""
|
222 |
+
Export notes to a JSON file.
|
223 |
+
|
224 |
+
Args:
|
225 |
+
selected_notes: Optional list of note dictionaries
|
226 |
+
|
227 |
+
Returns:
|
228 |
+
Tuple of (filename or None, status message)
|
229 |
+
"""
|
230 |
+
try:
|
231 |
+
if selected_notes:
|
232 |
+
# Extract note IDs from selected items
|
233 |
+
note_ids = []
|
234 |
+
for item in selected_notes:
|
235 |
+
if isinstance(item, str):
|
236 |
+
item_data = json.loads(item)
|
237 |
+
elif isinstance(item, dict) and 'value' in item:
|
238 |
+
item_data = item['value'] if isinstance(item['value'], dict) else json.loads(item['value'])
|
239 |
+
else:
|
240 |
+
item_data = item
|
241 |
+
note_ids.append(item_data['id'])
|
242 |
+
|
243 |
+
notes = fetch_notes_by_ids(note_ids)
|
244 |
+
else:
|
245 |
+
notes = fetch_all_notes()
|
246 |
+
|
247 |
+
export_data = []
|
248 |
+
for note_id, title, content in notes:
|
249 |
+
# Get keywords for the note
|
250 |
+
keywords = get_keywords_for_note(note_id)
|
251 |
+
|
252 |
+
note_data = {
|
253 |
+
"note_id": note_id,
|
254 |
+
"title": title,
|
255 |
+
"content": content,
|
256 |
+
"keywords": keywords
|
257 |
+
}
|
258 |
+
export_data.append(note_data)
|
259 |
+
|
260 |
+
filename = "rag_notes_export.json"
|
261 |
+
with open(filename, "w", encoding='utf-8') as f:
|
262 |
+
json.dump(export_data, f, indent=2, ensure_ascii=False)
|
263 |
+
|
264 |
+
logger.info(f"Successfully exported {len(export_data)} notes to {filename}")
|
265 |
+
return filename, f"Successfully exported {len(export_data)} notes to {filename}"
|
266 |
+
except Exception as e:
|
267 |
+
error_message = f"Error exporting notes: {str(e)}"
|
268 |
+
logger.error(error_message)
|
269 |
+
return None, error_message
|
270 |
+
|
271 |
+
|
272 |
+
def display_rag_conversations(search_query: str = "", page: int = 1, items_per_page: int = 10):
|
273 |
+
"""Display conversations for selection in the export tab."""
|
274 |
+
try:
|
275 |
+
conversations = fetch_all_conversations()
|
276 |
+
|
277 |
+
if search_query:
|
278 |
+
# Simple search implementation - can be enhanced based on needs
|
279 |
+
conversations = [
|
280 |
+
conv for conv in conversations
|
281 |
+
if search_query.lower() in conv[1].lower() # Search in title
|
282 |
+
]
|
283 |
+
|
284 |
+
# Implement pagination
|
285 |
+
start_idx = (page - 1) * items_per_page
|
286 |
+
end_idx = start_idx + items_per_page
|
287 |
+
paginated_conversations = conversations[start_idx:end_idx]
|
288 |
+
total_pages = (len(conversations) + items_per_page - 1) // items_per_page
|
289 |
+
|
290 |
+
# Format for checkbox group
|
291 |
+
checkbox_data = [
|
292 |
+
{
|
293 |
+
"name": f"Title: {title}\nMessages: {len(messages)}",
|
294 |
+
"value": {"conversation_id": conv_id, "title": title}
|
295 |
+
}
|
296 |
+
for conv_id, title, messages in paginated_conversations
|
297 |
+
]
|
298 |
+
|
299 |
+
return (
|
300 |
+
checkbox_data,
|
301 |
+
f"Found {len(conversations)} conversations (showing page {page} of {total_pages})",
|
302 |
+
page,
|
303 |
+
total_pages
|
304 |
+
)
|
305 |
+
except Exception as e:
|
306 |
+
error_message = f"Error displaying conversations: {str(e)}"
|
307 |
+
logger.error(error_message)
|
308 |
+
return [], error_message, 1, 1
|
309 |
+
|
310 |
+
|
311 |
+
def display_rag_notes(search_query: str = "", page: int = 1, items_per_page: int = 10):
|
312 |
+
"""Display notes for selection in the export tab."""
|
313 |
+
try:
|
314 |
+
notes = fetch_all_notes()
|
315 |
+
|
316 |
+
if search_query:
|
317 |
+
# Simple search implementation - can be enhanced based on needs
|
318 |
+
notes = [
|
319 |
+
note for note in notes
|
320 |
+
if search_query.lower() in note[1].lower() # Search in title
|
321 |
+
or search_query.lower() in note[2].lower() # Search in content
|
322 |
+
]
|
323 |
+
|
324 |
+
# Implement pagination
|
325 |
+
start_idx = (page - 1) * items_per_page
|
326 |
+
end_idx = start_idx + items_per_page
|
327 |
+
paginated_notes = notes[start_idx:end_idx]
|
328 |
+
total_pages = (len(notes) + items_per_page - 1) // items_per_page
|
329 |
+
|
330 |
+
# Format for checkbox group
|
331 |
+
checkbox_data = [
|
332 |
+
{
|
333 |
+
"name": f"Title: {title}\nContent preview: {content[:100]}...",
|
334 |
+
"value": {"id": note_id, "title": title}
|
335 |
+
}
|
336 |
+
for note_id, title, content in paginated_notes
|
337 |
+
]
|
338 |
+
|
339 |
+
return (
|
340 |
+
checkbox_data,
|
341 |
+
f"Found {len(notes)} notes (showing page {page} of {total_pages})",
|
342 |
+
page,
|
343 |
+
total_pages
|
344 |
+
)
|
345 |
+
except Exception as e:
|
346 |
+
error_message = f"Error displaying notes: {str(e)}"
|
347 |
+
logger.error(error_message)
|
348 |
+
return [], error_message, 1, 1
|
349 |
+
|
350 |
+
|
351 |
+
def create_rag_export_tab():
|
352 |
+
"""Create the RAG QA Chat export tab interface."""
|
353 |
+
with gr.Tab("RAG QA Chat Export"):
|
354 |
+
with gr.Tabs():
|
355 |
+
# Conversations Export Tab
|
356 |
+
with gr.Tab("Export Conversations"):
|
357 |
+
with gr.Row():
|
358 |
+
with gr.Column():
|
359 |
+
gr.Markdown("## Export RAG QA Chat Conversations")
|
360 |
+
conversation_search = gr.Textbox(label="Search Conversations")
|
361 |
+
conversation_search_button = gr.Button("Search")
|
362 |
+
|
363 |
+
with gr.Column():
|
364 |
+
conversation_prev_button = gr.Button("Previous Page")
|
365 |
+
conversation_next_button = gr.Button("Next Page")
|
366 |
+
|
367 |
+
conversation_current_page = gr.State(1)
|
368 |
+
conversation_total_pages = gr.State(1)
|
369 |
+
|
370 |
+
conversation_results = gr.CheckboxGroup(label="Select Conversations to Export")
|
371 |
+
export_selected_conversations_button = gr.Button("Export Selected Conversations")
|
372 |
+
export_all_conversations_button = gr.Button("Export All Conversations")
|
373 |
+
|
374 |
+
conversation_export_output = gr.File(label="Download Exported Conversations")
|
375 |
+
conversation_status = gr.Textbox(label="Status", interactive=False)
|
376 |
+
|
377 |
+
# Notes Export Tab
|
378 |
+
with gr.Tab("Export Notes"):
|
379 |
+
with gr.Row():
|
380 |
+
with gr.Column():
|
381 |
+
gr.Markdown("## Export RAG QA Chat Notes")
|
382 |
+
notes_search = gr.Textbox(label="Search Notes")
|
383 |
+
notes_search_button = gr.Button("Search")
|
384 |
+
|
385 |
+
with gr.Column():
|
386 |
+
notes_prev_button = gr.Button("Previous Page")
|
387 |
+
notes_next_button = gr.Button("Next Page")
|
388 |
+
|
389 |
+
notes_current_page = gr.State(1)
|
390 |
+
notes_total_pages = gr.State(1)
|
391 |
+
|
392 |
+
notes_results = gr.CheckboxGroup(label="Select Notes to Export")
|
393 |
+
export_selected_notes_button = gr.Button("Export Selected Notes")
|
394 |
+
export_all_notes_button = gr.Button("Export All Notes")
|
395 |
+
|
396 |
+
notes_export_output = gr.File(label="Download Exported Notes")
|
397 |
+
notes_status = gr.Textbox(label="Status", interactive=False)
|
398 |
+
|
399 |
+
# Event handlers for conversations
|
400 |
+
def search_conversations(query, page):
|
401 |
+
return display_rag_conversations(query, page)
|
402 |
+
|
403 |
+
conversation_search_button.click(
|
404 |
+
fn=search_conversations,
|
405 |
+
inputs=[conversation_search, conversation_current_page],
|
406 |
+
outputs=[conversation_results, conversation_status, conversation_current_page, conversation_total_pages]
|
407 |
+
)
|
408 |
+
|
409 |
+
def update_conversation_page(current, total, direction):
|
410 |
+
new_page = max(1, min(total, current + direction))
|
411 |
+
return new_page
|
412 |
+
|
413 |
+
conversation_prev_button.click(
|
414 |
+
fn=update_conversation_page,
|
415 |
+
inputs=[conversation_current_page, conversation_total_pages, gr.State(-1)],
|
416 |
+
outputs=[conversation_current_page]
|
417 |
+
).then(
|
418 |
+
fn=search_conversations,
|
419 |
+
inputs=[conversation_search, conversation_current_page],
|
420 |
+
outputs=[conversation_results, conversation_status, conversation_current_page, conversation_total_pages]
|
421 |
+
)
|
422 |
+
|
423 |
+
conversation_next_button.click(
|
424 |
+
fn=update_conversation_page,
|
425 |
+
inputs=[conversation_current_page, conversation_total_pages, gr.State(1)],
|
426 |
+
outputs=[conversation_current_page]
|
427 |
+
).then(
|
428 |
+
fn=search_conversations,
|
429 |
+
inputs=[conversation_search, conversation_current_page],
|
430 |
+
outputs=[conversation_results, conversation_status, conversation_current_page, conversation_total_pages]
|
431 |
+
)
|
432 |
+
|
433 |
+
export_selected_conversations_button.click(
|
434 |
+
fn=export_rag_conversations_as_json,
|
435 |
+
inputs=[conversation_results],
|
436 |
+
outputs=[conversation_export_output, conversation_status]
|
437 |
+
)
|
438 |
+
|
439 |
+
export_all_conversations_button.click(
|
440 |
+
fn=lambda: export_rag_conversations_as_json(),
|
441 |
+
outputs=[conversation_export_output, conversation_status]
|
442 |
+
)
|
443 |
+
|
444 |
+
# Event handlers for notes
|
445 |
+
def search_notes(query, page):
|
446 |
+
return display_rag_notes(query, page)
|
447 |
+
|
448 |
+
notes_search_button.click(
|
449 |
+
fn=search_notes,
|
450 |
+
inputs=[notes_search, notes_current_page],
|
451 |
+
outputs=[notes_results, notes_status, notes_current_page, notes_total_pages]
|
452 |
+
)
|
453 |
+
|
454 |
+
def update_notes_page(current, total, direction):
|
455 |
+
new_page = max(1, min(total, current + direction))
|
456 |
+
return new_page
|
457 |
+
|
458 |
+
notes_prev_button.click(
|
459 |
+
fn=update_notes_page,
|
460 |
+
inputs=[notes_current_page, notes_total_pages, gr.State(-1)],
|
461 |
+
outputs=[notes_current_page]
|
462 |
+
).then(
|
463 |
+
fn=search_notes,
|
464 |
+
inputs=[notes_search, notes_current_page],
|
465 |
+
outputs=[notes_results, notes_status, notes_current_page, notes_total_pages]
|
466 |
+
)
|
467 |
+
|
468 |
+
notes_next_button.click(
|
469 |
+
fn=update_notes_page,
|
470 |
+
inputs=[notes_current_page, notes_total_pages, gr.State(1)],
|
471 |
+
outputs=[notes_current_page]
|
472 |
+
).then(
|
473 |
+
fn=search_notes,
|
474 |
+
inputs=[notes_search, notes_current_page],
|
475 |
+
outputs=[notes_results, notes_status, notes_current_page, notes_total_pages]
|
476 |
+
)
|
477 |
+
|
478 |
+
export_selected_notes_button.click(
|
479 |
+
fn=export_rag_notes_as_json,
|
480 |
+
inputs=[notes_results],
|
481 |
+
outputs=[notes_export_output, notes_status]
|
482 |
+
)
|
483 |
+
|
484 |
+
export_all_notes_button.click(
|
485 |
+
fn=lambda: export_rag_notes_as_json(),
|
486 |
+
outputs=[notes_export_output, notes_status]
|
487 |
+
)
|
488 |
+
|
489 |
+
#
|
490 |
+
# End of RAG Chat DB Export functionality
|
491 |
+
#####################################################
|
492 |
+
|
493 |
+
def create_export_tabs():
|
494 |
+
"""Create the unified export interface with all export tabs."""
|
495 |
+
with gr.Tabs():
|
496 |
+
# Media DB Export Tab
|
497 |
+
with gr.Tab("Media DB Export"):
|
498 |
+
with gr.Row():
|
499 |
+
with gr.Column():
|
500 |
+
gr.Markdown("# Search and Export Items")
|
501 |
+
gr.Markdown("Search for items and export them as markdown files")
|
502 |
+
gr.Markdown("You can also export items by keyword")
|
503 |
+
search_query = gr.Textbox(label="Search Query")
|
504 |
+
search_type = gr.Radio(["Title", "URL", "Keyword", "Content"], label="Search By")
|
505 |
+
search_button = gr.Button("Search")
|
506 |
+
|
507 |
+
with gr.Column():
|
508 |
+
prev_button = gr.Button("Previous Page")
|
509 |
+
next_button = gr.Button("Next Page")
|
510 |
+
|
511 |
+
current_page = gr.State(1)
|
512 |
+
total_pages = gr.State(1)
|
513 |
+
|
514 |
+
search_results = gr.CheckboxGroup(label="Search Results", choices=[])
|
515 |
+
export_selected_button = gr.Button("Export Selected Items")
|
516 |
+
|
517 |
+
keyword_input = gr.Textbox(label="Enter keyword for export")
|
518 |
+
export_by_keyword_button = gr.Button("Export items by keyword")
|
519 |
+
|
520 |
+
export_output = gr.File(label="Download Exported File")
|
521 |
+
error_output = gr.Textbox(label="Status/Error Messages", interactive=False)
|
522 |
+
|
523 |
+
# Conversations Export Tab
|
524 |
+
with gr.Tab("RAG Conversations Export"):
|
525 |
+
with gr.Row():
|
526 |
+
with gr.Column():
|
527 |
+
gr.Markdown("## Export RAG QA Chat Conversations")
|
528 |
+
conversation_search = gr.Textbox(label="Search Conversations")
|
529 |
+
conversation_search_button = gr.Button("Search")
|
530 |
+
|
531 |
+
with gr.Column():
|
532 |
+
conversation_prev_button = gr.Button("Previous Page")
|
533 |
+
conversation_next_button = gr.Button("Next Page")
|
534 |
+
|
535 |
+
conversation_current_page = gr.State(1)
|
536 |
+
conversation_total_pages = gr.State(1)
|
537 |
+
|
538 |
+
conversation_results = gr.CheckboxGroup(label="Select Conversations to Export")
|
539 |
+
export_selected_conversations_button = gr.Button("Export Selected Conversations")
|
540 |
+
export_all_conversations_button = gr.Button("Export All Conversations")
|
541 |
+
|
542 |
+
conversation_export_output = gr.File(label="Download Exported Conversations")
|
543 |
+
conversation_status = gr.Textbox(label="Status", interactive=False)
|
544 |
+
|
545 |
+
# Notes Export Tab
|
546 |
+
with gr.Tab("RAG Notes Export"):
|
547 |
+
with gr.Row():
|
548 |
+
with gr.Column():
|
549 |
+
gr.Markdown("## Export RAG QA Chat Notes")
|
550 |
+
notes_search = gr.Textbox(label="Search Notes")
|
551 |
+
notes_search_button = gr.Button("Search")
|
552 |
+
|
553 |
+
with gr.Column():
|
554 |
+
notes_prev_button = gr.Button("Previous Page")
|
555 |
+
notes_next_button = gr.Button("Next Page")
|
556 |
+
|
557 |
+
notes_current_page = gr.State(1)
|
558 |
+
notes_total_pages = gr.State(1)
|
559 |
+
|
560 |
+
notes_results = gr.CheckboxGroup(label="Select Notes to Export")
|
561 |
+
export_selected_notes_button = gr.Button("Export Selected Notes")
|
562 |
+
export_all_notes_button = gr.Button("Export All Notes")
|
563 |
+
|
564 |
+
notes_export_output = gr.File(label="Download Exported Notes")
|
565 |
+
notes_status = gr.Textbox(label="Status", interactive=False)
|
566 |
+
|
567 |
+
# Event handlers for media DB
|
568 |
+
def search_and_update(query, search_type, page):
|
569 |
+
results, message, current, total = display_search_results_export_tab(query, search_type, page)
|
570 |
+
logger.debug(f"search_and_update results: {results}")
|
571 |
+
return results, message, current, total, gr.update(choices=results)
|
572 |
+
|
573 |
+
def update_page(current, total, direction):
|
574 |
+
new_page = max(1, min(total, current + direction))
|
575 |
+
return new_page
|
576 |
+
|
577 |
+
def handle_export_selected(selected_items):
|
578 |
+
logger.debug(f"Exporting selected items: {selected_items}")
|
579 |
+
return export_selected_items(selected_items)
|
580 |
+
|
581 |
+
def handle_item_selection(selected_items):
|
582 |
+
logger.debug(f"Selected items: {selected_items}")
|
583 |
+
if not selected_items:
|
584 |
+
return None, "No item selected"
|
585 |
+
|
586 |
+
try:
|
587 |
+
selected_item = selected_items[0]
|
588 |
+
logger.debug(f"First selected item: {selected_item}")
|
589 |
+
|
590 |
+
if isinstance(selected_item['value'], str):
|
591 |
+
item_data = json.loads(selected_item['value'])
|
592 |
+
else:
|
593 |
+
item_data = selected_item['value']
|
594 |
+
|
595 |
+
logger.debug(f"Item data: {item_data}")
|
596 |
+
item_id = item_data['id']
|
597 |
+
return export_item_as_markdown(item_id)
|
598 |
+
except Exception as e:
|
599 |
+
error_message = f"Error processing selected item: {str(e)}"
|
600 |
+
logger.error(error_message)
|
601 |
+
return None, error_message
|
602 |
+
|
603 |
+
search_button.click(
|
604 |
+
fn=search_and_update,
|
605 |
+
inputs=[search_query, search_type, current_page],
|
606 |
+
outputs=[search_results, error_output, current_page, total_pages, search_results],
|
607 |
+
show_progress="full"
|
608 |
+
)
|
609 |
+
|
610 |
+
prev_button.click(
|
611 |
+
fn=update_page,
|
612 |
+
inputs=[current_page, total_pages, gr.State(-1)],
|
613 |
+
outputs=[current_page]
|
614 |
+
).then(
|
615 |
+
fn=search_and_update,
|
616 |
+
inputs=[search_query, search_type, current_page],
|
617 |
+
outputs=[search_results, error_output, current_page, total_pages],
|
618 |
+
show_progress=True
|
619 |
+
)
|
620 |
+
|
621 |
+
next_button.click(
|
622 |
+
fn=update_page,
|
623 |
+
inputs=[current_page, total_pages, gr.State(1)],
|
624 |
+
outputs=[current_page]
|
625 |
+
).then(
|
626 |
+
fn=search_and_update,
|
627 |
+
inputs=[search_query, search_type, current_page],
|
628 |
+
outputs=[search_results, error_output, current_page, total_pages],
|
629 |
+
show_progress=True
|
630 |
+
)
|
631 |
+
|
632 |
+
export_selected_button.click(
|
633 |
+
fn=handle_export_selected,
|
634 |
+
inputs=[search_results],
|
635 |
+
outputs=[export_output, error_output],
|
636 |
+
show_progress="full"
|
637 |
+
)
|
638 |
+
|
639 |
+
export_by_keyword_button.click(
|
640 |
+
fn=export_items_by_keyword,
|
641 |
+
inputs=[keyword_input],
|
642 |
+
outputs=[export_output, error_output],
|
643 |
+
show_progress="full"
|
644 |
+
)
|
645 |
+
|
646 |
+
search_results.select(
|
647 |
+
fn=handle_item_selection,
|
648 |
+
inputs=[search_results],
|
649 |
+
outputs=[export_output, error_output],
|
650 |
+
show_progress="full"
|
651 |
+
)
|
652 |
+
|
653 |
+
# Event handlers for conversations
|
654 |
+
def search_conversations(query, page):
|
655 |
+
return display_rag_conversations(query, page)
|
656 |
+
|
657 |
+
def update_conversation_page(current, total, direction):
|
658 |
+
new_page = max(1, min(total, current + direction))
|
659 |
+
return new_page
|
660 |
+
|
661 |
+
conversation_search_button.click(
|
662 |
+
fn=search_conversations,
|
663 |
+
inputs=[conversation_search, conversation_current_page],
|
664 |
+
outputs=[conversation_results, conversation_status, conversation_current_page, conversation_total_pages]
|
665 |
+
)
|
666 |
+
|
667 |
+
conversation_prev_button.click(
|
668 |
+
fn=update_conversation_page,
|
669 |
+
inputs=[conversation_current_page, conversation_total_pages, gr.State(-1)],
|
670 |
+
outputs=[conversation_current_page]
|
671 |
+
).then(
|
672 |
+
fn=search_conversations,
|
673 |
+
inputs=[conversation_search, conversation_current_page],
|
674 |
+
outputs=[conversation_results, conversation_status, conversation_current_page, conversation_total_pages]
|
675 |
+
)
|
676 |
+
|
677 |
+
conversation_next_button.click(
|
678 |
+
fn=update_conversation_page,
|
679 |
+
inputs=[conversation_current_page, conversation_total_pages, gr.State(1)],
|
680 |
+
outputs=[conversation_current_page]
|
681 |
+
).then(
|
682 |
+
fn=search_conversations,
|
683 |
+
inputs=[conversation_search, conversation_current_page],
|
684 |
+
outputs=[conversation_results, conversation_status, conversation_current_page, conversation_total_pages]
|
685 |
+
)
|
686 |
+
|
687 |
+
export_selected_conversations_button.click(
|
688 |
+
fn=export_rag_conversations_as_json,
|
689 |
+
inputs=[conversation_results],
|
690 |
+
outputs=[conversation_export_output, conversation_status]
|
691 |
+
)
|
692 |
+
|
693 |
+
export_all_conversations_button.click(
|
694 |
+
fn=lambda: export_rag_conversations_as_json(),
|
695 |
+
outputs=[conversation_export_output, conversation_status]
|
696 |
+
)
|
697 |
+
|
698 |
+
# Event handlers for notes
|
699 |
+
def search_notes(query, page):
    """Fetch one page of RAG notes matching ``query``.

    Adapter used by the notes search/pagination event handlers: both
    arguments are forwarded unchanged to ``display_rag_notes`` and its
    result is returned as-is.
    """
    page_of_results = display_rag_notes(query, page)
    return page_of_results
|
701 |
+
|
702 |
+
def update_notes_page(current, total, direction):
    """Return the notes page reached by stepping ``direction`` from ``current``.

    The target is first capped at ``total`` and then floored at 1, so the
    result is always at least 1 even when ``total`` is smaller than 1.
    """
    capped_at_last = min(total, current + direction)
    return max(1, capped_at_last)
|
705 |
+
|
706 |
+
notes_search_button.click(
|
707 |
+
fn=search_notes,
|
708 |
+
inputs=[notes_search, notes_current_page],
|
709 |
+
outputs=[notes_results, notes_status, notes_current_page, notes_total_pages]
|
710 |
+
)
|
711 |
+
|
712 |
+
notes_prev_button.click(
|
713 |
+
fn=update_notes_page,
|
714 |
+
inputs=[notes_current_page, notes_total_pages, gr.State(-1)],
|
715 |
+
outputs=[notes_current_page]
|
716 |
+
).then(
|
717 |
+
fn=search_notes,
|
718 |
+
inputs=[notes_search, notes_current_page],
|
719 |
+
outputs=[notes_results, notes_status, notes_current_page, notes_total_pages]
|
720 |
+
)
|
721 |
+
|
722 |
+
notes_next_button.click(
|
723 |
+
fn=update_notes_page,
|
724 |
+
inputs=[notes_current_page, notes_total_pages, gr.State(1)],
|
725 |
+
outputs=[notes_current_page]
|
726 |
+
).then(
|
727 |
+
fn=search_notes,
|
728 |
+
inputs=[notes_search, notes_current_page],
|
729 |
+
outputs=[notes_results, notes_status, notes_current_page, notes_total_pages]
|
730 |
+
)
|
731 |
+
|
732 |
+
export_selected_notes_button.click(
|
733 |
+
fn=export_rag_notes_as_json,
|
734 |
+
inputs=[notes_results],
|
735 |
+
outputs=[notes_export_output, notes_status]
|
736 |
+
)
|
737 |
+
|
738 |
+
export_all_notes_button.click(
|
739 |
+
fn=lambda: export_rag_notes_as_json(),
|
740 |
+
outputs=[notes_export_output, notes_status]
|
741 |
+
)
|
742 |
+
|
743 |
+
with gr.TabItem("Export Prompts", visible=True):
|
744 |
+
gr.Markdown("# Export Prompts Database Content")
|
745 |
+
|
746 |
+
with gr.Row():
|
747 |
+
with gr.Column():
|
748 |
+
export_type = gr.Radio(
|
749 |
+
choices=["All Prompts", "Prompts by Keyword"],
|
750 |
+
label="Export Type",
|
751 |
+
value="All Prompts"
|
752 |
+
)
|
753 |
+
|
754 |
+
# Keyword selection for filtered export
|
755 |
+
with gr.Column(visible=False) as keyword_col:
|
756 |
+
keyword_input = gr.Textbox(
|
757 |
+
label="Enter Keywords (comma-separated)",
|
758 |
+
placeholder="Enter keywords to filter prompts..."
|
759 |
+
)
|
760 |
+
|
761 |
+
# Export format selection
|
762 |
+
export_format = gr.Radio(
|
763 |
+
choices=["CSV", "Markdown (ZIP)"],
|
764 |
+
label="Export Format",
|
765 |
+
value="CSV"
|
766 |
+
)
|
767 |
+
|
768 |
+
# Export options
|
769 |
+
include_options = gr.CheckboxGroup(
|
770 |
+
choices=[
|
771 |
+
"Include System Prompts",
|
772 |
+
"Include User Prompts",
|
773 |
+
"Include Details",
|
774 |
+
"Include Author",
|
775 |
+
"Include Keywords"
|
776 |
+
],
|
777 |
+
label="Export Options",
|
778 |
+
value=["Include Keywords", "Include Author"]
|
779 |
+
)
|
780 |
+
|
781 |
+
# Markdown-specific options (only visible when Markdown is selected)
|
782 |
+
with gr.Column(visible=False) as markdown_options_col:
|
783 |
+
markdown_template = gr.Radio(
|
784 |
+
choices=[
|
785 |
+
"Basic Template",
|
786 |
+
"Detailed Template",
|
787 |
+
"Custom Template"
|
788 |
+
],
|
789 |
+
label="Markdown Template",
|
790 |
+
value="Basic Template"
|
791 |
+
)
|
792 |
+
custom_template = gr.Textbox(
|
793 |
+
label="Custom Template",
|
794 |
+
placeholder="Use {title}, {author}, {details}, {system}, {user}, {keywords} as placeholders",
|
795 |
+
visible=False
|
796 |
+
)
|
797 |
+
|
798 |
+
export_button = gr.Button("Export Prompts")
|
799 |
+
|
800 |
+
with gr.Column():
|
801 |
+
export_status = gr.Textbox(label="Export Status", interactive=False)
|
802 |
+
export_file = gr.File(label="Download Export")
|
803 |
+
|
804 |
+
def update_ui_visibility(export_type, format_choice, template_choice):
    """Compute visibility updates for the conditional export controls.

    Returns a list of three ``gr.update`` objects, in this order:
    keyword column, markdown options column, custom template textbox.
    The custom template box is only shown when the Markdown format is
    selected as well as the "Custom Template" choice.
    """
    keywords_visible = export_type == "Prompts by Keyword"
    markdown_visible = format_choice == "Markdown (ZIP)"
    custom_visible = template_choice == "Custom Template" and markdown_visible

    visibility_flags = (keywords_visible, markdown_visible, custom_visible)
    return [gr.update(visible=flag) for flag in visibility_flags]
|
815 |
+
|
816 |
+
def handle_export(export_type, keywords, export_format, options, markdown_template, custom_template):
    """Translate the export form's values into an ``export_prompts`` call.

    Returns a ``(status, file_path)`` pair for the status textbox and the
    download component. Any exception is logged and reported as
    ``("Export failed: ...", None)`` instead of propagating to the UI.
    """
    try:
        # Map each export_prompts keyword argument to the checkbox label that enables it.
        option_labels = {
            "include_system": "Include System Prompts",
            "include_user": "Include User Prompts",
            "include_details": "Include Details",
            "include_author": "Include Author",
            "include_keywords": "Include Keywords",
        }
        include_flags = {arg: label in options for arg, label in option_labels.items()}

        # Keyword filtering only applies to the "by keyword" export type.
        keyword_list = None
        if export_type == "Prompts by Keyword" and keywords:
            stripped = (piece.strip() for piece in keywords.split(","))
            keyword_list = [piece for piece in stripped if piece]

        # A template is only meaningful for the Markdown export format.
        template = None
        if export_format == "Markdown (ZIP)":
            use_custom = markdown_template == "Custom Template"
            template = custom_template if use_custom else markdown_template

        from App_Function_Libraries.DB.Prompts_DB import export_prompts

        format_key = export_format.split()[0].lower()  # 'csv' or 'markdown'
        status, file_path = export_prompts(
            export_format=format_key,
            filter_keywords=keyword_list,
            markdown_template=template,
            **include_flags
        )
        return status, file_path

    except Exception as e:
        error_msg = f"Export failed: {str(e)}"
        logging.error(error_msg)
        return error_msg, None
|
858 |
+
|
859 |
+
# Event handlers
|
860 |
+
export_type.change(
|
861 |
+
fn=lambda t, f, m: update_ui_visibility(t, f, m),
|
862 |
+
inputs=[export_type, export_format, markdown_template],
|
863 |
+
outputs=[keyword_col, markdown_options_col, custom_template]
|
864 |
+
)
|
865 |
+
|
866 |
+
export_format.change(
|
867 |
+
fn=lambda t, f, m: update_ui_visibility(t, f, m),
|
868 |
+
inputs=[export_type, export_format, markdown_template],
|
869 |
+
outputs=[keyword_col, markdown_options_col, custom_template]
|
870 |
+
)
|
871 |
+
|
872 |
+
markdown_template.change(
|
873 |
+
fn=lambda t, f, m: update_ui_visibility(t, f, m),
|
874 |
+
inputs=[export_type, export_format, markdown_template],
|
875 |
+
outputs=[keyword_col, markdown_options_col, custom_template]
|
876 |
+
)
|
877 |
+
|
878 |
+
export_button.click(
|
879 |
+
fn=handle_export,
|
880 |
+
inputs=[
|
881 |
+
export_type,
|
882 |
+
keyword_input,
|
883 |
+
export_format,
|
884 |
+
include_options,
|
885 |
+
markdown_template,
|
886 |
+
custom_template
|
887 |
+
],
|
888 |
+
outputs=[export_status, export_file]
|
889 |
+
)
|
890 |
+
|
891 |
+
#
|
892 |
+
# End of Export_Functionality.py
|
893 |
+
######################################################################################################################
|
894 |
|
App_Function_Libraries/Gradio_UI/Gradio_Shared.py
CHANGED
@@ -216,11 +216,6 @@ def format_content(content):
|
|
216 |
return formatted_content
|
217 |
|
218 |
|
219 |
-
def update_prompt_dropdown():
|
220 |
-
prompt_names = list_prompts()
|
221 |
-
return gr.update(choices=prompt_names)
|
222 |
-
|
223 |
-
|
224 |
def display_prompt_details(selected_prompt):
|
225 |
if selected_prompt:
|
226 |
prompts = update_user_prompt(selected_prompt)
|
|
|
216 |
return formatted_content
|
217 |
|
218 |
|
|
|
|
|
|
|
|
|
|
|
219 |
def display_prompt_details(selected_prompt):
|
220 |
if selected_prompt:
|
221 |
prompts = update_user_prompt(selected_prompt)
|
App_Function_Libraries/Gradio_UI/Import_Functionality.py
CHANGED
@@ -2,24 +2,31 @@
|
|
2 |
# Functionality to import content into the DB
|
3 |
#
|
4 |
# Imports
|
|
|
5 |
from time import sleep
|
6 |
import logging
|
7 |
import re
|
8 |
import shutil
|
9 |
import tempfile
|
10 |
import os
|
|
|
|
|
11 |
import traceback
|
|
|
|
|
12 |
import zipfile
|
13 |
#
|
14 |
# External Imports
|
15 |
import gradio as gr
|
|
|
|
|
16 |
#
|
17 |
# Local Imports
|
18 |
-
from App_Function_Libraries.DB.DB_Manager import insert_prompt_to_db,
|
19 |
-
add_media_to_database
|
20 |
from App_Function_Libraries.Prompt_Handling import import_prompt_from_file, import_prompts_from_zip#
|
21 |
from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
|
22 |
-
|
23 |
###################################################################################################################
|
24 |
#
|
25 |
# Functions:
|
@@ -203,15 +210,6 @@ def create_import_single_prompt_tab():
|
|
203 |
outputs=save_output
|
204 |
)
|
205 |
|
206 |
-
def update_prompt_dropdown():
|
207 |
-
return gr.update(choices=load_preset_prompts())
|
208 |
-
|
209 |
-
save_button.click(
|
210 |
-
fn=update_prompt_dropdown,
|
211 |
-
inputs=[],
|
212 |
-
outputs=[gr.Dropdown(label="Select Preset Prompt")]
|
213 |
-
)
|
214 |
-
|
215 |
def create_import_item_tab():
|
216 |
with gr.TabItem("Import Markdown/Text Files", visible=True):
|
217 |
gr.Markdown("# Import a markdown file or text file into the database")
|
@@ -250,11 +248,18 @@ def create_import_multiple_prompts_tab():
|
|
250 |
gr.Markdown("# Import multiple prompts into the database")
|
251 |
gr.Markdown("Upload a zip file containing multiple prompt files (txt or md)")
|
252 |
|
|
|
|
|
|
|
|
|
253 |
with gr.Row():
|
254 |
with gr.Column():
|
255 |
zip_file = gr.File(label="Upload zip file for import", file_types=["zip"])
|
256 |
import_button = gr.Button("Import Prompts")
|
257 |
prompts_dropdown = gr.Dropdown(label="Select Prompt to Edit", choices=[])
|
|
|
|
|
|
|
258 |
title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content")
|
259 |
author_input = gr.Textbox(label="Author", placeholder="Enter the author's name")
|
260 |
system_input = gr.Textbox(label="System", placeholder="Enter the system message for the prompt",
|
@@ -268,6 +273,10 @@ def create_import_multiple_prompts_tab():
|
|
268 |
save_output = gr.Textbox(label="Save Status")
|
269 |
prompts_display = gr.Textbox(label="Identified Prompts")
|
270 |
|
|
|
|
|
|
|
|
|
271 |
def handle_zip_import(zip_file):
|
272 |
result = import_prompts_from_zip(zip_file)
|
273 |
if isinstance(result, list):
|
@@ -278,6 +287,13 @@ def create_import_multiple_prompts_tab():
|
|
278 |
else:
|
279 |
return gr.update(value=result), [], gr.update(value=""), []
|
280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
def handle_prompt_selection(selected_title, prompts):
|
282 |
selected_prompt = next((prompt for prompt in prompts if prompt['title'] == selected_title), None)
|
283 |
if selected_prompt:
|
@@ -305,23 +321,68 @@ def create_import_multiple_prompts_tab():
|
|
305 |
outputs=[title_input, author_input, system_input, user_input, keywords_input]
|
306 |
)
|
307 |
|
|
|
308 |
def save_prompt_to_db(title, author, system, user, keywords):
|
309 |
keyword_list = [k.strip() for k in keywords.split(',') if k.strip()]
|
310 |
-
|
|
|
311 |
|
312 |
save_button.click(
|
313 |
fn=save_prompt_to_db,
|
314 |
inputs=[title_input, author_input, system_input, user_input, keywords_input],
|
315 |
-
outputs=save_output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
316 |
)
|
317 |
|
|
|
318 |
def update_prompt_dropdown():
|
319 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
320 |
|
|
|
321 |
save_button.click(
|
322 |
fn=update_prompt_dropdown,
|
323 |
inputs=[],
|
324 |
-
outputs=[
|
325 |
)
|
326 |
|
327 |
|
@@ -385,4 +446,392 @@ def import_obsidian_vault(vault_path, progress=gr.Progress()):
|
|
385 |
except Exception as e:
|
386 |
error_msg = f"Error scanning vault: {str(e)}\n{traceback.format_exc()}"
|
387 |
logger.error(error_msg)
|
388 |
-
return 0, 0, [error_msg]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
# Functionality to import content into the DB
|
3 |
#
|
4 |
# Imports
|
5 |
+
from datetime import datetime
|
6 |
from time import sleep
|
7 |
import logging
|
8 |
import re
|
9 |
import shutil
|
10 |
import tempfile
|
11 |
import os
|
12 |
+
from pathlib import Path
|
13 |
+
import sqlite3
|
14 |
import traceback
|
15 |
+
from typing import Optional, List, Dict, Tuple
|
16 |
+
import uuid
|
17 |
import zipfile
|
18 |
#
|
19 |
# External Imports
|
20 |
import gradio as gr
|
21 |
+
from chardet import detect
|
22 |
+
|
23 |
#
|
24 |
# Local Imports
|
25 |
+
from App_Function_Libraries.DB.DB_Manager import insert_prompt_to_db, import_obsidian_note_to_db, \
|
26 |
+
add_media_to_database, list_prompts
|
27 |
from App_Function_Libraries.Prompt_Handling import import_prompt_from_file, import_prompts_from_zip#
|
28 |
from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
|
29 |
+
#
|
30 |
###################################################################################################################
|
31 |
#
|
32 |
# Functions:
|
|
|
210 |
outputs=save_output
|
211 |
)
|
212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
def create_import_item_tab():
|
214 |
with gr.TabItem("Import Markdown/Text Files", visible=True):
|
215 |
gr.Markdown("# Import a markdown file or text file into the database")
|
|
|
248 |
gr.Markdown("# Import multiple prompts into the database")
|
249 |
gr.Markdown("Upload a zip file containing multiple prompt files (txt or md)")
|
250 |
|
251 |
+
# Initialize state variables for pagination
|
252 |
+
current_page_state = gr.State(value=1)
|
253 |
+
total_pages_state = gr.State(value=1)
|
254 |
+
|
255 |
with gr.Row():
|
256 |
with gr.Column():
|
257 |
zip_file = gr.File(label="Upload zip file for import", file_types=["zip"])
|
258 |
import_button = gr.Button("Import Prompts")
|
259 |
prompts_dropdown = gr.Dropdown(label="Select Prompt to Edit", choices=[])
|
260 |
+
prev_page_button = gr.Button("Previous Page", visible=False)
|
261 |
+
page_display = gr.Markdown("Page 1 of X", visible=False)
|
262 |
+
next_page_button = gr.Button("Next Page", visible=False)
|
263 |
title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content")
|
264 |
author_input = gr.Textbox(label="Author", placeholder="Enter the author's name")
|
265 |
system_input = gr.Textbox(label="System", placeholder="Enter the system message for the prompt",
|
|
|
273 |
save_output = gr.Textbox(label="Save Status")
|
274 |
prompts_display = gr.Textbox(label="Identified Prompts")
|
275 |
|
276 |
+
# State to store imported prompts
|
277 |
+
zip_import_state = gr.State([])
|
278 |
+
|
279 |
+
# Function to handle zip import
|
280 |
def handle_zip_import(zip_file):
|
281 |
result = import_prompts_from_zip(zip_file)
|
282 |
if isinstance(result, list):
|
|
|
287 |
else:
|
288 |
return gr.update(value=result), [], gr.update(value=""), []
|
289 |
|
290 |
+
import_button.click(
|
291 |
+
fn=handle_zip_import,
|
292 |
+
inputs=[zip_file],
|
293 |
+
outputs=[import_output, prompts_dropdown, prompts_display, zip_import_state]
|
294 |
+
)
|
295 |
+
|
296 |
+
# Function to handle prompt selection from imported prompts
|
297 |
def handle_prompt_selection(selected_title, prompts):
|
298 |
selected_prompt = next((prompt for prompt in prompts if prompt['title'] == selected_title), None)
|
299 |
if selected_prompt:
|
|
|
321 |
outputs=[title_input, author_input, system_input, user_input, keywords_input]
|
322 |
)
|
323 |
|
324 |
+
# Function to save prompt to the database
|
325 |
def save_prompt_to_db(title, author, system, user, keywords):
    """Store one prompt via ``insert_prompt_to_db`` and return its result.

    ``keywords`` is a comma-separated string; entries are stripped of
    surrounding whitespace and blank entries are dropped before saving.
    """
    trimmed = (entry.strip() for entry in keywords.split(','))
    keyword_list = [entry for entry in trimmed if entry]
    return insert_prompt_to_db(title, author, system, user, keyword_list)
|
329 |
|
330 |
save_button.click(
|
331 |
fn=save_prompt_to_db,
|
332 |
inputs=[title_input, author_input, system_input, user_input, keywords_input],
|
333 |
+
outputs=[save_output]
|
334 |
+
)
|
335 |
+
|
336 |
+
# Adding pagination controls to navigate prompts in the database
|
337 |
+
def on_prev_page_click(current_page, total_pages):
    """Load the previous page of prompts (never going below page 1).

    Returns updates for the prompts dropdown and the page label, plus the
    page number reported back by ``list_prompts``.
    """
    target_page = max(current_page - 1, 1)
    prompts, total_pages, current_page = list_prompts(page=target_page, per_page=10)

    dropdown_update = gr.update(choices=prompts)
    label_update = gr.update(value=f"Page {current_page} of {total_pages}")
    return dropdown_update, label_update, current_page
|
346 |
+
|
347 |
+
def on_next_page_click(current_page, total_pages):
    """Load the next page of prompts (never going past ``total_pages``).

    Returns updates for the prompts dropdown and the page label, plus the
    page number reported back by ``list_prompts``.
    """
    target_page = min(current_page + 1, total_pages)
    prompts, total_pages, current_page = list_prompts(page=target_page, per_page=10)

    dropdown_update = gr.update(choices=prompts)
    label_update = gr.update(value=f"Page {current_page} of {total_pages}")
    return dropdown_update, label_update, current_page
|
356 |
+
|
357 |
+
prev_page_button.click(
|
358 |
+
fn=on_prev_page_click,
|
359 |
+
inputs=[current_page_state, total_pages_state],
|
360 |
+
outputs=[prompts_dropdown, page_display, current_page_state]
|
361 |
+
)
|
362 |
+
|
363 |
+
next_page_button.click(
|
364 |
+
fn=on_next_page_click,
|
365 |
+
inputs=[current_page_state, total_pages_state],
|
366 |
+
outputs=[prompts_dropdown, page_display, current_page_state]
|
367 |
)
|
368 |
|
369 |
+
# Function to update prompts dropdown after saving to the database
|
370 |
def update_prompt_dropdown():
    """Reload the first page of stored prompts and reveal the pagination controls.

    Returns one value per component wired in the ``save_button.click`` call
    below, in this order: prompts dropdown, previous-page button, next-page
    button, page label, current-page state, total-pages state.
    """
    prompts, total_pages, current_page = list_prompts(page=1, per_page=10)
    page_display_text = f"Page {current_page} of {total_pages}"
    return (
        gr.update(choices=prompts),
        gr.update(visible=True),  # prev_page_button
        # next_page_button is created hidden; without this update it would
        # never become visible and later pages could not be reached.
        gr.update(visible=True),
        gr.update(value=page_display_text, visible=True),
        current_page,
        total_pages
    )

# Update the dropdown (and show both pagination buttons) after saving
save_button.click(
    fn=update_prompt_dropdown,
    inputs=[],
    outputs=[
        prompts_dropdown,
        prev_page_button,
        next_page_button,
        page_display,
        current_page_state,
        total_pages_state
    ]
)
|
387 |
|
388 |
|
|
|
446 |
except Exception as e:
|
447 |
error_msg = f"Error scanning vault: {str(e)}\n{traceback.format_exc()}"
|
448 |
logger.error(error_msg)
|
449 |
+
return 0, 0, [error_msg]
|
450 |
+
|
451 |
+
|
452 |
+
class RAGQABatchImporter:
    """Import markdown/text files, or zip archives of them, into a RAG QA SQLite database.

    Each imported file becomes one conversation: a ``conversation_metadata``
    row, one ``rag_qa_chats`` row per blank-line-separated section of the
    file, and optional keyword links.
    """

    def __init__(self, db_path: str):
        """Store the database path, configure logging and create the helpers.

        Args:
            db_path: Filesystem path of the SQLite database to import into.
        """
        self.db_path = Path(db_path)
        self.setup_logging()
        self.file_processor = FileProcessor()
        self.zip_validator = ZipValidator()

    def setup_logging(self):
        """Configure root logging to ``rag_qa_import.log`` and the console."""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('rag_qa_import.log'),
                logging.StreamHandler()
            ]
        )

    def process_markdown_content(self, content: str) -> List[Dict[str, str]]:
        """Split ``content`` on blank lines into a list of 'user' messages.

        Sections that are empty after stripping whitespace are dropped.
        """
        return [
            {'role': 'user', 'content': section.strip()}
            for section in content.split('\n\n')
            if section.strip()
        ]

    def process_keywords(self, db: sqlite3.Connection, conversation_id: str, keywords: str):
        """Create (if needed) and link comma-separated keywords to a conversation.

        Blank entries (e.g. from a trailing comma) are ignored and repeated
        keywords are linked only once, so no empty keyword row and no
        duplicate link rows are written.
        """
        if not keywords:
            return

        # dict.fromkeys de-duplicates while keeping the user's order.
        keyword_list = list(dict.fromkeys(
            k.strip() for k in keywords.split(',') if k.strip()
        ))
        for keyword in keyword_list:
            # Insert keyword if it doesn't exist
            db.execute("""
                INSERT OR IGNORE INTO rag_qa_keywords (keyword)
                VALUES (?)
            """, (keyword,))

            # Get keyword ID
            keyword_id = db.execute("""
                SELECT id FROM rag_qa_keywords WHERE keyword = ?
            """, (keyword,)).fetchone()[0]

            # Link keyword to conversation
            db.execute("""
                INSERT INTO rag_qa_conversation_keywords
                (conversation_id, keyword_id)
                VALUES (?, ?)
            """, (conversation_id, keyword_id))

    def import_single_file(
            self,
            db: sqlite3.Connection,
            content: str,
            filename: str,
            keywords: str,
            custom_prompt: Optional[str] = None,
            rating: Optional[int] = None
    ) -> str:
        """Import a single file's content into the database.

        Args:
            db: Open connection; the caller is responsible for committing.
            content: Decoded text of the file.
            filename: Original file name, used to derive the conversation title.
            keywords: Comma-separated keywords to link to the conversation.
            custom_prompt: Accepted for interface compatibility; not used here.
            rating: Optional rating stored with the conversation metadata.

        Returns:
            The newly generated conversation id (a UUID4 string).
        """
        conversation_id = str(uuid.uuid4())
        current_time = datetime.now().isoformat()

        # Process filename into title
        title = FileProcessor.process_filename_to_title(filename)
        if title.lower().endswith(('.md', '.txt')):
            title = title[:-3] if title.lower().endswith('.md') else title[:-4]

        # Insert conversation metadata
        db.execute("""
            INSERT INTO conversation_metadata
            (conversation_id, created_at, last_updated, title, rating)
            VALUES (?, ?, ?, ?, ?)
        """, (conversation_id, current_time, current_time, title, rating))

        # Process content and insert messages
        for msg in self.process_markdown_content(content):
            db.execute("""
                INSERT INTO rag_qa_chats
                (conversation_id, timestamp, role, content)
                VALUES (?, ?, ?, ?)
            """, (conversation_id, current_time, msg['role'], msg['content']))

        # Process keywords
        self.process_keywords(db, conversation_id, keywords)

        return conversation_id

    def extract_zip(self, zip_path: str) -> List[Tuple[str, str]]:
        """Validate a zip archive and return ``(basename, text)`` for each accepted file.

        Raises:
            ValueError: If the archive fails ``ZipValidator`` validation.
        """
        is_valid, error_msg, valid_files = self.zip_validator.validate_zip_file(zip_path)
        if not is_valid:
            raise ValueError(error_msg)

        files = []
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            for member_name in valid_files:
                with zip_ref.open(member_name) as f:
                    raw = f.read()
                # Try to decode with detected encoding
                try:
                    detected_encoding = detect(raw)['encoding'] or 'utf-8'
                    text = raw.decode(detected_encoding)
                except (UnicodeDecodeError, LookupError):
                    # LookupError: the detector named a codec Python does not know.
                    text = raw.decode('utf-8', errors='replace')

                files.append((os.path.basename(member_name), text))
        return files

    def import_files(
            self,
            files: List[str],
            keywords: str = "",
            custom_prompt: Optional[str] = None,
            rating: Optional[int] = None,
            progress=gr.Progress()
    ) -> Tuple[bool, str]:
        """Import multiple files or zip files into the RAG QA database.

        All files are imported in a single transaction: if any file fails,
        nothing is committed. Files whose extension is not ``.zip``, ``.md``
        or ``.txt`` are skipped.

        Returns:
            ``(True, summary)`` on success or ``(False, error message)`` on failure.
        """
        try:
            imported_files = []

            db = sqlite3.connect(self.db_path)
            try:
                # ``with db`` commits on success and rolls back on error, but
                # does not close the connection -- hence the outer finally.
                with db:
                    # Process each file
                    for file_path in progress.tqdm(files, desc="Processing files"):
                        filename = os.path.basename(file_path)
                        lowered = filename.lower()

                        # Handle zip files
                        if lowered.endswith('.zip'):
                            zip_files = self.extract_zip(file_path)
                            for zip_filename, content in progress.tqdm(zip_files, desc=f"Processing files from {filename}"):
                                self.import_single_file(
                                    db=db,
                                    content=content,
                                    filename=zip_filename,
                                    keywords=keywords,
                                    custom_prompt=custom_prompt,
                                    rating=rating
                                )
                                imported_files.append(zip_filename)

                        # Handle individual markdown/text files
                        elif lowered.endswith(('.md', '.txt')):
                            # Use the encoding-detecting reader so a non-UTF-8
                            # file does not abort the whole batch.
                            content = self.file_processor.read_file_content(file_path)

                            self.import_single_file(
                                db=db,
                                content=content,
                                filename=filename,
                                keywords=keywords,
                                custom_prompt=custom_prompt,
                                rating=rating
                            )
                            imported_files.append(filename)
            finally:
                db.close()

            return True, f"Successfully imported {len(imported_files)} files:\n" + "\n".join(imported_files)

        except Exception as e:
            logging.error(f"Import failed: {str(e)}")
            return False, f"Import failed: {str(e)}"
|
622 |
+
|
623 |
+
|
624 |
+
class FileProcessor:
    """Handles file reading and name processing"""

    VALID_EXTENSIONS = {'.md', '.txt', '.zip'}
    # Fallback encodings, tried in order when the detected encoding fails.
    # NOTE: 'iso-8859-1' can decode any byte sequence, so entries after it
    # (and the final errors='replace' fallback) are effectively a safety net.
    ENCODINGS_TO_TRY = [
        'utf-8',
        'utf-16',
        'windows-1252',
        'iso-8859-1',
        'ascii'
    ]

    @staticmethod
    def detect_encoding(file_path: str) -> str:
        """Detect the file encoding using chardet; defaults to 'utf-8' when unknown."""
        with open(file_path, 'rb') as file:
            raw_data = file.read()
        result = detect(raw_data)
        return result['encoding'] or 'utf-8'

    @staticmethod
    def read_file_content(file_path: str) -> str:
        """Read file content with automatic encoding detection.

        The detected encoding is tried first, then each entry of
        ``ENCODINGS_TO_TRY``. An encoding that cannot decode the file, or
        whose name Python does not recognise, is skipped. As a last resort
        the file is read as UTF-8 with undecodable bytes replaced.
        """
        detected_encoding = FileProcessor.detect_encoding(file_path)

        candidates = [detected_encoding]
        candidates.extend(
            encoding for encoding in FileProcessor.ENCODINGS_TO_TRY
            if encoding.casefold() != detected_encoding.casefold()
        )

        for encoding in candidates:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    return f.read()
            except (UnicodeError, LookupError):
                # LookupError: the detector named a codec Python does not know.
                continue

        # If all encodings fail, use utf-8 with error handling
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            return f.read()

    @staticmethod
    def process_filename_to_title(filename: str) -> str:
        """Convert filename to a readable title.

        The extension is dropped, ``-``/``_`` separators become spaces and
        words are capitalised (small words such as "of" stay lower-case
        unless they come first). A valid date written as ``YYYY-MM-DD``,
        ``YYYY_MM_DD`` or ``YYYYMMDD`` is moved to the end as
        ``" - Mon DD, YYYY"``; when the name consists only of a date, the
        formatted date alone is returned.
        """
        # Remove extension
        name = os.path.splitext(filename)[0]

        # Look for date patterns. The digit guards keep longer numbers
        # (ids, timestamps) from being partially read as a date.
        date_pattern = r'(?<!\d)(\d{4})[-_]?(\d{2})[-_]?(\d{2})(?!\d)'
        date_match = re.search(date_pattern, name)
        date_str = ""
        if date_match:
            try:
                year, month, day = (int(part) for part in date_match.groups())
                # Building the date from its parts handles every separator
                # style the pattern accepts, including none at all.
                date = datetime(year, month, day)
                date_str = date.strftime("%b %d, %Y")
                name = name.replace(date_match.group(0), '').strip('-_')
            except ValueError:
                # Not a real calendar date; leave the digits in the title.
                pass

        # Replace separators with spaces
        name = re.sub(r'[-_]+', ' ', name)

        # Remove redundant spaces
        name = re.sub(r'\s+', ' ', name).strip()

        # Capitalize words, excluding certain words
        exclude_words = {'a', 'an', 'the', 'in', 'on', 'at', 'to', 'for', 'of', 'with'}
        capitalized = [
            word.capitalize() if i == 0 or word.lower() not in exclude_words else word.lower()
            for i, word in enumerate(name.split())
        ]
        name = ' '.join(capitalized)

        # Add date if found
        if date_str:
            name = f"{name} - {date_str}" if name else date_str

        return name
|
706 |
+
|
707 |
+
|
708 |
+
class ZipValidator:
    """Validates zip file contents and structure"""

    MAX_ZIP_SIZE = 100 * 1024 * 1024  # 100MB
    MAX_FILES = 100
    VALID_EXTENSIONS = {'.md', '.txt'}

    @staticmethod
    def _has_unsafe_path(name: str) -> bool:
        """Return True for absolute member names or names with a ``..`` component.

        Components are compared exactly, so a name that merely contains two
        dots (``notes..draft.md``) is not treated as a traversal attempt.
        Backslashes are treated as separators as well.
        """
        normalized = name.replace('\\', '/')
        return normalized.startswith('/') or '..' in normalized.split('/')

    @staticmethod
    def validate_zip_file(zip_path: str) -> Tuple[bool, str, List[str]]:
        """
        Validate zip file and its contents

        Checks, in order: archive size on disk, number of files (directory
        entries are not counted), unsafe member paths, per-file and total
        uncompressed size, and finally collects the members whose extension
        is in ``VALID_EXTENSIONS``. Members with other extensions are ignored.

        Returns: (is_valid, error_message, valid_files)
        """
        try:
            # Check zip file size
            if os.path.getsize(zip_path) > ZipValidator.MAX_ZIP_SIZE:
                return False, "Zip file too large (max 100MB)", []

            valid_files = []
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                entries = zip_ref.infolist()
                file_entries = [entry for entry in entries if not entry.is_dir()]

                # Check number of files
                if len(file_entries) > ZipValidator.MAX_FILES:
                    return False, f"Too many files in zip (max {ZipValidator.MAX_FILES})", []

                # Check for directory traversal attempts
                if any(ZipValidator._has_unsafe_path(entry.filename) for entry in entries):
                    return False, "Invalid file paths detected", []

                # Validate each file
                total_size = 0
                for file_info in file_entries:
                    # Check file size
                    if file_info.file_size > ZipValidator.MAX_ZIP_SIZE:
                        return False, f"File {file_info.filename} too large", []

                    total_size += file_info.file_size
                    if total_size > ZipValidator.MAX_ZIP_SIZE:
                        return False, "Total uncompressed size too large", []

                    # Check file extension
                    ext = os.path.splitext(file_info.filename)[1].lower()
                    if ext in ZipValidator.VALID_EXTENSIONS:
                        valid_files.append(file_info.filename)

            if not valid_files:
                return False, "No valid markdown or text files found in zip", []

            return True, "", valid_files

        except zipfile.BadZipFile:
            return False, "Invalid or corrupted zip file", []
        except Exception as e:
            return False, f"Error processing zip file: {str(e)}", []
|
766 |
+
|
767 |
+
|
768 |
+
def create_conversation_import_tab() -> gr.Tab:
    """Create the import tab for the Gradio interface.

    The tab offers a multi-file upload (txt/md/zip), optional keywords,
    custom prompt and rating inputs, and an "Import Files" button whose
    handler passes the uploads to RAGQABatchImporter.import_files and shows
    the returned message in the status box.

    Returns:
        The gr.Tab container that was built.
    """
    with gr.Tab("Import RAG Chats") as tab:
        gr.Markdown("# Import RAG Chats into the Database")
        gr.Markdown("""
        Import your RAG Chat markdown/text files individually or as a zip archive

        Supported file types:
        - Markdown (.md)
        - Text (.txt)
        - Zip archives containing .md or .txt files

        Maximum zip file size: 100MB
        Maximum files per zip: 100
        """)
        with gr.Row():
            with gr.Column():
                import_files = gr.File(
                    label="Upload Files",
                    file_types=["txt", "md", "zip"],
                    file_count="multiple"
                )

                keywords_input = gr.Textbox(
                    label="Keywords",
                    placeholder="Enter keywords to apply to all imported files (comma-separated)"
                )

                custom_prompt_input = gr.Textbox(
                    label="Custom Prompt",
                    placeholder="Enter a custom prompt for processing (optional)"
                )

                rating_input = gr.Slider(
                    minimum=1,
                    maximum=3,
                    step=1,
                    label="Rating (1-3)",
                    value=None
                )

            with gr.Column():
                import_button = gr.Button("Import Files")
                import_output = gr.Textbox(
                    label="Import Status",
                    lines=10
                )

        def handle_import(files, keywords, custom_prompt, rating):
            """Run the batch import for the uploaded files and return the status text."""
            # The File component yields None when nothing was uploaded;
            # iterating it would raise TypeError instead of informing the user.
            if not files:
                return "No files selected. Please upload at least one .md, .txt or .zip file."

            importer = RAGQABatchImporter("rag_qa.db")  # Update with your DB path
            # Only the message is shown; the success flag is already reflected in it.
            _, message = importer.import_files(
                files=[f.name for f in files],
                keywords=keywords,
                custom_prompt=custom_prompt,
                rating=rating
            )
            return message

        import_button.click(
            fn=handle_import,
            inputs=[
                import_files,
                keywords_input,
                custom_prompt_input,
                rating_input
            ],
            outputs=import_output
        )

    return tab
|
App_Function_Libraries/Gradio_UI/Keywords.py
CHANGED
@@ -4,22 +4,29 @@
|
|
4 |
# The Keywords tab allows the user to add, delete, view, and export keywords from the database.
|
5 |
#
|
6 |
# Imports:
|
7 |
-
|
8 |
#
|
9 |
# External Imports
|
10 |
import gradio as gr
|
|
|
|
|
|
|
11 |
#
|
12 |
# Internal Imports
|
13 |
from App_Function_Libraries.DB.DB_Manager import add_keyword, delete_keyword, keywords_browser_interface, export_keywords_to_csv
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
#
|
16 |
######################################################################################################################
|
17 |
#
|
18 |
# Functions:
|
19 |
|
20 |
-
|
21 |
def create_export_keywords_tab():
|
22 |
-
with gr.TabItem("Export Keywords", visible=True):
|
23 |
with gr.Row():
|
24 |
with gr.Column():
|
25 |
export_keywords_button = gr.Button("Export Keywords")
|
@@ -33,8 +40,8 @@ def create_export_keywords_tab():
|
|
33 |
)
|
34 |
|
35 |
def create_view_keywords_tab():
|
36 |
-
with gr.TabItem("View Keywords", visible=True):
|
37 |
-
gr.Markdown("# Browse Keywords")
|
38 |
with gr.Column():
|
39 |
browse_output = gr.Markdown()
|
40 |
browse_button = gr.Button("View Existing Keywords")
|
@@ -42,7 +49,7 @@ def create_view_keywords_tab():
|
|
42 |
|
43 |
|
44 |
def create_add_keyword_tab():
|
45 |
-
with gr.TabItem("Add Keywords", visible=True):
|
46 |
with gr.Row():
|
47 |
with gr.Column():
|
48 |
gr.Markdown("# Add Keywords to the Database")
|
@@ -54,7 +61,7 @@ def create_add_keyword_tab():
|
|
54 |
|
55 |
|
56 |
def create_delete_keyword_tab():
|
57 |
-
with gr.Tab("Delete Keywords", visible=True):
|
58 |
with gr.Row():
|
59 |
with gr.Column():
|
60 |
gr.Markdown("# Delete Keywords from the Database")
|
@@ -63,3 +70,289 @@ def create_delete_keyword_tab():
|
|
63 |
with gr.Row():
|
64 |
delete_output = gr.Textbox(label="Result")
|
65 |
delete_button.click(fn=delete_keyword, inputs=delete_input, outputs=delete_output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
# The Keywords tab allows the user to add, delete, view, and export keywords from the database.
|
5 |
#
|
6 |
# Imports:
|
|
|
7 |
#
|
8 |
# External Imports
|
9 |
import gradio as gr
|
10 |
+
|
11 |
+
from App_Function_Libraries.DB.Character_Chat_DB import view_char_keywords, add_char_keywords, delete_char_keyword, \
|
12 |
+
export_char_keywords_to_csv
|
13 |
#
|
14 |
# Internal Imports
|
15 |
from App_Function_Libraries.DB.DB_Manager import add_keyword, delete_keyword, keywords_browser_interface, export_keywords_to_csv
|
16 |
+
from App_Function_Libraries.DB.Prompts_DB import view_prompt_keywords, delete_prompt_keyword, \
|
17 |
+
export_prompt_keywords_to_csv
|
18 |
+
from App_Function_Libraries.DB.RAG_QA_Chat_DB import view_rag_keywords, get_all_collections, \
|
19 |
+
get_keywords_for_collection, create_keyword_collection, add_keyword_to_collection, delete_rag_keyword, \
|
20 |
+
export_rag_keywords_to_csv
|
21 |
+
|
22 |
+
|
23 |
#
|
24 |
######################################################################################################################
|
25 |
#
|
26 |
# Functions:
|
27 |
|
|
|
28 |
def create_export_keywords_tab():
|
29 |
+
with gr.TabItem("Export MediaDB Keywords", visible=True):
|
30 |
with gr.Row():
|
31 |
with gr.Column():
|
32 |
export_keywords_button = gr.Button("Export Keywords")
|
|
|
40 |
)
|
41 |
|
42 |
def create_view_keywords_tab():
|
43 |
+
with gr.TabItem("View MediaDB Keywords", visible=True):
|
44 |
+
gr.Markdown("# Browse MediaDB Keywords")
|
45 |
with gr.Column():
|
46 |
browse_output = gr.Markdown()
|
47 |
browse_button = gr.Button("View Existing Keywords")
|
|
|
49 |
|
50 |
|
51 |
def create_add_keyword_tab():
|
52 |
+
with gr.TabItem("Add MediaDB Keywords", visible=True):
|
53 |
with gr.Row():
|
54 |
with gr.Column():
|
55 |
gr.Markdown("# Add Keywords to the Database")
|
|
|
61 |
|
62 |
|
63 |
def create_delete_keyword_tab():
|
64 |
+
with gr.Tab("Delete MediaDB Keywords", visible=True):
|
65 |
with gr.Row():
|
66 |
with gr.Column():
|
67 |
gr.Markdown("# Delete Keywords from the Database")
|
|
|
70 |
with gr.Row():
|
71 |
delete_output = gr.Textbox(label="Result")
|
72 |
delete_button.click(fn=delete_keyword, inputs=delete_input, outputs=delete_output)
|
73 |
+
|
74 |
+
#
|
75 |
+
# End of Media DB Keyword tabs
|
76 |
+
##########################################################
|
77 |
+
|
78 |
+
|
79 |
+
############################################################
|
80 |
+
#
|
81 |
+
# Character DB Keyword functions
|
82 |
+
|
83 |
+
def create_character_keywords_tab():
    """Creates the Character Keywords management tab.

    Builds a "Character Keywords" tab containing four sub-tabs (view, add,
    delete, export). Each sub-tab's button is wired to the matching
    character-keyword helper: view_char_keywords, add_char_keywords,
    delete_char_keyword and export_char_keywords_to_csv.
    """
    with gr.Tab("Character Keywords"):
        gr.Markdown("# Character Keywords Management")

        with gr.Tabs():
            # View Character Keywords Tab
            with gr.TabItem("View Keywords"):
                with gr.Column():
                    refresh_char_keywords = gr.Button("Refresh Character Keywords")
                    # Seed the view with the current keywords. The lookup was
                    # already performed here at build time, but its result
                    # used to be thrown away, leaving the panel empty until
                    # the user pressed Refresh.
                    char_keywords_output = gr.Markdown(value=view_char_keywords())
                    refresh_char_keywords.click(
                        fn=view_char_keywords,
                        outputs=char_keywords_output
                    )

            # Add Character Keywords Tab
            with gr.TabItem("Add Keywords"):
                with gr.Column():
                    char_name = gr.Textbox(label="Character Name")
                    new_keywords = gr.Textbox(label="New Keywords (comma-separated)")
                    add_char_keyword_btn = gr.Button("Add Keywords")
                    add_char_result = gr.Markdown()

                    add_char_keyword_btn.click(
                        fn=add_char_keywords,
                        inputs=[char_name, new_keywords],
                        outputs=add_char_result
                    )

            # Delete Character Keywords Tab
            with gr.TabItem("Delete Keywords"):
                with gr.Column():
                    delete_char_name = gr.Textbox(label="Character Name")
                    delete_char_keyword_input = gr.Textbox(label="Keyword to Delete")
                    delete_char_keyword_btn = gr.Button("Delete Keyword")
                    delete_char_result = gr.Markdown()

                    delete_char_keyword_btn.click(
                        fn=delete_char_keyword,
                        inputs=[delete_char_name, delete_char_keyword_input],
                        outputs=delete_char_result
                    )

            # Export Character Keywords Tab
            with gr.TabItem("Export Keywords"):
                with gr.Column():
                    export_char_keywords_btn = gr.Button("Export Character Keywords")
                    export_char_file = gr.File(label="Download Exported Keywords")
                    export_char_status = gr.Textbox(label="Export Status")

                    # The export helper's two return values feed the outputs
                    # in this order: status text first, then the file.
                    export_char_keywords_btn.click(
                        fn=export_char_keywords_to_csv,
                        outputs=[export_char_status, export_char_file]
                    )
|
127 |
+
|
128 |
+
# Export Character Keywords Tab (New)
|
129 |
+
with gr.TabItem("Export Keywords"):
|
130 |
+
with gr.Column():
|
131 |
+
export_char_keywords_btn = gr.Button("Export Character Keywords")
|
132 |
+
export_char_file = gr.File(label="Download Exported Keywords")
|
133 |
+
export_char_status = gr.Textbox(label="Export Status")
|
134 |
+
|
135 |
+
export_char_keywords_btn.click(
|
136 |
+
fn=export_char_keywords_to_csv,
|
137 |
+
outputs=[export_char_status, export_char_file]
|
138 |
+
)
|
139 |
+
|
140 |
+
#
|
141 |
+
# End of Character Keywords tab
|
142 |
+
##########################################################
|
143 |
+
|
144 |
+
############################################################
|
145 |
+
#
|
146 |
+
# RAG QA Keywords functions
|
147 |
+
|
148 |
+
def create_rag_qa_keywords_tab():
    """Creates the RAG QA Keywords management tab.

    Builds a "RAG QA Keywords" tab containing four sub-tabs (view, add,
    delete, export) and wires each button to its handler: view_rag_keywords,
    add_keyword, delete_rag_keyword and export_rag_keywords_to_csv.
    """
    with gr.Tab("RAG QA Keywords"):
        gr.Markdown("# RAG QA Keywords Management")

        with gr.Tabs():
            # View RAG QA Keywords Tab
            with gr.TabItem("View Keywords"):
                with gr.Column():
                    refresh_rag_keywords = gr.Button("Refresh RAG QA Keywords")
                    # Seed the view with the current keywords. The lookup was
                    # already performed here at build time, but its result
                    # used to be thrown away, leaving the panel empty until
                    # the user pressed Refresh.
                    rag_keywords_output = gr.Markdown(value=view_rag_keywords())

                    refresh_rag_keywords.click(
                        fn=view_rag_keywords,
                        outputs=rag_keywords_output
                    )

            # Add RAG QA Keywords Tab
            with gr.TabItem("Add Keywords"):
                with gr.Column():
                    new_rag_keywords = gr.Textbox(label="New Keywords (comma-separated)")
                    add_rag_keyword_btn = gr.Button("Add Keywords")
                    add_rag_result = gr.Markdown()

                    # NOTE(review): add_keyword is the handler the MediaDB
                    # "Add Keywords" flow uses, while view/delete/export in
                    # this tab use RAG-specific helpers. Confirm that keywords
                    # added here actually land in the RAG QA database.
                    add_rag_keyword_btn.click(
                        fn=add_keyword,
                        inputs=new_rag_keywords,
                        outputs=add_rag_result
                    )

            # Delete RAG QA Keywords Tab
            with gr.TabItem("Delete Keywords"):
                with gr.Column():
                    delete_rag_keyword_input = gr.Textbox(label="Keyword to Delete")
                    delete_rag_keyword_btn = gr.Button("Delete Keyword")
                    delete_rag_result = gr.Markdown()

                    delete_rag_keyword_btn.click(
                        fn=delete_rag_keyword,
                        inputs=delete_rag_keyword_input,
                        outputs=delete_rag_result
                    )

            # Export RAG QA Keywords Tab
            with gr.TabItem("Export Keywords"):
                with gr.Column():
                    export_rag_keywords_btn = gr.Button("Export RAG QA Keywords")
                    export_rag_file = gr.File(label="Download Exported Keywords")
                    export_rag_status = gr.Textbox(label="Export Status")

                    # The export helper's two return values feed the outputs
                    # in this order: status text first, then the file.
                    export_rag_keywords_btn.click(
                        fn=export_rag_keywords_to_csv,
                        outputs=[export_rag_status, export_rag_file]
                    )
|
204 |
+
|
205 |
+
#
|
206 |
+
# End of RAG QA Keywords tab
|
207 |
+
##########################################################
|
208 |
+
|
209 |
+
|
210 |
+
############################################################
|
211 |
+
#
|
212 |
+
# Prompt Keywords functions
|
213 |
+
|
214 |
+
def create_prompt_keywords_tab():
    """Creates the Prompt Keywords management tab.

    Sub-tabs: view (refresh button), add (informational text only), delete,
    and export (status box plus downloadable file).
    """
    with gr.Tab("Prompt Keywords"):
        gr.Markdown("# Prompt Keywords Management")

        with gr.Tabs():
            # --- View ---
            with gr.TabItem("View Keywords"):
                with gr.Column():
                    refresh_btn = gr.Button("Refresh Prompt Keywords")
                    keywords_md = gr.Markdown()

                    refresh_btn.click(fn=view_prompt_keywords, outputs=keywords_md)

            # --- Add (handled by the prompt management interface) ---
            with gr.TabItem("Add Keywords"):
                gr.Markdown("""
                To add keywords to prompts, please use the Prompt Management interface.
                Keywords can be added when creating or editing a prompt.
                """)

            # --- Delete ---
            with gr.TabItem("Delete Keywords"):
                with gr.Column():
                    keyword_to_delete = gr.Textbox(label="Keyword to Delete")
                    delete_btn = gr.Button("Delete Keyword")
                    delete_md = gr.Markdown()

                    delete_btn.click(
                        fn=delete_prompt_keyword,
                        inputs=keyword_to_delete,
                        outputs=delete_md,
                    )

            # --- Export ---
            with gr.TabItem("Export Keywords"):
                with gr.Column():
                    export_btn = gr.Button("Export Prompt Keywords")
                    export_status_box = gr.Textbox(label="Export Status", interactive=False)
                    export_file_box = gr.File(label="Download Exported Keywords", interactive=False)

                    def handle_export():
                        """Return (status, file path), with any falsy path normalised to None."""
                        status, file_path = export_prompt_keywords_to_csv()
                        return status, (file_path if file_path else None)

                    export_btn.click(
                        fn=handle_export,
                        outputs=[export_status_box, export_file_box],
                    )
|
268 |
+
#
|
269 |
+
# End of Prompt Keywords tab
|
270 |
+
############################################################
|
271 |
+
|
272 |
+
|
273 |
+
############################################################
|
274 |
+
#
|
275 |
+
# Meta-Keywords functions
|
276 |
+
|
277 |
+
def create_meta_keywords_tab():
    """Creates the Meta-Keywords management tab.

    Meta-keywords are managed as named keyword collections. The tab has
    three sub-tabs: list all collections with their keywords, create a new
    collection, and add keywords to an existing collection. Each handler
    returns a Markdown string and reports failures as text instead of raising.
    """
    with gr.Tab("Meta-Keywords"):
        gr.Markdown("# Meta-Keywords Management")

        with gr.Tabs():
            # View Meta-Keywords Tab
            with gr.TabItem("View Collections"):
                with gr.Column():
                    refresh_collections = gr.Button("Refresh Collections")
                    collections_output = gr.Markdown()

                    def view_collections():
                        """Render every collection and its keywords as a Markdown list."""
                        try:
                            # Only the first element of the returned triple is used here.
                            collections, _, _ = get_all_collections()
                            if not collections:
                                return "No collections found."

                            parts = ["### Keyword Collections:\n"]
                            for collection in collections:
                                keywords = get_keywords_for_collection(collection)
                                parts.append(f"\n**{collection}**:\n")
                                parts.append("\n".join(f"- {k}" for k in keywords))
                                parts.append("\n")
                            return "".join(parts)
                        except Exception as e:
                            return f"Error retrieving collections: {str(e)}"

                    refresh_collections.click(
                        fn=view_collections,
                        outputs=collections_output
                    )

            # Create Collection Tab
            with gr.TabItem("Create Collection"):
                with gr.Column():
                    collection_name = gr.Textbox(label="Collection Name")
                    create_collection_btn = gr.Button("Create Collection")
                    create_result = gr.Markdown()

                    def create_collection(name: str):
                        """Create a keyword collection called `name` and report the outcome."""
                        # Reject blank input here so an unnamed collection is
                        # never handed to the database layer.
                        if not name or not name.strip():
                            return "Error creating collection: collection name cannot be empty"
                        try:
                            create_keyword_collection(name)
                            return f"Successfully created collection: {name}"
                        except Exception as e:
                            return f"Error creating collection: {str(e)}"

                    create_collection_btn.click(
                        fn=create_collection,
                        inputs=collection_name,
                        outputs=create_result
                    )

            # Add Keywords to Collection Tab
            with gr.TabItem("Add to Collection"):
                with gr.Column():
                    collection_select = gr.Textbox(label="Collection Name")
                    keywords_to_add = gr.Textbox(label="Keywords to Add (comma-separated)")
                    add_to_collection_btn = gr.Button("Add Keywords to Collection")
                    add_to_collection_result = gr.Markdown()

                    def add_keywords_to_collection(collection: str, keywords: str):
                        """Add each comma-separated keyword to `collection` and report the outcome."""
                        if not collection or not collection.strip():
                            return "Error adding keywords to collection: collection name cannot be empty"

                        keywords_list = [k.strip() for k in (keywords or "").split(",") if k.strip()]
                        # Without this guard a blank keyword box would report
                        # "Successfully added 0 keywords", which reads as success.
                        if not keywords_list:
                            return "Error adding keywords to collection: no keywords provided"

                        try:
                            for keyword in keywords_list:
                                add_keyword_to_collection(collection, keyword)
                            return f"Successfully added {len(keywords_list)} keywords to collection {collection}"
                        except Exception as e:
                            return f"Error adding keywords to collection: {str(e)}"

                    add_to_collection_btn.click(
                        fn=add_keywords_to_collection,
                        inputs=[collection_select, keywords_to_add],
                        outputs=add_to_collection_result
                    )
|
351 |
+
|
352 |
+
#
|
353 |
+
# End of Meta-Keywords tab
|
354 |
+
##########################################################
|
355 |
+
|
356 |
+
#
|
357 |
+
# End of Keywords.py
|
358 |
+
######################################################################################################################
|
App_Function_Libraries/Gradio_UI/Live_Recording.py
CHANGED
@@ -13,6 +13,8 @@ from App_Function_Libraries.Audio.Audio_Transcription_Lib import (record_audio,
|
|
13 |
stop_recording)
|
14 |
from App_Function_Libraries.DB.DB_Manager import add_media_to_database
|
15 |
from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
|
|
|
|
|
16 |
#
|
17 |
#######################################################################################################################
|
18 |
#
|
@@ -22,6 +24,16 @@ whisper_models = ["small", "medium", "small.en", "medium.en", "medium", "large",
|
|
22 |
"distil-large-v2", "distil-medium.en", "distil-small.en"]
|
23 |
|
24 |
def create_live_recording_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
with gr.Tab("Live Recording and Transcription", visible=True):
|
26 |
gr.Markdown("# Live Audio Recording and Transcription")
|
27 |
with gr.Row():
|
@@ -34,6 +46,13 @@ def create_live_recording_tab():
|
|
34 |
custom_title = gr.Textbox(label="Custom Title (for database)", visible=False)
|
35 |
record_button = gr.Button("Start Recording")
|
36 |
stop_button = gr.Button("Stop Recording")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
with gr.Column():
|
38 |
output = gr.Textbox(label="Transcription", lines=10)
|
39 |
audio_output = gr.Audio(label="Recorded Audio", visible=False)
|
|
|
13 |
stop_recording)
|
14 |
from App_Function_Libraries.DB.DB_Manager import add_media_to_database
|
15 |
from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
|
16 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
|
17 |
+
|
18 |
#
|
19 |
#######################################################################################################################
|
20 |
#
|
|
|
24 |
"distil-large-v2", "distil-medium.en", "distil-small.en"]
|
25 |
|
26 |
def create_live_recording_tab():
|
27 |
+
try:
|
28 |
+
default_value = None
|
29 |
+
if default_api_endpoint:
|
30 |
+
if default_api_endpoint in global_api_endpoints:
|
31 |
+
default_value = format_api_name(default_api_endpoint)
|
32 |
+
else:
|
33 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
34 |
+
except Exception as e:
|
35 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
36 |
+
default_value = None
|
37 |
with gr.Tab("Live Recording and Transcription", visible=True):
|
38 |
gr.Markdown("# Live Audio Recording and Transcription")
|
39 |
with gr.Row():
|
|
|
46 |
custom_title = gr.Textbox(label="Custom Title (for database)", visible=False)
|
47 |
record_button = gr.Button("Start Recording")
|
48 |
stop_button = gr.Button("Stop Recording")
|
49 |
+
# FIXME - Add a button to perform analysis/summarization on the transcription
|
50 |
+
# Refactored API selection dropdown
|
51 |
+
# api_name_input = gr.Dropdown(
|
52 |
+
# choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
53 |
+
# value=default_value,
|
54 |
+
# label="API for Summarization (Optional)"
|
55 |
+
# )
|
56 |
with gr.Column():
|
57 |
output = gr.Textbox(label="Transcription", lines=10)
|
58 |
audio_output = gr.Audio(label="Recorded Audio", visible=False)
|
App_Function_Libraries/Gradio_UI/Llamafile_tab.py
ADDED
@@ -0,0 +1,327 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Llamafile_tab.py
|
2 |
+
# Description: Gradio interface for configuring and launching Llamafile with Local LLMs
|
3 |
+
|
4 |
+
# Imports
|
5 |
+
import os
|
6 |
+
import logging
|
7 |
+
from typing import Tuple, Optional
|
8 |
+
import gradio as gr
|
9 |
+
|
10 |
+
|
11 |
+
from App_Function_Libraries.Local_LLM.Local_LLM_Inference_Engine_Lib import (
|
12 |
+
download_llm_model,
|
13 |
+
llm_models,
|
14 |
+
start_llamafile,
|
15 |
+
get_gguf_llamafile_files
|
16 |
+
)
|
17 |
+
#
|
18 |
+
#######################################################################################################################
|
19 |
+
#
|
20 |
+
# Functions:
|
21 |
+
|
22 |
+
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
23 |
+
MODELS_DIR = os.path.join(BASE_DIR, "Models")
|
24 |
+
|
25 |
+
def create_chat_with_llamafile_tab():
|
26 |
+
# Function to update model path based on selection
|
27 |
+
def on_local_model_change(selected_model: str, search_directory: str) -> str:
|
28 |
+
if selected_model and isinstance(search_directory, str):
|
29 |
+
model_path = os.path.abspath(os.path.join(search_directory, selected_model))
|
30 |
+
logging.debug(f"Selected model path: {model_path}") # Debug print for selected model path
|
31 |
+
return model_path
|
32 |
+
return "Invalid selection or directory."
|
33 |
+
|
34 |
+
# Function to update the dropdown with available models
|
35 |
+
def update_dropdowns(search_directory: str) -> Tuple[dict, str]:
|
36 |
+
logging.debug(f"User-entered directory: {search_directory}") # Debug print for directory
|
37 |
+
if not os.path.isdir(search_directory):
|
38 |
+
logging.debug(f"Directory does not exist: {search_directory}") # Debug print for non-existing directory
|
39 |
+
return gr.update(choices=[], value=None), "Directory does not exist."
|
40 |
+
|
41 |
+
try:
|
42 |
+
logging.debug(f"Directory exists: {search_directory}, scanning for files...") # Confirm directory exists
|
43 |
+
model_files = get_gguf_llamafile_files(search_directory)
|
44 |
+
logging.debug("Completed scanning for model files.")
|
45 |
+
except Exception as e:
|
46 |
+
logging.error(f"Error scanning directory: {e}")
|
47 |
+
return gr.update(choices=[], value=None), f"Error scanning directory: {e}"
|
48 |
+
|
49 |
+
if not model_files:
|
50 |
+
logging.debug(f"No model files found in {search_directory}") # Debug print for no files found
|
51 |
+
return gr.update(choices=[], value=None), "No model files found in the specified directory."
|
52 |
+
|
53 |
+
# Update the dropdown choices with the model files found
|
54 |
+
logging.debug(f"Models loaded from {search_directory}: {model_files}") # Debug: Print model files loaded
|
55 |
+
return gr.update(choices=model_files, value=None), f"Models loaded from {search_directory}."
|
56 |
+
|
57 |
+
|
58 |
+
|
59 |
+
def download_preset_model(selected_model: str) -> Tuple[str, str]:
|
60 |
+
"""
|
61 |
+
Downloads the selected preset model.
|
62 |
+
|
63 |
+
Args:
|
64 |
+
selected_model (str): The key of the selected preset model.
|
65 |
+
|
66 |
+
Returns:
|
67 |
+
Tuple[str, str]: Status message and the path to the downloaded model.
|
68 |
+
"""
|
69 |
+
model_info = llm_models.get(selected_model)
|
70 |
+
if not model_info:
|
71 |
+
return "Invalid model selection.", ""
|
72 |
+
|
73 |
+
try:
|
74 |
+
model_path = download_llm_model(
|
75 |
+
model_name=model_info["name"],
|
76 |
+
model_url=model_info["url"],
|
77 |
+
model_filename=model_info["filename"],
|
78 |
+
model_hash=model_info["hash"]
|
79 |
+
)
|
80 |
+
return f"Model '{model_info['name']}' downloaded successfully.", model_path
|
81 |
+
except Exception as e:
|
82 |
+
logging.error(f"Error downloading model: {e}")
|
83 |
+
return f"Failed to download model: {e}", ""
|
84 |
+
|
85 |
+
with gr.TabItem("Local LLM with Llamafile", visible=True):
|
86 |
+
gr.Markdown("# Settings for Llamafile")
|
87 |
+
|
88 |
+
with gr.Row():
|
89 |
+
with gr.Column():
|
90 |
+
am_noob = gr.Checkbox(label="Enable Sane Defaults", value=False, visible=True)
|
91 |
+
advanced_mode_toggle = gr.Checkbox(label="Advanced Mode - Show All Settings", value=False)
|
92 |
+
# Advanced Inputs
|
93 |
+
verbose_checked = gr.Checkbox(label="Enable Verbose Output", value=False, visible=False)
|
94 |
+
threads_checked = gr.Checkbox(label="Set CPU Threads", value=False, visible=False)
|
95 |
+
threads_value = gr.Number(label="Number of CPU Threads", value=None, precision=0, visible=False)
|
96 |
+
threads_batched_checked = gr.Checkbox(label="Enable Batched Inference", value=False, visible=False)
|
97 |
+
threads_batched_value = gr.Number(label="Batch Size for Inference", value=None, precision=0, visible=False)
|
98 |
+
model_alias_checked = gr.Checkbox(label="Set Model Alias", value=False, visible=False)
|
99 |
+
model_alias_value = gr.Textbox(label="Model Alias", value="", visible=False)
|
100 |
+
ctx_size_checked = gr.Checkbox(label="Set Prompt Context Size", value=False, visible=False)
|
101 |
+
ctx_size_value = gr.Number(label="Prompt Context Size", value=8124, precision=0, visible=False)
|
102 |
+
ngl_checked = gr.Checkbox(label="Enable GPU Layers", value=False, visible=True)
|
103 |
+
ngl_value = gr.Number(label="Number of GPU Layers", value=None, precision=0, visible=True)
|
104 |
+
batch_size_checked = gr.Checkbox(label="Set Batch Size", value=False, visible=False)
|
105 |
+
batch_size_value = gr.Number(label="Batch Size", value=512, visible=False)
|
106 |
+
memory_f32_checked = gr.Checkbox(label="Use 32-bit Floating Point", value=False, visible=False)
|
107 |
+
numa_checked = gr.Checkbox(label="Enable NUMA", value=False, visible=False)
|
108 |
+
server_timeout_value = gr.Number(label="Server Timeout", value=600, precision=0, visible=False)
|
109 |
+
host_checked = gr.Checkbox(label="Set IP to Listen On", value=False, visible=False)
|
110 |
+
host_value = gr.Textbox(label="Host IP Address", value="", visible=False)
|
111 |
+
port_checked = gr.Checkbox(label="Set Server Port", value=False, visible=False)
|
112 |
+
port_value = gr.Number(label="Port Number", value=8080, precision=0, visible=False)
|
113 |
+
api_key_checked = gr.Checkbox(label="Set API Key", value=False, visible=False)
|
114 |
+
api_key_value = gr.Textbox(label="API Key", value="", visible=False)
|
115 |
+
http_threads_checked = gr.Checkbox(label="Set HTTP Server Threads", value=False, visible=False)
|
116 |
+
http_threads_value = gr.Number(label="Number of HTTP Server Threads", value=None, precision=0, visible=False)
|
117 |
+
hf_repo_checked = gr.Checkbox(label="Use Huggingface Repo Model", value=False, visible=False)
|
118 |
+
hf_repo_value = gr.Textbox(label="Huggingface Repo Name", value="", visible=False)
|
119 |
+
hf_file_checked = gr.Checkbox(label="Set Huggingface Model File", value=False, visible=False)
|
120 |
+
hf_file_value = gr.Textbox(label="Huggingface Model File", value="", visible=False)
|
121 |
+
|
122 |
+
with gr.Column():
|
123 |
+
# Model Selection Section
|
124 |
+
gr.Markdown("## Model Selection")
|
125 |
+
|
126 |
+
# Option 1: Select from Local Filesystem
|
127 |
+
with gr.Row():
|
128 |
+
search_directory = gr.Textbox(
|
129 |
+
label="Model Directory",
|
130 |
+
placeholder="Enter directory path (currently './Models')",
|
131 |
+
value=MODELS_DIR,
|
132 |
+
interactive=True
|
133 |
+
)
|
134 |
+
|
135 |
+
# Initial population of local models
|
136 |
+
initial_dropdown_update, _ = update_dropdowns(MODELS_DIR)
|
137 |
+
logging.debug(f"Scanning directory: {MODELS_DIR}")
|
138 |
+
refresh_button = gr.Button("Refresh Models")
|
139 |
+
local_model_dropdown = gr.Dropdown(
|
140 |
+
label="Select Model from Directory",
|
141 |
+
choices=initial_dropdown_update["choices"],
|
142 |
+
value=None
|
143 |
+
)
|
144 |
+
# Display selected model path
|
145 |
+
model_value = gr.Textbox(label="Selected Model File Path", value="", interactive=False)
|
146 |
+
|
147 |
+
# Option 2: Download Preset Models
|
148 |
+
gr.Markdown("## Download Preset Models")
|
149 |
+
|
150 |
+
preset_model_dropdown = gr.Dropdown(
|
151 |
+
label="Select a Preset Model",
|
152 |
+
choices=list(llm_models.keys()),
|
153 |
+
value=None,
|
154 |
+
interactive=True,
|
155 |
+
info="Choose a preset model to download."
|
156 |
+
)
|
157 |
+
download_preset_button = gr.Button("Download Selected Preset")
|
158 |
+
|
159 |
+
with gr.Row():
|
160 |
+
with gr.Column():
|
161 |
+
start_button = gr.Button("Start Llamafile")
|
162 |
+
stop_button = gr.Button("Stop Llamafile (doesn't work)")
|
163 |
+
output_display = gr.Markdown()
|
164 |
+
|
165 |
+
|
166 |
+
# Show/hide advanced inputs based on toggle
|
167 |
+
def update_visibility(show_advanced: bool):
|
168 |
+
components = [
|
169 |
+
verbose_checked, threads_checked, threads_value,
|
170 |
+
http_threads_checked, http_threads_value,
|
171 |
+
hf_repo_checked, hf_repo_value,
|
172 |
+
hf_file_checked, hf_file_value,
|
173 |
+
ctx_size_checked, ctx_size_value,
|
174 |
+
ngl_checked, ngl_value,
|
175 |
+
host_checked, host_value,
|
176 |
+
port_checked, port_value
|
177 |
+
]
|
178 |
+
return [gr.update(visible=show_advanced) for _ in components]
|
179 |
+
|
180 |
+
def on_start_button_click(
        am_noob: bool,
        verbose_checked: bool,
        threads_checked: bool,
        threads_value: Optional[int],
        threads_batched_checked: bool,
        threads_batched_value: Optional[int],
        model_alias_checked: bool,
        model_alias_value: str,
        http_threads_checked: bool,
        http_threads_value: Optional[int],
        model_value: str,
        hf_repo_checked: bool,
        hf_repo_value: str,
        hf_file_checked: bool,
        hf_file_value: str,
        ctx_size_checked: bool,
        ctx_size_value: Optional[int],
        ngl_checked: bool,
        ngl_value: Optional[int],
        batch_size_checked: bool,
        batch_size_value: Optional[int],
        memory_f32_checked: bool,
        numa_checked: bool,
        server_timeout_value: Optional[int],
        host_checked: bool,
        host_value: str,
        port_checked: bool,
        port_value: Optional[int],
        api_key_checked: bool,
        api_key_value: str
) -> str:
    """
    Click handler for the "Start Llamafile" button.

    Every argument is forwarded to ``start_llamafile`` positionally, unchanged
    and in the order declared in this signature.

    Returns:
        Whatever ``start_llamafile`` returns, or a
        "Failed to start Llamafile: ..." message if it raises.
    """
    # Collected once so the call below cannot drift from the signature order.
    launch_args = (
        am_noob,
        verbose_checked,
        threads_checked, threads_value,
        threads_batched_checked, threads_batched_value,
        model_alias_checked, model_alias_value,
        http_threads_checked, http_threads_value,
        model_value,
        hf_repo_checked, hf_repo_value,
        hf_file_checked, hf_file_value,
        ctx_size_checked, ctx_size_value,
        ngl_checked, ngl_value,
        batch_size_checked, batch_size_value,
        memory_f32_checked,
        numa_checked,
        server_timeout_value,
        host_checked, host_value,
        port_checked, port_value,
        api_key_checked, api_key_value,
    )
    try:
        return start_llamafile(*launch_args)
    except Exception as e:
        # UI boundary: report the failure in the output widget instead of raising.
        logging.error(f"Error starting Llamafile: {e}")
        return f"Failed to start Llamafile: {e}"
|
252 |
+
|
253 |
+
advanced_mode_toggle.change(
|
254 |
+
fn=update_visibility,
|
255 |
+
inputs=[advanced_mode_toggle],
|
256 |
+
outputs=[
|
257 |
+
verbose_checked, threads_checked, threads_value,
|
258 |
+
http_threads_checked, http_threads_value,
|
259 |
+
hf_repo_checked, hf_repo_value,
|
260 |
+
hf_file_checked, hf_file_value,
|
261 |
+
ctx_size_checked, ctx_size_value,
|
262 |
+
ngl_checked, ngl_value,
|
263 |
+
host_checked, host_value,
|
264 |
+
port_checked, port_value
|
265 |
+
]
|
266 |
+
)
|
267 |
+
|
268 |
+
start_button.click(
|
269 |
+
fn=on_start_button_click,
|
270 |
+
inputs=[
|
271 |
+
am_noob,
|
272 |
+
verbose_checked,
|
273 |
+
threads_checked,
|
274 |
+
threads_value,
|
275 |
+
threads_batched_checked,
|
276 |
+
threads_batched_value,
|
277 |
+
model_alias_checked,
|
278 |
+
model_alias_value,
|
279 |
+
http_threads_checked,
|
280 |
+
http_threads_value,
|
281 |
+
model_value,
|
282 |
+
hf_repo_checked,
|
283 |
+
hf_repo_value,
|
284 |
+
hf_file_checked,
|
285 |
+
hf_file_value,
|
286 |
+
ctx_size_checked,
|
287 |
+
ctx_size_value,
|
288 |
+
ngl_checked,
|
289 |
+
ngl_value,
|
290 |
+
batch_size_checked,
|
291 |
+
batch_size_value,
|
292 |
+
memory_f32_checked,
|
293 |
+
numa_checked,
|
294 |
+
server_timeout_value,
|
295 |
+
host_checked,
|
296 |
+
host_value,
|
297 |
+
port_checked,
|
298 |
+
port_value,
|
299 |
+
api_key_checked,
|
300 |
+
api_key_value
|
301 |
+
],
|
302 |
+
outputs=output_display
|
303 |
+
)
|
304 |
+
|
305 |
+
download_preset_button.click(
|
306 |
+
fn=download_preset_model,
|
307 |
+
inputs=[preset_model_dropdown],
|
308 |
+
outputs=[output_display, model_value]
|
309 |
+
)
|
310 |
+
|
311 |
+
# Click event for refreshing models
|
312 |
+
refresh_button.click(
|
313 |
+
fn=update_dropdowns,
|
314 |
+
inputs=[search_directory], # Ensure that the directory path (string) is passed
|
315 |
+
outputs=[local_model_dropdown, output_display] # Update dropdown and status
|
316 |
+
)
|
317 |
+
|
318 |
+
# Event to update model_value when a model is selected from the dropdown
|
319 |
+
local_model_dropdown.change(
|
320 |
+
fn=on_local_model_change, # Function that calculates the model path
|
321 |
+
inputs=[local_model_dropdown, search_directory], # Inputs: selected model and directory
|
322 |
+
outputs=[model_value] # Output: Update the model_value textbox with the selected model path
|
323 |
+
)
|
324 |
+
|
325 |
+
#
|
326 |
+
#
|
327 |
+
#######################################################################################################################
|
App_Function_Libraries/Gradio_UI/Media_edit.py
CHANGED
@@ -10,13 +10,13 @@ import gradio as gr
|
|
10 |
#
|
11 |
# Local Imports
|
12 |
from App_Function_Libraries.DB.DB_Manager import add_prompt, update_media_content, db, add_or_update_prompt, \
|
13 |
-
load_prompt_details, fetch_keywords_for_media, update_keywords_for_media
|
14 |
-
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown
|
15 |
from App_Function_Libraries.DB.SQLite_DB import fetch_item_details
|
16 |
|
17 |
|
18 |
def create_media_edit_tab():
|
19 |
-
with gr.TabItem("Edit Existing Items", visible=True):
|
20 |
gr.Markdown("# Search and Edit Media Items")
|
21 |
|
22 |
with gr.Row():
|
@@ -89,7 +89,7 @@ def create_media_edit_tab():
|
|
89 |
|
90 |
|
91 |
def create_media_edit_and_clone_tab():
|
92 |
-
with gr.TabItem("Clone and Edit Existing Items", visible=True):
|
93 |
gr.Markdown("# Search, Edit, and Clone Existing Items")
|
94 |
|
95 |
with gr.Row():
|
@@ -199,6 +199,11 @@ def create_media_edit_and_clone_tab():
|
|
199 |
|
200 |
|
201 |
def create_prompt_edit_tab():
|
|
|
|
|
|
|
|
|
|
|
202 |
with gr.TabItem("Add & Edit Prompts", visible=True):
|
203 |
with gr.Row():
|
204 |
with gr.Column():
|
@@ -207,38 +212,145 @@ def create_prompt_edit_tab():
|
|
207 |
choices=[],
|
208 |
interactive=True
|
209 |
)
|
|
|
|
|
|
|
210 |
prompt_list_button = gr.Button("List Prompts")
|
211 |
|
212 |
with gr.Column():
|
213 |
title_input = gr.Textbox(label="Title", placeholder="Enter the prompt title")
|
214 |
-
author_input = gr.Textbox(label="Author", placeholder="Enter the prompt's author", lines=
|
215 |
description_input = gr.Textbox(label="Description", placeholder="Enter the prompt description", lines=3)
|
216 |
system_prompt_input = gr.Textbox(label="System Prompt", placeholder="Enter the system prompt", lines=3)
|
217 |
user_prompt_input = gr.Textbox(label="User Prompt", placeholder="Enter the user prompt", lines=3)
|
218 |
add_prompt_button = gr.Button("Add/Update Prompt")
|
219 |
add_prompt_output = gr.HTML()
|
220 |
|
221 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
prompt_list_button.click(
|
223 |
fn=update_prompt_dropdown,
|
224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
)
|
226 |
|
|
|
227 |
add_prompt_button.click(
|
228 |
fn=add_or_update_prompt,
|
229 |
inputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input],
|
230 |
-
outputs=add_prompt_output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
)
|
232 |
|
233 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
234 |
prompt_dropdown.change(
|
235 |
fn=load_prompt_details,
|
236 |
inputs=[prompt_dropdown],
|
237 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
)
|
239 |
|
240 |
|
|
|
241 |
def create_prompt_clone_tab():
|
|
|
|
|
|
|
|
|
|
|
242 |
with gr.TabItem("Clone and Edit Prompts", visible=True):
|
243 |
with gr.Row():
|
244 |
with gr.Column():
|
@@ -248,6 +360,9 @@ def create_prompt_clone_tab():
|
|
248 |
choices=[],
|
249 |
interactive=True
|
250 |
)
|
|
|
|
|
|
|
251 |
prompt_list_button = gr.Button("List Prompts")
|
252 |
|
253 |
with gr.Column():
|
@@ -260,19 +375,99 @@ def create_prompt_clone_tab():
|
|
260 |
save_cloned_prompt_button = gr.Button("Save Cloned Prompt", visible=False)
|
261 |
add_prompt_output = gr.HTML()
|
262 |
|
263 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
prompt_list_button.click(
|
265 |
fn=update_prompt_dropdown,
|
266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
267 |
)
|
268 |
|
269 |
# Load prompt details when selected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
prompt_dropdown.change(
|
271 |
fn=load_prompt_details,
|
272 |
inputs=[prompt_dropdown],
|
273 |
outputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input]
|
274 |
)
|
275 |
|
|
|
276 |
def prepare_for_cloning(selected_prompt):
|
277 |
if selected_prompt:
|
278 |
return gr.update(value=f"Copy of {selected_prompt}"), gr.update(visible=True)
|
@@ -284,18 +479,21 @@ def create_prompt_clone_tab():
|
|
284 |
outputs=[title_input, save_cloned_prompt_button]
|
285 |
)
|
286 |
|
287 |
-
|
|
|
288 |
try:
|
289 |
-
result = add_prompt(title, description, system_prompt, user_prompt)
|
290 |
if result == "Prompt added successfully.":
|
291 |
-
|
|
|
|
|
292 |
else:
|
293 |
-
return result, gr.update()
|
294 |
except Exception as e:
|
295 |
-
return f"Error saving cloned prompt: {str(e)}", gr.update()
|
296 |
|
297 |
save_cloned_prompt_button.click(
|
298 |
fn=save_cloned_prompt,
|
299 |
-
inputs=[title_input, description_input, system_prompt_input, user_prompt_input],
|
300 |
-
outputs=[add_prompt_output, prompt_dropdown]
|
301 |
-
)
|
|
|
10 |
#
|
11 |
# Local Imports
|
12 |
from App_Function_Libraries.DB.DB_Manager import add_prompt, update_media_content, db, add_or_update_prompt, \
|
13 |
+
load_prompt_details, fetch_keywords_for_media, update_keywords_for_media, fetch_prompt_details, list_prompts
|
14 |
+
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown
|
15 |
from App_Function_Libraries.DB.SQLite_DB import fetch_item_details
|
16 |
|
17 |
|
18 |
def create_media_edit_tab():
|
19 |
+
with gr.TabItem("Edit Existing Items in the Media DB", visible=True):
|
20 |
gr.Markdown("# Search and Edit Media Items")
|
21 |
|
22 |
with gr.Row():
|
|
|
89 |
|
90 |
|
91 |
def create_media_edit_and_clone_tab():
|
92 |
+
with gr.TabItem("Clone and Edit Existing Items in the Media DB", visible=True):
|
93 |
gr.Markdown("# Search, Edit, and Clone Existing Items")
|
94 |
|
95 |
with gr.Row():
|
|
|
199 |
|
200 |
|
201 |
def create_prompt_edit_tab():
|
202 |
+
# Initialize state variables for pagination
|
203 |
+
current_page_state = gr.State(value=1)
|
204 |
+
total_pages_state = gr.State(value=1)
|
205 |
+
per_page = 10 # Number of prompts per page
|
206 |
+
|
207 |
with gr.TabItem("Add & Edit Prompts", visible=True):
|
208 |
with gr.Row():
|
209 |
with gr.Column():
|
|
|
212 |
choices=[],
|
213 |
interactive=True
|
214 |
)
|
215 |
+
next_page_button = gr.Button("Next Page", visible=False)
|
216 |
+
page_display = gr.Markdown("Page 1 of X", visible=False)
|
217 |
+
prev_page_button = gr.Button("Previous Page", visible=False)
|
218 |
prompt_list_button = gr.Button("List Prompts")
|
219 |
|
220 |
with gr.Column():
|
221 |
title_input = gr.Textbox(label="Title", placeholder="Enter the prompt title")
|
222 |
+
author_input = gr.Textbox(label="Author", placeholder="Enter the prompt's author", lines=1)
|
223 |
description_input = gr.Textbox(label="Description", placeholder="Enter the prompt description", lines=3)
|
224 |
system_prompt_input = gr.Textbox(label="System Prompt", placeholder="Enter the system prompt", lines=3)
|
225 |
user_prompt_input = gr.Textbox(label="User Prompt", placeholder="Enter the user prompt", lines=3)
|
226 |
add_prompt_button = gr.Button("Add/Update Prompt")
|
227 |
add_prompt_output = gr.HTML()
|
228 |
|
229 |
+
# Function to update the prompt dropdown with pagination
|
230 |
+
def update_prompt_dropdown(page=1):
    """
    Fetch one page of prompts and build the updates for the pagination widgets.

    Args:
        page: Page number requested from ``list_prompts``.

    Returns:
        A 6-tuple: dropdown update (choices), page-label update, "previous"
        button visibility update, "next" button visibility update, the
        current page, and the total page count as reported by ``list_prompts``.
    """
    names, page_count, page_no = list_prompts(page=page, per_page=per_page)
    label = f"Page {page_no} of {page_count}"
    # "Previous" is only useful past page 1; "Next" only before the last page.
    show_previous = page_no > 1
    show_next = page_no < page_count
    dropdown_update = gr.update(choices=names)
    label_update = gr.update(value=label, visible=True)
    previous_update = gr.update(visible=show_previous)
    next_update = gr.update(visible=show_next)
    return (dropdown_update, label_update, previous_update, next_update, page_no, page_count)
|
243 |
+
|
244 |
+
# Event handler for listing prompts
|
245 |
prompt_list_button.click(
|
246 |
fn=update_prompt_dropdown,
|
247 |
+
inputs=[],
|
248 |
+
outputs=[
|
249 |
+
prompt_dropdown,
|
250 |
+
page_display,
|
251 |
+
prev_page_button,
|
252 |
+
next_page_button,
|
253 |
+
current_page_state,
|
254 |
+
total_pages_state
|
255 |
+
]
|
256 |
+
)
|
257 |
+
|
258 |
+
# Functions to handle pagination
|
259 |
+
def on_prev_page_click(current_page):
    """Step back one page (never below page 1) and refresh the prompt dropdown."""
    target_page = current_page - 1
    if target_page < 1:
        target_page = 1
    return update_prompt_dropdown(page=target_page)
|
262 |
+
|
263 |
+
def on_next_page_click(current_page, total_pages):
    """Step forward one page (never past ``total_pages``) and refresh the prompt dropdown."""
    target_page = current_page + 1
    if total_pages < target_page:
        target_page = total_pages
    return update_prompt_dropdown(page=target_page)
|
266 |
+
|
267 |
+
# Event handlers for pagination buttons
|
268 |
+
prev_page_button.click(
|
269 |
+
fn=on_prev_page_click,
|
270 |
+
inputs=[current_page_state],
|
271 |
+
outputs=[
|
272 |
+
prompt_dropdown,
|
273 |
+
page_display,
|
274 |
+
prev_page_button,
|
275 |
+
next_page_button,
|
276 |
+
current_page_state,
|
277 |
+
total_pages_state
|
278 |
+
]
|
279 |
+
)
|
280 |
+
|
281 |
+
next_page_button.click(
|
282 |
+
fn=on_next_page_click,
|
283 |
+
inputs=[current_page_state, total_pages_state],
|
284 |
+
outputs=[
|
285 |
+
prompt_dropdown,
|
286 |
+
page_display,
|
287 |
+
prev_page_button,
|
288 |
+
next_page_button,
|
289 |
+
current_page_state,
|
290 |
+
total_pages_state
|
291 |
+
]
|
292 |
)
|
293 |
|
294 |
+
# Event handler for adding or updating a prompt
|
295 |
add_prompt_button.click(
|
296 |
fn=add_or_update_prompt,
|
297 |
inputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input],
|
298 |
+
outputs=[add_prompt_output]
|
299 |
+
).then(
|
300 |
+
fn=update_prompt_dropdown,
|
301 |
+
inputs=[],
|
302 |
+
outputs=[
|
303 |
+
prompt_dropdown,
|
304 |
+
page_display,
|
305 |
+
prev_page_button,
|
306 |
+
next_page_button,
|
307 |
+
current_page_state,
|
308 |
+
total_pages_state
|
309 |
+
]
|
310 |
)
|
311 |
|
312 |
+
# Function to load prompt details when a prompt is selected
|
313 |
+
def load_prompt_details(selected_prompt):
    """
    Populate the prompt form fields from the selected prompt.

    Args:
        selected_prompt: Value of the prompt dropdown, passed to
            ``fetch_prompt_details``.

    Returns:
        A 5-tuple of ``gr.update`` objects for the title, author, description,
        system prompt and user prompt fields. All five are blanked when no
        details are found.
    """
    details = fetch_prompt_details(selected_prompt)
    if not details:
        return tuple(gr.update(value="") for _ in range(5))

    # The sixth element (keywords) is fetched but not shown in this form.
    title, author, description, system_prompt, user_prompt, _keywords = details
    field_values = (
        title,
        author or "",
        description or "",
        system_prompt or "",
        user_prompt or "",
    )
    return tuple(gr.update(value=field_value) for field_value in field_values)
|
332 |
+
|
333 |
+
# Event handler for prompt selection change
|
334 |
prompt_dropdown.change(
|
335 |
fn=load_prompt_details,
|
336 |
inputs=[prompt_dropdown],
|
337 |
+
outputs=[
|
338 |
+
title_input,
|
339 |
+
author_input,
|
340 |
+
description_input,
|
341 |
+
system_prompt_input,
|
342 |
+
user_prompt_input
|
343 |
+
]
|
344 |
)
|
345 |
|
346 |
|
347 |
+
|
348 |
def create_prompt_clone_tab():
|
349 |
+
# Initialize state variables for pagination
|
350 |
+
current_page_state = gr.State(value=1)
|
351 |
+
total_pages_state = gr.State(value=1)
|
352 |
+
per_page = 10 # Number of prompts per page
|
353 |
+
|
354 |
with gr.TabItem("Clone and Edit Prompts", visible=True):
|
355 |
with gr.Row():
|
356 |
with gr.Column():
|
|
|
360 |
choices=[],
|
361 |
interactive=True
|
362 |
)
|
363 |
+
next_page_button = gr.Button("Next Page", visible=False)
|
364 |
+
page_display = gr.Markdown("Page 1 of X", visible=False)
|
365 |
+
prev_page_button = gr.Button("Previous Page", visible=False)
|
366 |
prompt_list_button = gr.Button("List Prompts")
|
367 |
|
368 |
with gr.Column():
|
|
|
375 |
save_cloned_prompt_button = gr.Button("Save Cloned Prompt", visible=False)
|
376 |
add_prompt_output = gr.HTML()
|
377 |
|
378 |
+
# Function to update the prompt dropdown with pagination
|
379 |
+
def update_prompt_dropdown(page=1):
    """
    Load a page of prompts and produce the matching pagination widget updates.

    Args:
        page: Page number requested from ``list_prompts``.

    Returns:
        A 6-tuple: dropdown update (choices), page-label update, "previous"
        button visibility update, "next" button visibility update, the
        current page, and the total page count as reported by ``list_prompts``.
    """
    listing = list_prompts(page=page, per_page=per_page)
    prompt_names, last_page, this_page = listing
    return (
        gr.update(choices=prompt_names),
        gr.update(value=f"Page {this_page} of {last_page}", visible=True),
        # Hide "previous" on the first page and "next" on the last one.
        gr.update(visible=this_page > 1),
        gr.update(visible=this_page < last_page),
        this_page,
        last_page,
    )
|
392 |
+
|
393 |
+
# Event handler for listing prompts
|
394 |
prompt_list_button.click(
|
395 |
fn=update_prompt_dropdown,
|
396 |
+
inputs=[],
|
397 |
+
outputs=[
|
398 |
+
prompt_dropdown,
|
399 |
+
page_display,
|
400 |
+
prev_page_button,
|
401 |
+
next_page_button,
|
402 |
+
current_page_state,
|
403 |
+
total_pages_state
|
404 |
+
]
|
405 |
+
)
|
406 |
+
|
407 |
+
# Functions to handle pagination
|
408 |
+
def on_prev_page_click(current_page):
    """Move to the previous page, clamped at page 1, and refresh the dropdown."""
    previous_page = current_page - 1
    if previous_page < 1:
        previous_page = 1
    return update_prompt_dropdown(page=previous_page)
|
411 |
+
|
412 |
+
def on_next_page_click(current_page, total_pages):
    """Move to the next page, clamped at ``total_pages``, and refresh the dropdown."""
    following_page = current_page + 1
    if total_pages < following_page:
        following_page = total_pages
    return update_prompt_dropdown(page=following_page)
|
415 |
+
|
416 |
+
# Event handlers for pagination buttons
|
417 |
+
prev_page_button.click(
|
418 |
+
fn=on_prev_page_click,
|
419 |
+
inputs=[current_page_state],
|
420 |
+
outputs=[
|
421 |
+
prompt_dropdown,
|
422 |
+
page_display,
|
423 |
+
prev_page_button,
|
424 |
+
next_page_button,
|
425 |
+
current_page_state,
|
426 |
+
total_pages_state
|
427 |
+
]
|
428 |
+
)
|
429 |
+
|
430 |
+
next_page_button.click(
|
431 |
+
fn=on_next_page_click,
|
432 |
+
inputs=[current_page_state, total_pages_state],
|
433 |
+
outputs=[
|
434 |
+
prompt_dropdown,
|
435 |
+
page_display,
|
436 |
+
prev_page_button,
|
437 |
+
next_page_button,
|
438 |
+
current_page_state,
|
439 |
+
total_pages_state
|
440 |
+
]
|
441 |
)
|
442 |
|
443 |
# Load prompt details when selected
|
444 |
+
def load_prompt_details(selected_prompt):
    """
    Fill the clone form with the fields of the selected prompt.

    Args:
        selected_prompt: Value of the prompt dropdown; when falsy no lookup
            is performed.

    Returns:
        A 5-tuple of ``gr.update`` objects for the title, author, description,
        system prompt and user prompt fields. All five are blanked when
        nothing is selected or no details are found.
    """
    details = fetch_prompt_details(selected_prompt) if selected_prompt else None
    if details:
        # The sixth element (keywords) is fetched but not shown in this form.
        title, author, description, system_prompt, user_prompt, _keywords = details
        return (
            gr.update(value=title),
            gr.update(value=author or ""),
            gr.update(value=description or ""),
            gr.update(value=system_prompt or ""),
            gr.update(value=user_prompt or ""),
        )

    cleared = [gr.update(value="") for _ in range(5)]
    return tuple(cleared)
|
463 |
+
|
464 |
prompt_dropdown.change(
|
465 |
fn=load_prompt_details,
|
466 |
inputs=[prompt_dropdown],
|
467 |
outputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input]
|
468 |
)
|
469 |
|
470 |
+
# Prepare for cloning
|
471 |
def prepare_for_cloning(selected_prompt):
|
472 |
if selected_prompt:
|
473 |
return gr.update(value=f"Copy of {selected_prompt}"), gr.update(visible=True)
|
|
|
479 |
outputs=[title_input, save_cloned_prompt_button]
|
480 |
)
|
481 |
|
482 |
+
# Function to save cloned prompt
|
483 |
+
def save_cloned_prompt(title, author, description, system_prompt, user_prompt, current_page, total_pages=1):
    """
    Save the cloned prompt and refresh the paginated prompt dropdown.

    Args:
        title, author, description, system_prompt, user_prompt: Form values
            passed to ``add_prompt``.
        current_page: Current page number from ``current_page_state``.
        total_pages: Current page count from ``total_pages_state``. It is
            echoed back unchanged when the save does not succeed.

    Returns:
        A 7-tuple matching the click handler's outputs: status message,
        dropdown update, page-label update, previous-button update,
        next-button update, current page, total pages.
    """
    # Outputs that leave the dropdown and pagination exactly as they are.
    # The page count comes from the live state input, not from
    # ``total_pages_state.value``: that attribute holds the component's
    # initial value and would reset the count after a failed save.
    unchanged = (gr.update(), gr.update(), gr.update(), gr.update(), current_page, total_pages)
    try:
        result = add_prompt(title, author, description, system_prompt, user_prompt)
        if result == "Prompt added successfully.":
            # After adding, refresh the prompt dropdown on the current page.
            return (result, *update_prompt_dropdown(page=current_page))
        return (result, *unchanged)
    except Exception as e:
        # UI boundary: show the error in the output widget instead of raising.
        return (f"Error saving cloned prompt: {str(e)}", *unchanged)

save_cloned_prompt_button.click(
    fn=save_cloned_prompt,
    inputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input,
            current_page_state, total_pages_state],
    outputs=[add_prompt_output, prompt_dropdown, page_display, prev_page_button, next_page_button,
             current_page_state, total_pages_state]
)
|
App_Function_Libraries/Gradio_UI/Media_wiki_tab.py
CHANGED
@@ -32,6 +32,13 @@ def create_mediawiki_import_tab():
|
|
32 |
value="sentences",
|
33 |
label="Chunking Method"
|
34 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
chunk_size = gr.Slider(minimum=100, maximum=2000, value=1000, step=100, label="Chunk Size")
|
36 |
chunk_overlap = gr.Slider(minimum=0, maximum=500, value=100, step=10, label="Chunk Overlap")
|
37 |
# FIXME - Add checkbox for 'Enable Summarization upon ingestion' for API summarization of chunks
|
|
|
32 |
value="sentences",
|
33 |
label="Chunking Method"
|
34 |
)
|
35 |
+
# FIXME - add API selection dropdown + Analysis/Summarization options
|
36 |
+
# Refactored API selection dropdown
|
37 |
+
# api_name_input = gr.Dropdown(
|
38 |
+
# choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
39 |
+
# value=default_value,
|
40 |
+
# label="API for Summarization (Optional)"
|
41 |
+
# )
|
42 |
chunk_size = gr.Slider(minimum=100, maximum=2000, value=1000, step=100, label="Chunk Size")
|
43 |
chunk_overlap = gr.Slider(minimum=0, maximum=500, value=100, step=10, label="Chunk Overlap")
|
44 |
# FIXME - Add checkbox for 'Enable Summarization upon ingestion' for API summarization of chunks
|
App_Function_Libraries/Gradio_UI/Mind_Map_tab.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Mind_Map_tab.py
|
2 |
+
# Description: File contains functions for generation of PlantUML mindmaps for the gradio tab
|
3 |
+
#
|
4 |
+
# Imports
|
5 |
+
import re
|
6 |
+
#
|
7 |
+
# External Libraries
|
8 |
+
import gradio as gr
|
9 |
+
#
|
10 |
+
######################################################################################################################
|
11 |
+
#
|
12 |
+
# Functions:
|
13 |
+
|
14 |
+
def parse_plantuml_mindmap(plantuml_text: str) -> dict:
    """
    Turn PlantUML mindmap text into a nested ``{'text', 'children'}`` tree.

    Blank lines, lines starting with ``@`` and lines without a leading run
    of ``+``/``-``/``*`` markers are ignored. The length of the marker run
    is the node's depth.

    Returns:
        The root node dict, or ``None`` when no node line was found. If
        several top-level nodes exist, the last one is returned.
    """
    tree_root = None
    # Chain of (depth, node) pairs from the root down to the latest node.
    ancestors = []

    for raw_line in plantuml_text.split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('@'):
            continue

        marker = re.match(r'^([+\-*]+|\*+)', line)
        if marker is None:
            continue

        depth = len(marker.group(0))
        label = re.sub(r'^([+\-*]+|\*+)\s*', '', line)
        label = label.strip('[]')
        label = label.strip('()')
        entry = {'text': label, 'children': []}

        # Drop everything that is not a strict ancestor of this node.
        while ancestors and ancestors[-1][0] >= depth:
            ancestors.pop()

        if not ancestors:
            tree_root = entry
        else:
            parent = ancestors[-1][1]
            parent['children'].append(entry)

        ancestors.append((depth, entry))

    return tree_root
|
42 |
+
|
43 |
+
def create_mindmap_html(plantuml_text: str) -> str:
    """
    Convert PlantUML mindmap text to an HTML tree with collapsible nodes (CSS only).

    Node labels come straight from user input, so they are HTML-escaped
    before being placed in the markup. Characters such as ``<`` or ``&``
    are shown literally instead of being interpreted as HTML.

    Args:
        plantuml_text: Mindmap source in PlantUML syntax.

    Returns:
        An HTML fragment: a ``<style>`` block followed by nested
        ``<details>`` elements (nodes with children) and ``<div>`` elements
        (leaves). A short ``<p>`` message is returned when
        ``parse_plantuml_mindmap`` yields no root node.
    """
    # Imported locally so the escaping helper is in scope without relying on
    # the file's top-level imports.
    from html import escape

    # Parse the mindmap text into a nested structure
    root_node = parse_plantuml_mindmap(plantuml_text)
    if not root_node:
        return "<p>No valid mindmap content provided.</p>"

    markup = "<style>"
    markup += """
    details {
        margin-left: 20px;
    }
    summary {
        cursor: pointer;
        padding: 5px;
        border: 1px solid #333;
        border-radius: 3px;
        background-color: #e6f3ff;
    }
    .mindmap-node {
        margin-left: 20px;
        padding: 5px;
        border: 1px solid #333;
        border-radius: 3px;
    }
    """
    markup += "</style>"

    # Background colours cycle with nesting depth.
    colors = ['#e6f3ff', '#f0f7ff', '#f5f5f5', '#fff0f0', '#f0fff0']

    def create_node_html(node, level):
        """Render ``node`` (and, recursively, its children) at nesting ``level``."""
        bg_color = colors[(level - 1) % len(colors)]
        label = escape(node['text'])
        if node['children']:
            children_html = ''.join(create_node_html(child, level + 1) for child in node['children'])
            return f"""
            <details open>
                <summary style="background-color: {bg_color};">{label}</summary>
                {children_html}
            </details>
            """
        return f"""
        <div class="mindmap-node" style="background-color: {bg_color}; margin-left: {level * 20}px;">
            {label}
        </div>
        """

    markup += create_node_html(root_node, level=1)
    return markup
|
92 |
+
|
93 |
+
# Create Gradio interface
|
94 |
+
def create_mindmap_tab():
    """
    Build the "PlantUML Mindmap" Gradio tab.

    The tab has a multi-line textbox for PlantUML mindmap source, a button,
    and an HTML output. Clicking the button runs ``create_mindmap_html`` on
    the textbox content and shows the result.
    """
    # Sample mindmap shown as placeholder text in the input box.
    example_mindmap = """@startmindmap
* Project Planning
** Requirements
*** Functional Requirements
**** User Interface
**** Backend Services
*** Technical Requirements
**** Performance
**** Security
** Timeline
*** Phase 1
*** Phase 2
** Resources
*** Team
*** Budget
@endmindmap"""

    with gr.TabItem("PlantUML Mindmap"):
        gr.Markdown("# Collapsible PlantUML Mindmap Visualizer")
        gr.Markdown("Convert PlantUML mindmap syntax to a visual mindmap with collapsible nodes.")
        plantuml_input = gr.Textbox(
            lines=15,
            label="Enter PlantUML mindmap",
            placeholder=example_mindmap
        )
        submit_btn = gr.Button("Generate Mindmap")
        mindmap_output = gr.HTML(label="Mindmap Output")
        submit_btn.click(fn=create_mindmap_html, inputs=plantuml_input, outputs=mindmap_output)
|
125 |
+
|
126 |
+
#
|
127 |
+
# End of Mind_Map_tab.py
|
128 |
+
######################################################################################################################
|
App_Function_Libraries/Gradio_UI/PDF_ingestion_tab.py
CHANGED
@@ -8,9 +8,12 @@ import tempfile
|
|
8 |
#
|
9 |
# External Imports
|
10 |
import gradio as gr
|
|
|
|
|
|
|
11 |
#
|
12 |
# Local Imports
|
13 |
-
from App_Function_Libraries.DB.DB_Manager import
|
14 |
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
|
15 |
from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_metadata_from_pdf, extract_text_and_format_from_pdf, \
|
16 |
process_and_cleanup_pdf
|
@@ -22,92 +25,258 @@ from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_metadata_from_p
|
|
22 |
|
23 |
def create_pdf_ingestion_tab():
|
24 |
with gr.TabItem("PDF Ingestion", visible=True):
|
25 |
-
# TODO - Add functionality to extract metadata from pdf as part of conversion process in marker
|
26 |
gr.Markdown("# Ingest PDF Files and Extract Metadata")
|
27 |
with gr.Row():
|
28 |
with gr.Column():
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
|
36 |
-
value=False,
|
37 |
-
visible=True)
|
38 |
-
preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
|
39 |
-
value=False,
|
40 |
-
visible=True)
|
41 |
-
with gr.Row():
|
42 |
-
preset_prompt = gr.Dropdown(label="Select Preset Prompt",
|
43 |
-
choices=load_preset_prompts(),
|
44 |
-
visible=False)
|
45 |
-
with gr.Row():
|
46 |
-
custom_prompt_input = gr.Textbox(label="Custom Prompt",
|
47 |
-
placeholder="Enter custom prompt here",
|
48 |
-
lines=3,
|
49 |
-
visible=False)
|
50 |
-
with gr.Row():
|
51 |
-
system_prompt_input = gr.Textbox(label="System Prompt",
|
52 |
-
value="""
|
53 |
-
<s>You are a bulleted notes specialist.
|
54 |
-
[INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
55 |
-
**Bulleted Note Creation Guidelines**
|
56 |
-
|
57 |
-
**Headings**:
|
58 |
-
- Based on referenced topics, not categories like quotes or terms
|
59 |
-
- Surrounded by **bold** formatting
|
60 |
-
- Not listed as bullet points
|
61 |
-
- No space between headings and list items underneath
|
62 |
-
|
63 |
-
**Emphasis**:
|
64 |
-
- **Important terms** set in bold font
|
65 |
-
- **Text ending in a colon**: also bolded
|
66 |
-
|
67 |
-
**Review**:
|
68 |
-
- Ensure adherence to specified format
|
69 |
-
- Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]""",
|
70 |
-
lines=3,
|
71 |
-
visible=False)
|
72 |
-
|
73 |
-
custom_prompt_checkbox.change(
|
74 |
-
fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
75 |
-
inputs=[custom_prompt_checkbox],
|
76 |
-
outputs=[custom_prompt_input, system_prompt_input]
|
77 |
)
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
)
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
gr.update(value=prompts["user_prompt"], visible=True),
|
88 |
-
gr.update(value=prompts["system_prompt"], visible=True)
|
89 |
-
)
|
90 |
-
|
91 |
-
preset_prompt.change(
|
92 |
-
update_prompts,
|
93 |
-
inputs=preset_prompt,
|
94 |
-
outputs=[custom_prompt_input, system_prompt_input]
|
95 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
-
pdf_ingest_button = gr.Button("Ingest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
-
pdf_upload_button.upload(fn=lambda file: file, inputs=pdf_upload_button, outputs=pdf_file_input)
|
100 |
with gr.Column():
|
101 |
-
pdf_result_output = gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
pdf_ingest_button.click(
|
104 |
-
fn=
|
105 |
-
inputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
outputs=pdf_result_output
|
107 |
)
|
108 |
|
109 |
|
110 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
if pdf_file is None:
|
112 |
return "No file uploaded", ""
|
113 |
|
@@ -130,7 +299,37 @@ def test_pdf_ingestion(pdf_file):
|
|
130 |
title = metadata.get('title', os.path.splitext(os.path.basename(pdf_file.name))[0])
|
131 |
author = metadata.get('author', 'Unknown')
|
132 |
|
133 |
-
result = f"PDF '{title}' by {author} processed successfully."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
return result, markdown_text
|
135 |
except Exception as e:
|
136 |
return f"Error ingesting PDF: {str(e)}", ""
|
@@ -140,12 +339,24 @@ def create_pdf_ingestion_test_tab():
|
|
140 |
with gr.Row():
|
141 |
with gr.Column():
|
142 |
pdf_file_input = gr.File(label="Upload PDF for testing")
|
143 |
-
test_button = gr.Button("Test PDF Ingestion")
|
|
|
|
|
144 |
with gr.Column():
|
145 |
test_output = gr.Textbox(label="Test Result")
|
146 |
pdf_content_output = gr.Textbox(label="PDF Content", lines=200)
|
147 |
test_button.click(
|
148 |
-
fn=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
inputs=[pdf_file_input],
|
150 |
outputs=[test_output, pdf_content_output]
|
151 |
)
|
|
|
8 |
#
|
9 |
# External Imports
|
10 |
import gradio as gr
|
11 |
+
import pymupdf4llm
|
12 |
+
from docling.document_converter import DocumentConverter
|
13 |
+
|
14 |
#
|
15 |
# Local Imports
|
16 |
+
from App_Function_Libraries.DB.DB_Manager import list_prompts
|
17 |
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
|
18 |
from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_metadata_from_pdf, extract_text_and_format_from_pdf, \
|
19 |
process_and_cleanup_pdf
|
|
|
25 |
|
26 |
def create_pdf_ingestion_tab():
|
27 |
with gr.TabItem("PDF Ingestion", visible=True):
|
|
|
28 |
gr.Markdown("# Ingest PDF Files and Extract Metadata")
|
29 |
with gr.Row():
|
30 |
with gr.Column():
|
31 |
+
# Changed to support multiple files
|
32 |
+
pdf_file_input = gr.File(
|
33 |
+
label="Uploaded PDF Files",
|
34 |
+
file_types=[".pdf"],
|
35 |
+
visible=True,
|
36 |
+
file_count="multiple"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
)
|
38 |
+
pdf_upload_button = gr.UploadButton(
|
39 |
+
"Click to Upload PDFs",
|
40 |
+
file_types=[".pdf"],
|
41 |
+
file_count="multiple"
|
42 |
)
|
43 |
+
parser_selection = gr.Radio(
|
44 |
+
choices=["pymupdf", "pymupdf4llm", "docling"],
|
45 |
+
label="Select Parser",
|
46 |
+
value="pymupdf" # default value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
)
|
48 |
+
# Common metadata for all files
|
49 |
+
pdf_keywords_input = gr.Textbox(label="Keywords (Optional, comma-separated)")
|
50 |
+
# with gr.Row():
|
51 |
+
# custom_prompt_checkbox = gr.Checkbox(
|
52 |
+
# label="Use a Custom Prompt",
|
53 |
+
# value=False,
|
54 |
+
# visible=True
|
55 |
+
# )
|
56 |
+
# preset_prompt_checkbox = gr.Checkbox(
|
57 |
+
# label="Use a pre-set Prompt",
|
58 |
+
# value=False,
|
59 |
+
# visible=True
|
60 |
+
# )
|
61 |
+
# # Initialize state variables for pagination
|
62 |
+
# current_page_state = gr.State(value=1)
|
63 |
+
# total_pages_state = gr.State(value=1)
|
64 |
+
# with gr.Row():
|
65 |
+
# # Add pagination controls
|
66 |
+
# preset_prompt = gr.Dropdown(
|
67 |
+
# label="Select Preset Prompt",
|
68 |
+
# choices=[],
|
69 |
+
# visible=False
|
70 |
+
# )
|
71 |
+
# prev_page_button = gr.Button("Previous Page", visible=False)
|
72 |
+
# page_display = gr.Markdown("Page 1 of X", visible=False)
|
73 |
+
# next_page_button = gr.Button("Next Page", visible=False)
|
74 |
+
# with gr.Row():
|
75 |
+
# custom_prompt_input = gr.Textbox(
|
76 |
+
# label="Custom Prompt",
|
77 |
+
# placeholder="Enter custom prompt here",
|
78 |
+
# lines=3,
|
79 |
+
# visible=False
|
80 |
+
# )
|
81 |
+
# with gr.Row():
|
82 |
+
# system_prompt_input = gr.Textbox(
|
83 |
+
# label="System Prompt",
|
84 |
+
# value="""
|
85 |
+
# <s>You are a bulleted notes specialist.
|
86 |
+
# [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
87 |
+
# **Bulleted Note Creation Guidelines**
|
88 |
+
#
|
89 |
+
# **Headings**:
|
90 |
+
# - Based on referenced topics, not categories like quotes or terms
|
91 |
+
# - Surrounded by **bold** formatting
|
92 |
+
# - Not listed as bullet points
|
93 |
+
# - No space between headings and list items underneath
|
94 |
+
#
|
95 |
+
# **Emphasis**:
|
96 |
+
# - **Important terms** set in bold font
|
97 |
+
# - **Text ending in a colon**: also bolded
|
98 |
+
#
|
99 |
+
# **Review**:
|
100 |
+
# - Ensure adherence to specified format
|
101 |
+
# - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]""",
|
102 |
+
# lines=3,
|
103 |
+
# visible=False
|
104 |
+
# )
|
105 |
+
#
|
106 |
+
# custom_prompt_checkbox.change(
|
107 |
+
# fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
108 |
+
# inputs=[custom_prompt_checkbox],
|
109 |
+
# outputs=[custom_prompt_input, system_prompt_input]
|
110 |
+
# )
|
111 |
+
#
|
112 |
+
# def on_preset_prompt_checkbox_change(is_checked):
|
113 |
+
# if is_checked:
|
114 |
+
# prompts, total_pages, current_page = list_prompts(page=1, per_page=10)
|
115 |
+
# page_display_text = f"Page {current_page} of {total_pages}"
|
116 |
+
# return (
|
117 |
+
# gr.update(visible=True, interactive=True, choices=prompts), # preset_prompt
|
118 |
+
# gr.update(visible=True), # prev_page_button
|
119 |
+
# gr.update(visible=True), # next_page_button
|
120 |
+
# gr.update(value=page_display_text, visible=True), # page_display
|
121 |
+
# current_page, # current_page_state
|
122 |
+
# total_pages # total_pages_state
|
123 |
+
# )
|
124 |
+
# else:
|
125 |
+
# return (
|
126 |
+
# gr.update(visible=False, interactive=False), # preset_prompt
|
127 |
+
# gr.update(visible=False), # prev_page_button
|
128 |
+
# gr.update(visible=False), # next_page_button
|
129 |
+
# gr.update(visible=False), # page_display
|
130 |
+
# 1, # current_page_state
|
131 |
+
# 1 # total_pages_state
|
132 |
+
# )
|
133 |
+
#
|
134 |
+
# preset_prompt_checkbox.change(
|
135 |
+
# fn=on_preset_prompt_checkbox_change,
|
136 |
+
# inputs=[preset_prompt_checkbox],
|
137 |
+
# outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
|
138 |
+
# )
|
139 |
+
#
|
140 |
+
# def on_prev_page_click(current_page, total_pages):
|
141 |
+
# new_page = max(current_page - 1, 1)
|
142 |
+
# prompts, total_pages, current_page = list_prompts(page=new_page, per_page=10)
|
143 |
+
# page_display_text = f"Page {current_page} of {total_pages}"
|
144 |
+
# return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
|
145 |
+
#
|
146 |
+
# prev_page_button.click(
|
147 |
+
# fn=on_prev_page_click,
|
148 |
+
# inputs=[current_page_state, total_pages_state],
|
149 |
+
# outputs=[preset_prompt, page_display, current_page_state]
|
150 |
+
# )
|
151 |
+
#
|
152 |
+
# def on_next_page_click(current_page, total_pages):
|
153 |
+
# new_page = min(current_page + 1, total_pages)
|
154 |
+
# prompts, total_pages, current_page = list_prompts(page=new_page, per_page=10)
|
155 |
+
# page_display_text = f"Page {current_page} of {total_pages}"
|
156 |
+
# return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
|
157 |
+
#
|
158 |
+
# next_page_button.click(
|
159 |
+
# fn=on_next_page_click,
|
160 |
+
# inputs=[current_page_state, total_pages_state],
|
161 |
+
# outputs=[preset_prompt, page_display, current_page_state]
|
162 |
+
# )
|
163 |
+
#
|
164 |
+
# def update_prompts(preset_name):
|
165 |
+
# prompts = update_user_prompt(preset_name)
|
166 |
+
# return (
|
167 |
+
# gr.update(value=prompts["user_prompt"], visible=True),
|
168 |
+
# gr.update(value=prompts["system_prompt"], visible=True)
|
169 |
+
# )
|
170 |
+
#
|
171 |
+
# preset_prompt.change(
|
172 |
+
# update_prompts,
|
173 |
+
# inputs=preset_prompt,
|
174 |
+
# outputs=[custom_prompt_input, system_prompt_input]
|
175 |
+
# )
|
176 |
|
177 |
+
pdf_ingest_button = gr.Button("Ingest PDFs")
|
178 |
+
|
179 |
+
# Update the upload button handler for multiple files
|
180 |
+
pdf_upload_button.upload(
|
181 |
+
fn=lambda files: files,
|
182 |
+
inputs=pdf_upload_button,
|
183 |
+
outputs=pdf_file_input
|
184 |
+
)
|
185 |
|
|
|
186 |
with gr.Column():
|
187 |
+
pdf_result_output = gr.DataFrame(
|
188 |
+
headers=["Filename", "Status", "Message"],
|
189 |
+
label="Processing Results"
|
190 |
+
)
|
191 |
+
|
192 |
+
# Define a new function to handle multiple PDFs
|
193 |
+
def process_multiple_pdfs(pdf_files, keywords, custom_prompt_checkbox_value, custom_prompt_text, system_prompt_text):
|
194 |
+
results = []
|
195 |
+
if pdf_files is None:
|
196 |
+
return [["No files", "Error", "No files uploaded"]]
|
197 |
+
|
198 |
+
for pdf_file in pdf_files:
|
199 |
+
try:
|
200 |
+
# Extract metadata from PDF
|
201 |
+
metadata = extract_metadata_from_pdf(pdf_file.name)
|
202 |
|
203 |
+
# Use custom or system prompt if checkbox is checked
|
204 |
+
if custom_prompt_checkbox_value:
|
205 |
+
prompt = custom_prompt_text
|
206 |
+
system_prompt = system_prompt_text
|
207 |
+
else:
|
208 |
+
prompt = None
|
209 |
+
system_prompt = None
|
210 |
+
|
211 |
+
# Process the PDF with prompts
|
212 |
+
result = process_and_cleanup_pdf(
|
213 |
+
pdf_file,
|
214 |
+
metadata.get('title', os.path.splitext(os.path.basename(pdf_file.name))[0]),
|
215 |
+
metadata.get('author', 'Unknown'),
|
216 |
+
keywords,
|
217 |
+
#prompt=prompt,
|
218 |
+
#system_prompt=system_prompt
|
219 |
+
)
|
220 |
+
|
221 |
+
results.append([
|
222 |
+
pdf_file.name,
|
223 |
+
"Success" if "successfully" in result else "Error",
|
224 |
+
result
|
225 |
+
])
|
226 |
+
except Exception as e:
|
227 |
+
results.append([
|
228 |
+
pdf_file.name,
|
229 |
+
"Error",
|
230 |
+
str(e)
|
231 |
+
])
|
232 |
+
|
233 |
+
return results
|
234 |
+
|
235 |
+
# Update the ingest button click handler
|
236 |
pdf_ingest_button.click(
|
237 |
+
fn=process_multiple_pdfs,
|
238 |
+
inputs=[
|
239 |
+
pdf_file_input,
|
240 |
+
pdf_keywords_input,
|
241 |
+
parser_selection,
|
242 |
+
#custom_prompt_checkbox,
|
243 |
+
#custom_prompt_input,
|
244 |
+
#system_prompt_input
|
245 |
+
],
|
246 |
outputs=pdf_result_output
|
247 |
)
|
248 |
|
249 |
|
250 |
+
def test_pymupdf4llm_pdf_ingestion(pdf_file):
|
251 |
+
if pdf_file is None:
|
252 |
+
return "No file uploaded", ""
|
253 |
+
|
254 |
+
try:
|
255 |
+
# Create a temporary directory
|
256 |
+
with tempfile.TemporaryDirectory() as temp_dir:
|
257 |
+
# Create a path for the temporary PDF file
|
258 |
+
temp_path = os.path.join(temp_dir, "temp.pdf")
|
259 |
+
|
260 |
+
# Copy the contents of the uploaded file to the temporary file
|
261 |
+
shutil.copy(pdf_file.name, temp_path)
|
262 |
+
|
263 |
+
# Extract text and convert to Markdown
|
264 |
+
markdown_text = pymupdf4llm.to_markdown(temp_path)
|
265 |
+
|
266 |
+
# Extract metadata from PDF
|
267 |
+
metadata = extract_metadata_from_pdf(temp_path)
|
268 |
+
|
269 |
+
# Use metadata for title and author if not provided
|
270 |
+
title = metadata.get('title', os.path.splitext(os.path.basename(pdf_file.name))[0])
|
271 |
+
author = metadata.get('author', 'Unknown')
|
272 |
+
|
273 |
+
result = f"PDF '{title}' by {author} processed successfully by pymupdf4llm."
|
274 |
+
return result, markdown_text
|
275 |
+
except Exception as e:
|
276 |
+
return f"Error ingesting PDF: {str(e)}", ""
|
277 |
+
|
278 |
+
|
279 |
+
def test_pymupdf_pdf_ingestion(pdf_file):
|
280 |
if pdf_file is None:
|
281 |
return "No file uploaded", ""
|
282 |
|
|
|
299 |
title = metadata.get('title', os.path.splitext(os.path.basename(pdf_file.name))[0])
|
300 |
author = metadata.get('author', 'Unknown')
|
301 |
|
302 |
+
result = f"PDF '{title}' by {author} processed successfully by pymupdf."
|
303 |
+
return result, markdown_text
|
304 |
+
except Exception as e:
|
305 |
+
return f"Error ingesting PDF: {str(e)}", ""
|
306 |
+
|
307 |
+
|
308 |
+
def test_docling_pdf_ingestion(pdf_file):
|
309 |
+
if pdf_file is None:
|
310 |
+
return "No file uploaded", ""
|
311 |
+
|
312 |
+
try:
|
313 |
+
# Create a temporary directory
|
314 |
+
with tempfile.TemporaryDirectory() as temp_dir:
|
315 |
+
# Create a path for the temporary PDF file
|
316 |
+
temp_path = os.path.join(temp_dir, "temp.pdf")
|
317 |
+
|
318 |
+
# Copy the contents of the uploaded file to the temporary file
|
319 |
+
shutil.copy(pdf_file.name, temp_path)
|
320 |
+
|
321 |
+
# Extract text and convert to Markdown
|
322 |
+
converter = DocumentConverter()
|
323 |
+
parsed_pdf = converter.convert(temp_path)
|
324 |
+
markdown_text = parsed_pdf.document.export_to_markdown()
|
325 |
+
# Extract metadata from PDF
|
326 |
+
metadata = extract_metadata_from_pdf(temp_path)
|
327 |
+
|
328 |
+
# Use metadata for title and author if not provided
|
329 |
+
title = metadata.get('title', os.path.splitext(os.path.basename(pdf_file.name))[0])
|
330 |
+
author = metadata.get('author', 'Unknown')
|
331 |
+
|
332 |
+
result = f"PDF '{title}' by {author} processed successfully by pymupdf."
|
333 |
return result, markdown_text
|
334 |
except Exception as e:
|
335 |
return f"Error ingesting PDF: {str(e)}", ""
|
|
|
339 |
with gr.Row():
|
340 |
with gr.Column():
|
341 |
pdf_file_input = gr.File(label="Upload PDF for testing")
|
342 |
+
test_button = gr.Button("Test pymupdf PDF Ingestion")
|
343 |
+
test_button_2 = gr.Button("Test pymupdf4llm PDF Ingestion")
|
344 |
+
test_button_3 = gr.Button("Test Docling PDF Ingestion")
|
345 |
with gr.Column():
|
346 |
test_output = gr.Textbox(label="Test Result")
|
347 |
pdf_content_output = gr.Textbox(label="PDF Content", lines=200)
|
348 |
test_button.click(
|
349 |
+
fn=test_pymupdf_pdf_ingestion,
|
350 |
+
inputs=[pdf_file_input],
|
351 |
+
outputs=[test_output, pdf_content_output]
|
352 |
+
)
|
353 |
+
test_button_2.click(
|
354 |
+
fn=test_pymupdf4llm_pdf_ingestion,
|
355 |
+
inputs=[pdf_file_input],
|
356 |
+
outputs=[test_output, pdf_content_output]
|
357 |
+
)
|
358 |
+
test_button_3.click(
|
359 |
+
fn=test_docling_pdf_ingestion,
|
360 |
inputs=[pdf_file_input],
|
361 |
outputs=[test_output, pdf_content_output]
|
362 |
)
|
App_Function_Libraries/Gradio_UI/Plaintext_tab_import.py
CHANGED
@@ -6,6 +6,7 @@
|
|
6 |
#######################################################################################################################
|
7 |
#
|
8 |
# Import necessary libraries
|
|
|
9 |
import os
|
10 |
import tempfile
|
11 |
import zipfile
|
@@ -16,101 +17,104 @@ from docx2txt import docx2txt
|
|
16 |
from pypandoc import convert_file
|
17 |
#
|
18 |
# Import Local libraries
|
19 |
-
from App_Function_Libraries.
|
|
|
20 |
#
|
21 |
#######################################################################################################################
|
22 |
#
|
23 |
# Functions:
|
24 |
|
25 |
def create_plain_text_import_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
with gr.TabItem("Import Plain text & .docx Files", visible=True):
|
27 |
with gr.Row():
|
28 |
with gr.Column():
|
29 |
-
gr.Markdown("# Import
|
30 |
-
gr.Markdown("Upload
|
31 |
-
import_file = gr.File(label="Upload file for import", file_types=[".md", ".txt", ".rtf", ".docx", ".zip"])
|
32 |
-
title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content (for single files)")
|
33 |
-
author_input = gr.Textbox(label="Author", placeholder="Enter the author's name (for single files)")
|
34 |
-
keywords_input = gr.Textbox(label="Keywords", placeholder="Enter keywords, comma-separated")
|
35 |
-
system_prompt_input = gr.Textbox(label="System Prompt (for Summarization)", lines=3,
|
36 |
-
value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
37 |
-
**Bulleted Note Creation Guidelines**
|
38 |
-
|
39 |
-
**Headings**:
|
40 |
-
- Based on referenced topics, not categories like quotes or terms
|
41 |
-
- Surrounded by **bold** formatting
|
42 |
-
- Not listed as bullet points
|
43 |
-
- No space between headings and list items underneath
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
|
|
|
|
|
55 |
api_name_input = gr.Dropdown(
|
56 |
-
choices=[None
|
57 |
-
|
58 |
-
label="API for
|
59 |
)
|
60 |
api_key_input = gr.Textbox(label="API Key", type="password")
|
61 |
import_button = gr.Button("Import File(s)")
|
62 |
-
with gr.Column():
|
63 |
-
import_output = gr.Textbox(label="Import Status")
|
64 |
-
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
# Determine the file type and convert if necessary
|
69 |
-
file_extension = os.path.splitext(file_path)[1].lower()
|
70 |
-
if file_extension == '.rtf':
|
71 |
-
with tempfile.NamedTemporaryFile(suffix='.md', delete=False) as temp_file:
|
72 |
-
convert_file(file_path, 'md', outputfile=temp_file.name)
|
73 |
-
file_path = temp_file.name
|
74 |
-
elif file_extension == '.docx':
|
75 |
-
content = docx2txt.process(file_path)
|
76 |
-
else:
|
77 |
-
with open(file_path, 'r', encoding='utf-8') as file:
|
78 |
-
content = file.read()
|
79 |
-
|
80 |
-
# Process the content
|
81 |
-
return import_data(content, title, author, keywords, system_prompt,
|
82 |
-
user_prompt, auto_summarize, api_name, api_key)
|
83 |
-
except Exception as e:
|
84 |
-
return f"Error processing file: {str(e)}"
|
85 |
-
|
86 |
-
def process_plain_text_zip_file(zip_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
|
87 |
-
results = []
|
88 |
-
with tempfile.TemporaryDirectory() as temp_dir:
|
89 |
-
with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
|
90 |
-
zip_ref.extractall(temp_dir)
|
91 |
-
|
92 |
-
for filename in os.listdir(temp_dir):
|
93 |
-
if filename.lower().endswith(('.md', '.txt', '.rtf', '.docx')):
|
94 |
-
file_path = os.path.join(temp_dir, filename)
|
95 |
-
result = import_plain_text_file(file_path, title, author, keywords, system_prompt,
|
96 |
-
user_prompt, auto_summarize, api_name, api_key)
|
97 |
-
results.append(f"File: {filename} - {result}")
|
98 |
-
|
99 |
-
return "\n".join(results)
|
100 |
-
|
101 |
-
def import_file_handler(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
|
102 |
-
if file.name.lower().endswith(('.md', '.txt', '.rtf', '.docx')):
|
103 |
-
return import_plain_text_file(file.name, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
|
104 |
-
elif file.name.lower().endswith('.zip'):
|
105 |
-
return process_plain_text_zip_file(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
|
106 |
-
else:
|
107 |
-
return "Unsupported file type. Please upload a .md, .txt, .rtf, .docx file or a .zip file containing these file types."
|
108 |
|
109 |
import_button.click(
|
110 |
fn=import_file_handler,
|
111 |
-
inputs=[
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
outputs=import_output
|
114 |
)
|
115 |
|
116 |
-
return
|
|
|
|
|
|
|
|
|
|
6 |
#######################################################################################################################
|
7 |
#
|
8 |
# Import necessary libraries
|
9 |
+
import logging
|
10 |
import os
|
11 |
import tempfile
|
12 |
import zipfile
|
|
|
17 |
from pypandoc import convert_file
|
18 |
#
|
19 |
# Import Local libraries
|
20 |
+
from App_Function_Libraries.Plaintext.Plaintext_Files import import_file_handler
|
21 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
|
22 |
#
|
23 |
#######################################################################################################################
|
24 |
#
|
25 |
# Functions:
|
26 |
|
27 |
def create_plain_text_import_tab():
|
28 |
+
try:
|
29 |
+
default_value = None
|
30 |
+
if default_api_endpoint:
|
31 |
+
if default_api_endpoint in global_api_endpoints:
|
32 |
+
default_value = format_api_name(default_api_endpoint)
|
33 |
+
else:
|
34 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
35 |
+
except Exception as e:
|
36 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
37 |
+
default_value = None
|
38 |
+
|
39 |
with gr.TabItem("Import Plain text & .docx Files", visible=True):
|
40 |
with gr.Row():
|
41 |
with gr.Column():
|
42 |
+
gr.Markdown("# Import `.md`/`.txt`/`.rtf`/`.docx` Files & `.zip` collections of them.")
|
43 |
+
gr.Markdown("Upload multiple files or a zip file containing multiple files")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
+
# Updated to support multiple files
|
46 |
+
import_files = gr.File(
|
47 |
+
label="Upload files for import",
|
48 |
+
file_count="multiple",
|
49 |
+
file_types=[".md", ".txt", ".rtf", ".docx", ".zip"]
|
50 |
+
)
|
51 |
|
52 |
+
# Optional metadata override fields
|
53 |
+
author_input = gr.Textbox(
|
54 |
+
label="Author Override (optional)",
|
55 |
+
placeholder="Enter author name to apply to all files"
|
56 |
+
)
|
57 |
+
keywords_input = gr.Textbox(
|
58 |
+
label="Keywords",
|
59 |
+
placeholder="Enter keywords, comma-separated - will be applied to all files"
|
60 |
+
)
|
61 |
+
system_prompt_input = gr.Textbox(
|
62 |
+
label="System Prompt (for Summarization)",
|
63 |
+
lines=3,
|
64 |
+
value="""
|
65 |
+
<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
66 |
+
**Bulleted Note Creation Guidelines**
|
67 |
+
|
68 |
+
**Headings**:
|
69 |
+
- Based on referenced topics, not categories like quotes or terms
|
70 |
+
- Surrounded by **bold** formatting
|
71 |
+
- Not listed as bullet points
|
72 |
+
- No space between headings and list items underneath
|
73 |
+
|
74 |
+
**Emphasis**:
|
75 |
+
- **Important terms** set in bold font
|
76 |
+
- **Text ending in a colon**: also bolded
|
77 |
+
|
78 |
+
**Review**:
|
79 |
+
- Ensure adherence to specified format
|
80 |
+
- Do not reference these instructions in your response.</s>[INST]
|
81 |
+
"""
|
82 |
+
)
|
83 |
+
custom_prompt_input = gr.Textbox(
|
84 |
+
label="Custom User Prompt",
|
85 |
+
placeholder="Enter a custom user prompt for summarization (optional)"
|
86 |
+
)
|
87 |
auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
|
88 |
+
|
89 |
+
# API configuration
|
90 |
api_name_input = gr.Dropdown(
|
91 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
92 |
+
value=default_value,
|
93 |
+
label="API for Summarization/Analysis (Optional)"
|
94 |
)
|
95 |
api_key_input = gr.Textbox(label="API Key", type="password")
|
96 |
import_button = gr.Button("Import File(s)")
|
|
|
|
|
|
|
97 |
|
98 |
+
with gr.Column():
|
99 |
+
import_output = gr.Textbox(label="Import Status", lines=10)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
import_button.click(
|
102 |
fn=import_file_handler,
|
103 |
+
inputs=[
|
104 |
+
import_files,
|
105 |
+
author_input,
|
106 |
+
keywords_input,
|
107 |
+
system_prompt_input,
|
108 |
+
custom_prompt_input,
|
109 |
+
auto_summarize_checkbox,
|
110 |
+
api_name_input,
|
111 |
+
api_key_input
|
112 |
+
],
|
113 |
outputs=import_output
|
114 |
)
|
115 |
|
116 |
+
return import_files, author_input, keywords_input, system_prompt_input, custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input, import_button, import_output
|
117 |
+
|
118 |
+
#
|
119 |
+
# End of Plain_text_import.py
|
120 |
+
#######################################################################################################################
|
App_Function_Libraries/Gradio_UI/Podcast_tab.py
CHANGED
@@ -2,23 +2,38 @@
|
|
2 |
# Description: Gradio UI for ingesting podcasts into the database
|
3 |
#
|
4 |
# Imports
|
|
|
5 |
#
|
6 |
# External Imports
|
7 |
import gradio as gr
|
8 |
#
|
9 |
# Local Imports
|
10 |
from App_Function_Libraries.Audio.Audio_Files import process_podcast
|
11 |
-
from App_Function_Libraries.DB.DB_Manager import
|
12 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
|
|
|
13 |
#
|
14 |
########################################################################################################################
|
15 |
#
|
16 |
# Functions:
|
17 |
|
18 |
-
|
19 |
def create_podcast_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
with gr.TabItem("Podcast", visible=True):
|
21 |
gr.Markdown("# Podcast Transcription and Ingestion", visible=True)
|
|
|
|
|
|
|
|
|
22 |
with gr.Row():
|
23 |
with gr.Column():
|
24 |
podcast_url_input = gr.Textbox(label="Podcast URL", placeholder="Enter the podcast URL here")
|
@@ -35,54 +50,130 @@ def create_podcast_tab():
|
|
35 |
keep_timestamps_input = gr.Checkbox(label="Keep Timestamps", value=True)
|
36 |
|
37 |
with gr.Row():
|
38 |
-
podcast_custom_prompt_checkbox = gr.Checkbox(
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
44 |
with gr.Row():
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
48 |
with gr.Row():
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
with gr.Row():
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
**
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
**
|
69 |
-
-
|
70 |
-
-
|
71 |
-
|
72 |
-
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
|
|
75 |
podcast_custom_prompt_checkbox.change(
|
76 |
fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
77 |
inputs=[podcast_custom_prompt_checkbox],
|
78 |
outputs=[podcast_custom_prompt_input, system_prompt_input]
|
79 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
preset_prompt_checkbox.change(
|
81 |
-
fn=
|
82 |
inputs=[preset_prompt_checkbox],
|
83 |
-
outputs=[preset_prompt]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
)
|
85 |
|
|
|
86 |
def update_prompts(preset_name):
|
87 |
prompts = update_user_prompt(preset_name)
|
88 |
return (
|
@@ -91,16 +182,16 @@ def create_podcast_tab():
|
|
91 |
)
|
92 |
|
93 |
preset_prompt.change(
|
94 |
-
update_prompts,
|
95 |
-
inputs=preset_prompt,
|
96 |
outputs=[podcast_custom_prompt_input, system_prompt_input]
|
97 |
)
|
98 |
|
|
|
99 |
podcast_api_name_input = gr.Dropdown(
|
100 |
-
choices=[None
|
101 |
-
|
102 |
-
|
103 |
-
label="API Name for Summarization (Optional)"
|
104 |
)
|
105 |
podcast_api_key_input = gr.Textbox(label="API Key (if required)", type="password")
|
106 |
podcast_whisper_model_input = gr.Dropdown(choices=whisper_models, value="medium", label="Whisper Model")
|
@@ -151,13 +242,37 @@ def create_podcast_tab():
|
|
151 |
|
152 |
podcast_process_button.click(
|
153 |
fn=process_podcast,
|
154 |
-
inputs=[
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
)
|
|
|
2 |
# Description: Gradio UI for ingesting podcasts into the database
|
3 |
#
|
4 |
# Imports
|
5 |
+
import logging
|
6 |
#
|
7 |
# External Imports
|
8 |
import gradio as gr
|
9 |
#
|
10 |
# Local Imports
|
11 |
from App_Function_Libraries.Audio.Audio_Files import process_podcast
|
12 |
+
from App_Function_Libraries.DB.DB_Manager import list_prompts
|
13 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
|
14 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
|
15 |
#
|
16 |
########################################################################################################################
|
17 |
#
|
18 |
# Functions:
|
19 |
|
|
|
20 |
def create_podcast_tab():
|
21 |
+
try:
|
22 |
+
default_value = None
|
23 |
+
if default_api_endpoint:
|
24 |
+
if default_api_endpoint in global_api_endpoints:
|
25 |
+
default_value = format_api_name(default_api_endpoint)
|
26 |
+
else:
|
27 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
28 |
+
except Exception as e:
|
29 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
30 |
+
default_value = None
|
31 |
with gr.TabItem("Podcast", visible=True):
|
32 |
gr.Markdown("# Podcast Transcription and Ingestion", visible=True)
|
33 |
+
# Initialize state variables for pagination
|
34 |
+
current_page_state = gr.State(value=1)
|
35 |
+
total_pages_state = gr.State(value=1)
|
36 |
+
|
37 |
with gr.Row():
|
38 |
with gr.Column():
|
39 |
podcast_url_input = gr.Textbox(label="Podcast URL", placeholder="Enter the podcast URL here")
|
|
|
50 |
keep_timestamps_input = gr.Checkbox(label="Keep Timestamps", value=True)
|
51 |
|
52 |
with gr.Row():
|
53 |
+
podcast_custom_prompt_checkbox = gr.Checkbox(
|
54 |
+
label="Use a Custom Prompt",
|
55 |
+
value=False,
|
56 |
+
visible=True
|
57 |
+
)
|
58 |
+
preset_prompt_checkbox = gr.Checkbox(
|
59 |
+
label="Use a pre-set Prompt",
|
60 |
+
value=False,
|
61 |
+
visible=True
|
62 |
+
)
|
63 |
+
|
64 |
with gr.Row():
|
65 |
+
# Add pagination controls
|
66 |
+
preset_prompt = gr.Dropdown(
|
67 |
+
label="Select Preset Prompt",
|
68 |
+
choices=[],
|
69 |
+
visible=False
|
70 |
+
)
|
71 |
with gr.Row():
|
72 |
+
prev_page_button = gr.Button("Previous Page", visible=False)
|
73 |
+
page_display = gr.Markdown("Page 1 of X", visible=False)
|
74 |
+
next_page_button = gr.Button("Next Page", visible=False)
|
75 |
+
|
76 |
with gr.Row():
|
77 |
+
podcast_custom_prompt_input = gr.Textbox(
|
78 |
+
label="Custom Prompt",
|
79 |
+
placeholder="Enter custom prompt here",
|
80 |
+
lines=10,
|
81 |
+
visible=False
|
82 |
+
)
|
83 |
+
with gr.Row():
|
84 |
+
system_prompt_input = gr.Textbox(
|
85 |
+
label="System Prompt",
|
86 |
+
value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
87 |
+
**Bulleted Note Creation Guidelines**
|
88 |
+
|
89 |
+
**Headings**:
|
90 |
+
- Based on referenced topics, not categories like quotes or terms
|
91 |
+
- Surrounded by **bold** formatting
|
92 |
+
- Not listed as bullet points
|
93 |
+
- No space between headings and list items underneath
|
94 |
+
|
95 |
+
**Emphasis**:
|
96 |
+
- **Important terms** set in bold font
|
97 |
+
- **Text ending in a colon**: also bolded
|
98 |
+
|
99 |
+
**Review**:
|
100 |
+
- Ensure adherence to specified format
|
101 |
+
- Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
|
102 |
+
""",
|
103 |
+
lines=10,
|
104 |
+
visible=False
|
105 |
+
)
|
106 |
|
107 |
+
# Handle custom prompt checkbox change
|
108 |
podcast_custom_prompt_checkbox.change(
|
109 |
fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
110 |
inputs=[podcast_custom_prompt_checkbox],
|
111 |
outputs=[podcast_custom_prompt_input, system_prompt_input]
|
112 |
)
|
113 |
+
|
114 |
+
# Handle preset prompt checkbox change
|
115 |
+
def on_preset_prompt_checkbox_change(is_checked):
    """Show or hide the preset-prompt picker together with its paging controls.

    Returns a 6-tuple in the order expected by the ``outputs`` of
    ``preset_prompt_checkbox.change``: updates for the preset dropdown, the
    previous/next buttons and the page label, followed by the current page
    number and the total page count.
    """
    if not is_checked:
        # Unchecked: hide every control and reset both page counters to 1.
        return (
            gr.update(visible=False, interactive=False),  # preset_prompt
            gr.update(visible=False),  # prev_page_button
            gr.update(visible=False),  # next_page_button
            gr.update(visible=False),  # page_display
            1,  # current_page_state
            1,  # total_pages_state
        )

    # Checked: load the first page (20 entries) of prompt names.
    names, page_count, page_no = list_prompts(page=1, per_page=20)
    return (
        gr.update(visible=True, interactive=True, choices=names),  # preset_prompt
        gr.update(visible=True),  # prev_page_button
        gr.update(visible=True),  # next_page_button
        gr.update(value=f"Page {page_no} of {page_count}", visible=True),  # page_display
        page_no,  # current_page_state
        page_count,  # total_pages_state
    )
|
136 |
+
|
137 |
preset_prompt_checkbox.change(
|
138 |
+
fn=on_preset_prompt_checkbox_change,
|
139 |
inputs=[preset_prompt_checkbox],
|
140 |
+
outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
|
141 |
+
)
|
142 |
+
|
143 |
+
# Pagination button functions
|
144 |
+
def on_prev_page_click(current_page, total_pages):
    """Step the preset-prompt list back one page, stopping at page 1.

    ``total_pages`` is accepted because the click handler passes both state
    values, but the incoming value is not used; the page count shown comes
    from the fresh ``list_prompts`` result.

    Returns updates for the preset dropdown and the page label, plus the new
    current page number.
    """
    target = current_page - 1
    if target < 1:
        target = 1

    names, page_count, page_no = list_prompts(page=target, per_page=20)
    label = f"Page {page_no} of {page_count}"
    return gr.update(choices=names), gr.update(value=label), page_no
|
153 |
+
|
154 |
+
prev_page_button.click(
|
155 |
+
fn=on_prev_page_click,
|
156 |
+
inputs=[current_page_state, total_pages_state],
|
157 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
158 |
+
)
|
159 |
+
|
160 |
+
def on_next_page_click(current_page, total_pages):
    """Advance the preset-prompt list one page, clamped to ``[1, total_pages]``.

    Args:
        current_page: Page currently displayed.
        total_pages: Page count recorded when the list was last loaded.

    Returns:
        A 3-tuple: update for the preset dropdown (new choices), update for
        the page label, and the current page number reported by
        ``list_prompts``.
    """
    # Cap at the last page, but never request a page below 1: with
    # total_pages == 0 the bare min() would ask for page 0. The previous-page
    # handler applies the same floor.
    new_page = max(1, min(current_page + 1, total_pages))
    prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
    page_display_text = f"Page {current_page} of {total_pages}"
    return (
        gr.update(choices=prompts),
        gr.update(value=page_display_text),
        current_page,
    )
|
169 |
+
|
170 |
+
next_page_button.click(
|
171 |
+
fn=on_next_page_click,
|
172 |
+
inputs=[current_page_state, total_pages_state],
|
173 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
174 |
)
|
175 |
|
176 |
+
# Update prompts when a preset is selected
|
177 |
def update_prompts(preset_name):
|
178 |
prompts = update_user_prompt(preset_name)
|
179 |
return (
|
|
|
182 |
)
|
183 |
|
184 |
preset_prompt.change(
|
185 |
+
fn=update_prompts,
|
186 |
+
inputs=[preset_prompt],
|
187 |
outputs=[podcast_custom_prompt_input, system_prompt_input]
|
188 |
)
|
189 |
|
190 |
+
# Refactored API selection dropdown
|
191 |
podcast_api_name_input = gr.Dropdown(
|
192 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
193 |
+
value=default_value,
|
194 |
+
label="API for Summarization/Analysis (Optional)"
|
|
|
195 |
)
|
196 |
podcast_api_key_input = gr.Textbox(label="API Key (if required)", type="password")
|
197 |
podcast_whisper_model_input = gr.Dropdown(choices=whisper_models, value="medium", label="Whisper Model")
|
|
|
242 |
|
243 |
podcast_process_button.click(
|
244 |
fn=process_podcast,
|
245 |
+
inputs=[
|
246 |
+
podcast_url_input,
|
247 |
+
podcast_title_input,
|
248 |
+
podcast_author_input,
|
249 |
+
podcast_keywords_input,
|
250 |
+
podcast_custom_prompt_input,
|
251 |
+
podcast_api_name_input,
|
252 |
+
podcast_api_key_input,
|
253 |
+
podcast_whisper_model_input,
|
254 |
+
keep_original_input,
|
255 |
+
enable_diarization_input,
|
256 |
+
use_cookies_input,
|
257 |
+
cookies_input,
|
258 |
+
chunk_method,
|
259 |
+
max_chunk_size,
|
260 |
+
chunk_overlap,
|
261 |
+
use_adaptive_chunking,
|
262 |
+
use_multi_level_chunking,
|
263 |
+
chunk_language,
|
264 |
+
keep_timestamps_input,
|
265 |
+
system_prompt_input # Include system prompt input
|
266 |
+
],
|
267 |
+
outputs=[
|
268 |
+
podcast_progress_output,
|
269 |
+
podcast_transcription_output,
|
270 |
+
podcast_summary_output,
|
271 |
+
podcast_title_input,
|
272 |
+
podcast_author_input,
|
273 |
+
podcast_keywords_input,
|
274 |
+
podcast_error_output,
|
275 |
+
download_transcription,
|
276 |
+
download_summary
|
277 |
+
]
|
278 |
)
|
App_Function_Libraries/Gradio_UI/Prompt_Suggestion_tab.py
CHANGED
@@ -1,11 +1,14 @@
|
|
1 |
# Description: Gradio UI for Creating and Testing new Prompts
|
2 |
#
|
3 |
# Imports
|
|
|
|
|
4 |
import gradio as gr
|
5 |
|
6 |
-
from App_Function_Libraries.Chat import chat
|
7 |
-
from App_Function_Libraries.DB.
|
8 |
from App_Function_Libraries.Prompt_Engineering.Prompt_Engineering import generate_prompt, test_generated_prompt
|
|
|
9 |
|
10 |
|
11 |
#
|
@@ -18,6 +21,16 @@ from App_Function_Libraries.Prompt_Engineering.Prompt_Engineering import generat
|
|
18 |
|
19 |
# Gradio tab for prompt suggestion and testing
|
20 |
def create_prompt_suggestion_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
with gr.TabItem("Prompt Suggestion/Creation", visible=True):
|
22 |
gr.Markdown("# Generate and Test AI Prompts with the Metaprompt Approach")
|
23 |
|
@@ -30,11 +43,11 @@ def create_prompt_suggestion_tab():
|
|
30 |
placeholder="E.g., CUSTOMER_COMPLAINT, COMPANY_NAME")
|
31 |
|
32 |
# API-related inputs
|
|
|
33 |
api_name_input = gr.Dropdown(
|
34 |
-
choices=["
|
35 |
-
|
36 |
-
label="API
|
37 |
-
value="OpenAI" # Default selection
|
38 |
)
|
39 |
|
40 |
api_key_input = gr.Textbox(label="API Key", placeholder="Enter your API key (if required)",
|
|
|
1 |
# Description: Gradio UI for Creating and Testing new Prompts
|
2 |
#
|
3 |
# Imports
|
4 |
+
import logging
|
5 |
+
|
6 |
import gradio as gr
|
7 |
|
8 |
+
from App_Function_Libraries.Chat.Chat_Functions import chat
|
9 |
+
from App_Function_Libraries.DB.DB_Manager import add_or_update_prompt
|
10 |
from App_Function_Libraries.Prompt_Engineering.Prompt_Engineering import generate_prompt, test_generated_prompt
|
11 |
+
from App_Function_Libraries.Utils.Utils import format_api_name, global_api_endpoints, default_api_endpoint
|
12 |
|
13 |
|
14 |
#
|
|
|
21 |
|
22 |
# Gradio tab for prompt suggestion and testing
|
23 |
def create_prompt_suggestion_tab():
|
24 |
+
try:
|
25 |
+
default_value = None
|
26 |
+
if default_api_endpoint:
|
27 |
+
if default_api_endpoint in global_api_endpoints:
|
28 |
+
default_value = format_api_name(default_api_endpoint)
|
29 |
+
else:
|
30 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
31 |
+
except Exception as e:
|
32 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
33 |
+
default_value = None
|
34 |
with gr.TabItem("Prompt Suggestion/Creation", visible=True):
|
35 |
gr.Markdown("# Generate and Test AI Prompts with the Metaprompt Approach")
|
36 |
|
|
|
43 |
placeholder="E.g., CUSTOMER_COMPLAINT, COMPANY_NAME")
|
44 |
|
45 |
# API-related inputs
|
46 |
+
# Refactored API selection dropdown
|
47 |
api_name_input = gr.Dropdown(
|
48 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
49 |
+
value=default_value,
|
50 |
+
label="API for Analysis (Optional)"
|
|
|
51 |
)
|
52 |
|
53 |
api_key_input = gr.Textbox(label="API Key", placeholder="Enter your API key (if required)",
|
App_Function_Libraries/Gradio_UI/Prompts_tab.py
ADDED
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Prompts_tab.py
|
2 |
+
# Description: This file contains the code for the prompts tab in the Gradio UI
|
3 |
+
#
|
4 |
+
# Imports
|
5 |
+
import html
|
6 |
+
import logging
|
7 |
+
|
8 |
+
#
|
9 |
+
# External Imports
|
10 |
+
import gradio as gr
|
11 |
+
#
|
12 |
+
# Local Imports
|
13 |
+
from App_Function_Libraries.DB.DB_Manager import fetch_prompt_details, list_prompts
|
14 |
+
#
|
15 |
+
####################################################################################################
|
16 |
+
#
|
17 |
+
# Functions:
|
18 |
+
|
19 |
+
def create_prompt_view_tab():
    """Build the "View Prompt Database" tab.

    Lays out paging controls, a Title/Author table and a detail card, then
    wires the buttons and the prompt dropdown to the handlers defined below.
    Prompt data is read through ``list_prompts`` and ``fetch_prompt_details``.
    """
    with gr.TabItem("View Prompt Database", visible=True):
        gr.Markdown("# View Prompt Database Entries")
        with gr.Row():
            with gr.Column():
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
                view_button = gr.Button("View Page")
                previous_page_button = gr.Button("Previous Page", visible=True)
                next_page_button = gr.Button("Next Page", visible=True)
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
                prompt_selector = gr.Dropdown(label="Select Prompt to View", choices=[])
            with gr.Column():
                results_table = gr.HTML()
                selected_prompt_display = gr.HTML()

        def _safe_page(value):
            """Coerce a page-number input to an int >= 1, falling back to 1."""
            try:
                return max(1, int(value))
            except (TypeError, ValueError, OverflowError):
                # An emptied Number box does not yield a usable integer.
                return 1

        # Function to view database entries
        def view_database(page, entries_per_page):
            """Render one page of prompts as an HTML table.

            Returns ``(table_html, pagination_text, total_pages, prompt_choices)``.
            On any failure returns an error paragraph, ``"Error"``, ``0`` and ``[]``.
            """
            cell_style = "border: 1px solid black; padding: 8px;"
            try:
                # Use list_prompts to get prompts and total pages
                prompts, total_pages, current_page = list_prompts(page=int(page), per_page=int(entries_per_page))

                parts = [
                    "<table style='width:100%; border-collapse: collapse;'>",
                    f"<tr><th style='{cell_style}'>Title</th><th style='{cell_style}'>Author</th></tr>",
                ]
                prompt_choices = []
                for prompt_name in prompts:
                    details = fetch_prompt_details(prompt_name)
                    if not details:
                        continue
                    title, author, _, _, _, _ = details
                    # html.escape() needs a str: fall back to the listed name
                    # for a missing title and to "Unknown" for a missing author.
                    title = str(title or prompt_name)
                    author = str(author or "Unknown")
                    parts.append(
                        f"<tr><td style='{cell_style}'>{html.escape(title)}</td>"
                        f"<td style='{cell_style}'>{html.escape(author)}</td></tr>"
                    )
                    prompt_choices.append(prompt_name)  # Using prompt_name as value
                parts.append("</table>")
                table_html = "".join(parts)

                # NOTE: this is an upper bound, not an exact count; it
                # overestimates whenever the last page is not full.
                total_prompts = total_pages * int(entries_per_page)

                pagination = f"Page {current_page} of {total_pages} (Total prompts: {total_prompts})"

                return table_html, pagination, total_pages, prompt_choices
            except Exception as e:
                logging.exception("Error fetching prompts")
                # The message is rendered by gr.HTML, so escape the exception text.
                return f"<p>Error fetching prompts: {html.escape(str(e))}</p>", "Error", 0, []

        # Function to update page content
        def update_page(page, entries_per_page):
            """Refresh table, pagination text, page number, button states and dropdown choices."""
            # Sanitise first so an empty or non-positive page number cannot
            # raise outside view_database's own error handling.
            page = _safe_page(page)
            results, pagination, total_pages, prompt_choices = view_database(page, entries_per_page)
            next_disabled = page >= total_pages
            prev_disabled = page <= 1
            return (
                results,
                pagination,
                page,
                gr.update(visible=True, interactive=not prev_disabled),  # previous_page_button
                gr.update(visible=True, interactive=not next_disabled),  # next_page_button
                gr.update(choices=prompt_choices),
            )

        # Function to go to the next page
        def go_to_next_page(current_page, entries_per_page):
            """Show the page after ``current_page``."""
            return update_page(_safe_page(current_page) + 1, entries_per_page)

        # Function to go to the previous page
        def go_to_previous_page(current_page, entries_per_page):
            """Show the page before ``current_page``, never going below page 1."""
            return update_page(max(1, _safe_page(current_page) - 1), entries_per_page)

        # Function to display selected prompt details
        def display_selected_prompt(prompt_name):
            """Return an HTML card for ``prompt_name``, or a not-found paragraph."""
            details = fetch_prompt_details(prompt_name) if prompt_name else None
            if not details:
                return "<p>Prompt not found.</p>"

            title, author, description, system_prompt, user_prompt, keywords = details
            # Handle None values so html.escape() always receives a string
            title = str(title or prompt_name)
            description = description or ""
            system_prompt = system_prompt or ""
            user_prompt = user_prompt or ""
            author = author or "Unknown"
            # NOTE(review): keywords is assumed to be a string here - confirm
            # against fetch_prompt_details.
            keywords = keywords or ""

            html_content = f"""
            <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 20px;">
                <h3>{html.escape(title)}</h3> <h4>by {html.escape(author)}</h4>
                <p><strong>Description:</strong> {html.escape(description)}</p>
                <div style="margin-top: 10px;">
                    <strong>System Prompt:</strong>
                    <pre style="white-space: pre-wrap; word-wrap: break-word;">{html.escape(system_prompt)}</pre>
                </div>
                <div style="margin-top: 10px;">
                    <strong>User Prompt:</strong>
                    <pre style="white-space: pre-wrap; word-wrap: break-word;">{html.escape(user_prompt)}</pre>
                </div>
                <p><strong>Keywords:</strong> {html.escape(keywords)}</p>
            </div>
            """
            return html_content

        # Event handlers: all three paging controls share inputs and outputs.
        paging_inputs = [page_number, entries_per_page]
        paging_outputs = [results_table, pagination_info, page_number, previous_page_button, next_page_button, prompt_selector]

        view_button.click(
            fn=update_page,
            inputs=paging_inputs,
            outputs=paging_outputs
        )

        next_page_button.click(
            fn=go_to_next_page,
            inputs=paging_inputs,
            outputs=paging_outputs
        )

        previous_page_button.click(
            fn=go_to_previous_page,
            inputs=paging_inputs,
            outputs=paging_outputs
        )

        prompt_selector.change(
            fn=display_selected_prompt,
            inputs=[prompt_selector],
            outputs=[selected_prompt_display]
        )
|
142 |
+
|
143 |
+
|
144 |
+
|
145 |
+
def create_prompts_export_tab():
    """Creates a tab for exporting prompts database content with multiple format options.

    The tab offers an export scope (all prompts or filtered by keyword), an
    output format (CSV or a ZIP of Markdown files), per-field include options
    and, for Markdown, a template choice. The export itself is delegated to
    ``export_prompts`` from ``App_Function_Libraries.DB.Prompts_DB``.
    """
    with gr.TabItem("Export Prompts", visible=True):
        gr.Markdown("# Export Prompts Database Content")

        with gr.Row():
            with gr.Column():
                export_type = gr.Radio(
                    choices=["All Prompts", "Prompts by Keyword"],
                    label="Export Type",
                    value="All Prompts"
                )

                # Keyword selection for filtered export
                with gr.Column(visible=False) as keyword_col:
                    keyword_input = gr.Textbox(
                        label="Enter Keywords (comma-separated)",
                        placeholder="Enter keywords to filter prompts..."
                    )

                # Export format selection
                export_format = gr.Radio(
                    choices=["CSV", "Markdown (ZIP)"],
                    label="Export Format",
                    value="CSV"
                )

                # Export options
                include_options = gr.CheckboxGroup(
                    choices=[
                        "Include System Prompts",
                        "Include User Prompts",
                        "Include Details",
                        "Include Author",
                        "Include Keywords"
                    ],
                    label="Export Options",
                    value=["Include Keywords", "Include Author"]
                )

                # Markdown-specific options (only visible when Markdown is selected)
                with gr.Column(visible=False) as markdown_options_col:
                    markdown_template = gr.Radio(
                        choices=[
                            "Basic Template",
                            "Detailed Template",
                            "Custom Template"
                        ],
                        label="Markdown Template",
                        value="Basic Template"
                    )
                    custom_template = gr.Textbox(
                        label="Custom Template",
                        placeholder="Use {title}, {author}, {details}, {system}, {user}, {keywords} as placeholders",
                        visible=False
                    )

                export_button = gr.Button("Export Prompts")

            with gr.Column():
                export_status = gr.Textbox(label="Export Status", interactive=False)
                export_file = gr.File(label="Download Export")

        def update_ui_visibility(export_type, format_choice, template_choice):
            """Update UI elements visibility based on selections.

            Returns updates for, in order: the keyword column, the Markdown
            options column and the custom-template textbox.
            """
            show_keywords = export_type == "Prompts by Keyword"
            show_markdown_options = format_choice == "Markdown (ZIP)"
            # The custom template box only makes sense while Markdown is selected.
            show_custom_template = template_choice == "Custom Template" and show_markdown_options

            return [
                gr.update(visible=show_keywords),  # keyword_col
                gr.update(visible=show_markdown_options),  # markdown_options_col
                gr.update(visible=show_custom_template)  # custom_template
            ]

        def handle_export(export_type, keywords, export_format, options, markdown_template, custom_template):
            """Handle the export process based on selected options.

            Returns ``(status, file_path)`` as produced by ``export_prompts``,
            or ``(error_message, None)`` if anything raises.
            """
            try:
                # Treat a missing selection as "nothing ticked" so the
                # membership tests below cannot fail on None.
                selected = set(options or [])
                include_system = "Include System Prompts" in selected
                include_user = "Include User Prompts" in selected
                include_details = "Include Details" in selected
                include_author = "Include Author" in selected
                include_keywords = "Include Keywords" in selected

                # Handle keyword filtering (blank entries are dropped)
                keyword_list = None
                if export_type == "Prompts by Keyword" and keywords:
                    keyword_list = [k.strip() for k in keywords.split(",") if k.strip()]

                # Get the appropriate template: the user's text for "Custom
                # Template", otherwise the name of the built-in template.
                template = None
                if export_format == "Markdown (ZIP)":
                    if markdown_template == "Custom Template":
                        template = custom_template
                    else:
                        template = markdown_template

                # Perform export (imported here, as in the original code)
                from App_Function_Libraries.DB.Prompts_DB import export_prompts
                status, file_path = export_prompts(
                    export_format=export_format.split()[0].lower(),  # 'csv' or 'markdown'
                    filter_keywords=keyword_list,
                    include_system=include_system,
                    include_user=include_user,
                    include_details=include_details,
                    include_author=include_author,
                    include_keywords=include_keywords,
                    markdown_template=template
                )

                return status, file_path

            except Exception as e:
                error_msg = f"Export failed: {str(e)}"
                # logging.exception records the traceback along with the message.
                logging.exception(error_msg)
                return error_msg, None

        # Event handlers: any of the three selectors re-evaluates all three
        # visibility flags, so they share one handler and one set of
        # inputs/outputs.
        visibility_inputs = [export_type, export_format, markdown_template]
        visibility_outputs = [keyword_col, markdown_options_col, custom_template]
        for selector in (export_type, export_format, markdown_template):
            selector.change(
                fn=update_ui_visibility,
                inputs=visibility_inputs,
                outputs=visibility_outputs
            )

        export_button.click(
            fn=handle_export,
            inputs=[
                export_type,
                keyword_input,
                export_format,
                include_options,
                markdown_template,
                custom_template
            ],
            outputs=[export_status, export_file]
        )
|
294 |
+
|
295 |
+
#
|
296 |
+
# End of Prompts_tab.py
|
297 |
+
####################################################################################################
|
App_Function_Libraries/Gradio_UI/RAG_Chat_tab.py
CHANGED
@@ -10,12 +10,26 @@ import gradio as gr
|
|
10 |
# Local Imports
|
11 |
|
12 |
from App_Function_Libraries.RAG.RAG_Library_2 import enhanced_rag_pipeline
|
|
|
|
|
|
|
13 |
#
|
14 |
########################################################################################################################
|
15 |
#
|
16 |
# Functions:
|
17 |
|
18 |
def create_rag_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
with gr.TabItem("RAG Search", visible=True):
|
20 |
gr.Markdown("# Retrieval-Augmented Generation (RAG) Search")
|
21 |
|
@@ -36,10 +50,11 @@ def create_rag_tab():
|
|
36 |
visible=False
|
37 |
)
|
38 |
|
|
|
39 |
api_choice = gr.Dropdown(
|
40 |
-
choices=["
|
41 |
-
|
42 |
-
|
43 |
)
|
44 |
search_button = gr.Button("Search")
|
45 |
|
|
|
10 |
# Local Imports
|
11 |
|
12 |
from App_Function_Libraries.RAG.RAG_Library_2 import enhanced_rag_pipeline
|
13 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
|
14 |
+
|
15 |
+
|
16 |
#
|
17 |
########################################################################################################################
|
18 |
#
|
19 |
# Functions:
|
20 |
|
21 |
def create_rag_tab():
|
22 |
+
try:
|
23 |
+
default_value = None
|
24 |
+
if default_api_endpoint:
|
25 |
+
if default_api_endpoint in global_api_endpoints:
|
26 |
+
default_value = format_api_name(default_api_endpoint)
|
27 |
+
else:
|
28 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
29 |
+
except Exception as e:
|
30 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
31 |
+
default_value = None
|
32 |
+
|
33 |
with gr.TabItem("RAG Search", visible=True):
|
34 |
gr.Markdown("# Retrieval-Augmented Generation (RAG) Search")
|
35 |
|
|
|
50 |
visible=False
|
51 |
)
|
52 |
|
53 |
+
# Refactored API selection dropdown
|
54 |
api_choice = gr.Dropdown(
|
55 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
56 |
+
value=default_value,
|
57 |
+
label="API for Chat Response (Optional)"
|
58 |
)
|
59 |
search_button = gr.Button("Search")
|
60 |
|
App_Function_Libraries/Gradio_UI/RAG_QA_Chat_tab.py
CHANGED
@@ -6,6 +6,7 @@ import csv
|
|
6 |
import logging
|
7 |
import json
|
8 |
import os
|
|
|
9 |
from datetime import datetime
|
10 |
#
|
11 |
# External Imports
|
@@ -14,32 +15,39 @@ import gradio as gr
|
|
14 |
#
|
15 |
# Local Imports
|
16 |
from App_Function_Libraries.Books.Book_Ingestion_Lib import read_epub
|
17 |
-
from App_Function_Libraries.DB.
|
18 |
-
from App_Function_Libraries.DB.
|
19 |
-
save_notes,
|
20 |
-
add_keywords_to_note,
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
get_notes_by_keywords,
|
28 |
-
get_notes_by_keyword_collection,
|
29 |
-
update_note,
|
30 |
-
clear_keywords_from_note, get_notes, get_keywords_for_note, delete_conversation, delete_note, execute_query,
|
31 |
-
add_keywords_to_conversation, fetch_all_notes, fetch_all_conversations, fetch_conversations_by_ids,
|
32 |
-
fetch_notes_by_ids,
|
33 |
-
)
|
34 |
from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_text_and_format_from_pdf
|
35 |
from App_Function_Libraries.RAG.RAG_Library_2 import generate_answer, enhanced_rag_pipeline
|
36 |
from App_Function_Libraries.RAG.RAG_QA_Chat import search_database, rag_qa_chat
|
|
|
|
|
|
|
|
|
37 |
#
|
38 |
########################################################################################################################
|
39 |
#
|
40 |
# Functions:
|
41 |
|
42 |
def create_rag_qa_chat_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
with gr.TabItem("RAG QA Chat", visible=True):
|
44 |
gr.Markdown("# RAG QA Chat")
|
45 |
|
@@ -47,18 +55,53 @@ def create_rag_qa_chat_tab():
|
|
47 |
"page": 1,
|
48 |
"context_source": "Entire Media Database",
|
49 |
"conversation_messages": [],
|
|
|
50 |
})
|
51 |
|
52 |
note_state = gr.State({"note_id": None})
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
# Update the conversation list function
|
55 |
def update_conversation_list():
|
56 |
conversations, total_pages, total_count = get_all_conversations()
|
57 |
-
choices = [
|
|
|
|
|
|
|
58 |
return choices
|
59 |
|
60 |
with gr.Row():
|
61 |
with gr.Column(scale=1):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
context_source = gr.Radio(
|
63 |
["All Files in the Database", "Search Database", "Upload File"],
|
64 |
label="Context Source",
|
@@ -71,19 +114,52 @@ def create_rag_qa_chat_tab():
|
|
71 |
next_page_btn = gr.Button("Next Page")
|
72 |
page_info = gr.HTML("Page 1")
|
73 |
top_k_input = gr.Number(value=10, label="Maximum amount of results to use (Default: 10)", minimum=1, maximum=50, step=1, precision=0, interactive=True)
|
74 |
-
keywords_input = gr.Textbox(label="Keywords (comma-separated) to filter results by)", visible=True)
|
75 |
use_query_rewriting = gr.Checkbox(label="Use Query Rewriting", value=True)
|
76 |
use_re_ranking = gr.Checkbox(label="Use Re-ranking", value=True)
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
search_query = gr.Textbox(label="Search Query", visible=False)
|
84 |
search_button = gr.Button("Search", visible=False)
|
85 |
search_results = gr.Dropdown(label="Search Results", choices=[], visible=False)
|
86 |
-
# FIXME - Add pages for search results handling
|
87 |
file_upload = gr.File(
|
88 |
label="Upload File",
|
89 |
visible=False,
|
@@ -95,34 +171,28 @@ def create_rag_qa_chat_tab():
|
|
95 |
load_conversation = gr.Dropdown(
|
96 |
label="Load Conversation",
|
97 |
choices=update_conversation_list()
|
98 |
-
|
99 |
new_conversation = gr.Button("New Conversation")
|
100 |
save_conversation_button = gr.Button("Save Conversation")
|
101 |
conversation_title = gr.Textbox(
|
102 |
-
label="Conversation Title",
|
|
|
103 |
)
|
104 |
keywords = gr.Textbox(label="Keywords (comma-separated)", visible=True)
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
api_choice = gr.Dropdown(
|
107 |
-
choices=[
|
108 |
-
|
109 |
-
|
110 |
-
"Anthropic",
|
111 |
-
"Cohere",
|
112 |
-
"Groq",
|
113 |
-
"DeepSeek",
|
114 |
-
"Mistral",
|
115 |
-
"OpenRouter",
|
116 |
-
"Llama.cpp",
|
117 |
-
"Kobold",
|
118 |
-
"Ooba",
|
119 |
-
"Tabbyapi",
|
120 |
-
"VLLM",
|
121 |
-
"ollama",
|
122 |
-
"HuggingFace",
|
123 |
-
],
|
124 |
-
label="Select API for RAG",
|
125 |
-
value="OpenAI",
|
126 |
)
|
127 |
|
128 |
with gr.Row():
|
@@ -145,6 +215,8 @@ def create_rag_qa_chat_tab():
|
|
145 |
clear_notes_btn = gr.Button("Clear Current Note text")
|
146 |
|
147 |
new_note_btn = gr.Button("New Note")
|
|
|
|
|
148 |
search_notes_by_keyword = gr.Textbox(label="Search Notes by Keyword")
|
149 |
search_notes_button = gr.Button("Search Notes")
|
150 |
note_results = gr.Dropdown(label="Notes", choices=[])
|
@@ -152,8 +224,58 @@ def create_rag_qa_chat_tab():
|
|
152 |
|
153 |
loading_indicator = gr.HTML("Loading...", visible=False)
|
154 |
status_message = gr.HTML()
|
|
|
|
|
|
|
155 |
|
156 |
# Function Definitions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
def update_state(state, **kwargs):
|
159 |
new_state = state.copy()
|
@@ -168,18 +290,28 @@ def create_rag_qa_chat_tab():
|
|
168 |
outputs=[note_title, notes, note_state]
|
169 |
)
|
170 |
|
171 |
-
def search_notes(keywords):
|
172 |
if keywords:
|
173 |
keywords_list = [kw.strip() for kw in keywords.split(',')]
|
174 |
notes_data, total_pages, total_count = get_notes_by_keywords(keywords_list)
|
175 |
-
choices = [f"Note {note_id} ({timestamp})" for
|
176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
else:
|
178 |
-
return
|
|
|
|
|
|
|
|
|
179 |
|
180 |
search_notes_button.click(
|
181 |
search_notes,
|
182 |
-
inputs=[search_notes_by_keyword],
|
183 |
outputs=[note_results]
|
184 |
)
|
185 |
|
@@ -201,31 +333,69 @@ def create_rag_qa_chat_tab():
|
|
201 |
|
202 |
def save_notes_function(note_title_text, notes_content, keywords_content, note_state_value, state_value):
|
203 |
"""Save the notes and associated keywords to the database."""
|
204 |
-
|
205 |
-
|
206 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
if note_id:
|
208 |
-
|
209 |
update_note(note_id, note_title_text, notes_content)
|
210 |
else:
|
211 |
-
|
212 |
-
note_id = save_notes(conversation_id, note_title_text, notes_content)
|
213 |
-
note_state_value
|
|
|
|
|
|
|
214 |
if keywords_content:
|
215 |
-
|
216 |
clear_keywords_from_note(note_id)
|
217 |
-
|
|
|
|
|
218 |
|
219 |
-
logging.info("Notes
|
220 |
-
return
|
221 |
-
|
222 |
-
|
223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
|
225 |
save_notes_btn.click(
|
226 |
save_notes_function,
|
227 |
inputs=[note_title, notes, keywords_for_notes, note_state, state],
|
228 |
-
outputs=[notes, note_state]
|
229 |
)
|
230 |
|
231 |
def clear_notes_function():
|
@@ -237,83 +407,112 @@ def create_rag_qa_chat_tab():
|
|
237 |
outputs=[notes, note_state]
|
238 |
)
|
239 |
|
240 |
-
def update_conversation_list():
|
241 |
-
conversations, total_pages, total_count = get_all_conversations()
|
242 |
-
choices = [f"{title} (ID: {conversation_id})" for conversation_id, title in conversations]
|
243 |
-
return choices
|
244 |
-
|
245 |
# Initialize the conversation list
|
246 |
load_conversation.choices = update_conversation_list()
|
247 |
|
248 |
def load_conversation_history(selected_conversation, state_value):
|
249 |
-
|
250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
chat_data, total_pages_val, _ = load_chat_history(conversation_id, 1, 50)
|
252 |
-
#
|
|
|
|
|
|
|
|
|
253 |
history = []
|
254 |
for role, content in chat_data:
|
255 |
if role == 'user':
|
256 |
history.append((content, ''))
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
|
|
|
|
|
|
|
|
|
|
263 |
notes_content = get_notes(conversation_id)
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
|
|
268 |
|
269 |
load_conversation.change(
|
270 |
load_conversation_history,
|
271 |
inputs=[load_conversation, state],
|
272 |
-
outputs=[chatbot, state, notes]
|
273 |
)
|
274 |
|
275 |
# Modify save_conversation_function to use gr.update()
|
276 |
-
def save_conversation_function(conversation_title_text, keywords_text, state_value):
|
277 |
conversation_messages = state_value.get("conversation_messages", [])
|
|
|
278 |
if not conversation_messages:
|
279 |
return gr.update(
|
280 |
value="<p style='color:red;'>No conversation to save.</p>"
|
281 |
-
), state_value, gr.update()
|
282 |
-
# Start a new conversation in the database
|
283 |
-
|
284 |
-
|
285 |
-
|
|
|
|
|
|
|
|
|
286 |
# Save the messages
|
287 |
for role, content in conversation_messages:
|
288 |
-
save_message(
|
289 |
# Save keywords if provided
|
290 |
if keywords_text:
|
291 |
-
add_keywords_to_conversation(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
# Update state
|
293 |
-
updated_state = update_state(state_value, conversation_id=
|
294 |
# Update the conversation list
|
295 |
conversation_choices = update_conversation_list()
|
|
|
|
|
|
|
296 |
return gr.update(
|
297 |
value="<p style='color:green;'>Conversation saved successfully.</p>"
|
298 |
-
), updated_state, gr.update(choices=conversation_choices)
|
299 |
|
300 |
save_conversation_button.click(
|
301 |
save_conversation_function,
|
302 |
-
inputs=[conversation_title, keywords, state],
|
303 |
-
outputs=[status_message, state, load_conversation]
|
304 |
)
|
305 |
|
306 |
def start_new_conversation_wrapper(title, state_value):
|
307 |
-
# Reset the state with no conversation_id
|
308 |
-
updated_state = update_state(state_value, conversation_id=None, page=1,
|
309 |
-
|
310 |
-
|
311 |
-
return [], updated_state
|
312 |
|
313 |
new_conversation.click(
|
314 |
start_new_conversation_wrapper,
|
315 |
inputs=[conversation_title, state],
|
316 |
-
outputs=[chatbot, state]
|
317 |
)
|
318 |
|
319 |
def update_file_list(page):
|
@@ -328,11 +527,12 @@ def create_rag_qa_chat_tab():
|
|
328 |
return update_file_list(max(1, current_page - 1))
|
329 |
|
330 |
def update_context_source(choice):
|
|
|
331 |
return {
|
332 |
existing_file: gr.update(visible=choice == "Existing File"),
|
333 |
-
prev_page_btn: gr.update(visible=choice == "
|
334 |
-
next_page_btn: gr.update(visible=choice == "
|
335 |
-
page_info: gr.update(visible=choice == "
|
336 |
search_query: gr.update(visible=choice == "Search Database"),
|
337 |
search_button: gr.update(visible=choice == "Search Database"),
|
338 |
search_results: gr.update(visible=choice == "Search Database"),
|
@@ -352,17 +552,36 @@ def create_rag_qa_chat_tab():
|
|
352 |
context_source.change(lambda choice: update_file_list(1) if choice == "Existing File" else (gr.update(), gr.update(), 1),
|
353 |
inputs=[context_source], outputs=[existing_file, page_info, file_page])
|
354 |
|
355 |
-
def perform_search(query):
|
356 |
try:
|
357 |
-
results =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
358 |
return gr.update(choices=results)
|
359 |
except Exception as e:
|
360 |
gr.Error(f"Error performing search: {str(e)}")
|
361 |
return gr.update(choices=[])
|
362 |
|
|
|
363 |
search_button.click(
|
364 |
perform_search,
|
365 |
-
inputs=[search_query],
|
366 |
outputs=[search_results]
|
367 |
)
|
368 |
|
@@ -384,17 +603,22 @@ Rewritten Question:"""
|
|
384 |
logging.info(f"Rephrased question: {rephrased_question}")
|
385 |
return rephrased_question.strip()
|
386 |
|
387 |
-
|
388 |
-
|
389 |
-
|
|
|
|
|
|
|
390 |
try:
|
391 |
logging.info(f"Starting rag_qa_chat_wrapper with message: {message}")
|
392 |
logging.info(f"Context source: {context_source}")
|
393 |
logging.info(f"API choice: {api_choice}")
|
394 |
logging.info(f"Query rewriting: {'enabled' if use_query_rewriting else 'disabled'}")
|
|
|
395 |
|
396 |
# Show loading indicator
|
397 |
-
yield history, "", gr.update(visible=True), state_value
|
|
|
398 |
|
399 |
conversation_id = state_value.get("conversation_id")
|
400 |
conversation_messages = state_value.get("conversation_messages", [])
|
@@ -408,12 +632,12 @@ Rewritten Question:"""
|
|
408 |
state_value["conversation_messages"] = conversation_messages
|
409 |
|
410 |
# Ensure api_choice is a string
|
411 |
-
|
412 |
-
logging.info(f"Resolved API choice: {
|
413 |
|
414 |
# Only rephrase the question if it's not the first query and query rewriting is enabled
|
415 |
if len(history) > 0 and use_query_rewriting:
|
416 |
-
rephrased_question = rephrase_question(history, message,
|
417 |
logging.info(f"Original question: {message}")
|
418 |
logging.info(f"Rephrased question: {rephrased_question}")
|
419 |
else:
|
@@ -421,18 +645,20 @@ Rewritten Question:"""
|
|
421 |
logging.info(f"Using original question: {message}")
|
422 |
|
423 |
if context_source == "All Files in the Database":
|
424 |
-
# Use the enhanced_rag_pipeline to search the
|
425 |
-
context = enhanced_rag_pipeline(
|
426 |
-
|
|
|
|
|
427 |
logging.info(f"Using enhanced_rag_pipeline for database search")
|
428 |
elif context_source == "Search Database":
|
429 |
context = f"media_id:{search_results.split('(ID: ')[1][:-1]}"
|
430 |
logging.info(f"Using search result with context: {context}")
|
431 |
-
else:
|
|
|
432 |
logging.info("Processing uploaded file")
|
433 |
if file_upload is None:
|
434 |
raise ValueError("No file uploaded")
|
435 |
-
|
436 |
# Process the uploaded file
|
437 |
file_path = file_upload.name
|
438 |
file_name = os.path.basename(file_path)
|
@@ -445,7 +671,6 @@ Rewritten Question:"""
|
|
445 |
logging.info("Reading file content")
|
446 |
with open(file_path, 'r', encoding='utf-8') as f:
|
447 |
content = f.read()
|
448 |
-
|
449 |
logging.info(f"File content length: {len(content)} characters")
|
450 |
|
451 |
# Process keywords
|
@@ -467,18 +692,17 @@ Rewritten Question:"""
|
|
467 |
author='Unknown',
|
468 |
ingestion_date=datetime.now().strftime('%Y-%m-%d')
|
469 |
)
|
470 |
-
|
471 |
logging.info(f"Result from add_media_with_keywords: {result}")
|
472 |
if isinstance(result, tuple):
|
473 |
media_id, _ = result
|
474 |
else:
|
475 |
media_id = result
|
476 |
-
|
477 |
context = f"media_id:{media_id}"
|
478 |
logging.info(f"Context for uploaded file: {context}")
|
479 |
|
480 |
logging.info("Calling rag_qa_chat function")
|
481 |
-
new_history, response = rag_qa_chat(rephrased_question, history, context,
|
|
|
482 |
# Log first 100 chars of response
|
483 |
logging.info(f"Response received from rag_qa_chat: {response[:100]}...")
|
484 |
|
@@ -490,7 +714,8 @@ Rewritten Question:"""
|
|
490 |
state_value["conversation_messages"] = conversation_messages
|
491 |
|
492 |
# Update the state
|
493 |
-
|
|
|
494 |
|
495 |
# Safely update history
|
496 |
if new_history:
|
@@ -498,24 +723,43 @@ Rewritten Question:"""
|
|
498 |
else:
|
499 |
new_history = [(message, response)]
|
500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
501 |
gr.Info("Response generated successfully")
|
502 |
logging.info("rag_qa_chat_wrapper completed successfully")
|
503 |
-
yield new_history, "", gr.update(
|
|
|
|
|
504 |
except ValueError as e:
|
505 |
logging.error(f"Input error in rag_qa_chat_wrapper: {str(e)}")
|
506 |
gr.Error(f"Input error: {str(e)}")
|
507 |
-
yield history, "", gr.update(visible=False), state_value
|
|
|
508 |
except DatabaseError as e:
|
509 |
logging.error(f"Database error in rag_qa_chat_wrapper: {str(e)}")
|
510 |
gr.Error(f"Database error: {str(e)}")
|
511 |
-
yield history, "", gr.update(visible=False), state_value
|
|
|
512 |
except Exception as e:
|
513 |
logging.error(f"Unexpected error in rag_qa_chat_wrapper: {e}", exc_info=True)
|
514 |
gr.Error("An unexpected error occurred. Please try again later.")
|
515 |
-
yield history, "", gr.update(visible=False), state_value
|
|
|
516 |
|
517 |
def clear_chat_history():
|
518 |
-
return [], ""
|
519 |
|
520 |
submit.click(
|
521 |
rag_qa_chat_wrapper,
|
@@ -532,14 +776,17 @@ Rewritten Question:"""
|
|
532 |
use_query_rewriting,
|
533 |
state,
|
534 |
keywords_input,
|
535 |
-
top_k_input
|
|
|
|
|
|
|
536 |
],
|
537 |
-
outputs=[chatbot, msg, loading_indicator, state],
|
538 |
)
|
539 |
|
540 |
clear_chat.click(
|
541 |
clear_chat_history,
|
542 |
-
outputs=[chatbot, msg]
|
543 |
)
|
544 |
|
545 |
return (
|
@@ -560,12 +807,10 @@ Rewritten Question:"""
|
|
560 |
)
|
561 |
|
562 |
|
563 |
-
|
564 |
def create_rag_qa_notes_management_tab():
|
565 |
# New Management Tab
|
566 |
with gr.TabItem("Notes Management", visible=True):
|
567 |
gr.Markdown("# RAG QA Notes Management")
|
568 |
-
|
569 |
management_state = gr.State({
|
570 |
"selected_conversation_id": None,
|
571 |
"selected_note_id": None,
|
@@ -574,7 +819,8 @@ def create_rag_qa_notes_management_tab():
|
|
574 |
with gr.Row():
|
575 |
with gr.Column(scale=1):
|
576 |
# Search Notes
|
577 |
-
|
|
|
578 |
search_notes_button = gr.Button("Search Notes")
|
579 |
notes_list = gr.Dropdown(label="Notes", choices=[])
|
580 |
|
@@ -583,24 +829,34 @@ def create_rag_qa_notes_management_tab():
|
|
583 |
delete_note_button = gr.Button("Delete Note")
|
584 |
note_title_input = gr.Textbox(label="Note Title")
|
585 |
note_content_input = gr.TextArea(label="Note Content", lines=20)
|
586 |
-
note_keywords_input = gr.Textbox(label="Note Keywords (comma-separated)")
|
587 |
save_note_button = gr.Button("Save Note")
|
588 |
create_new_note_button = gr.Button("Create New Note")
|
589 |
status_message = gr.HTML()
|
590 |
|
591 |
# Function Definitions
|
592 |
-
def search_notes(keywords):
|
593 |
if keywords:
|
594 |
keywords_list = [kw.strip() for kw in keywords.split(',')]
|
595 |
notes_data, total_pages, total_count = get_notes_by_keywords(keywords_list)
|
596 |
-
choices = [f"{title} (
|
597 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
598 |
else:
|
599 |
-
return
|
|
|
|
|
|
|
|
|
600 |
|
601 |
search_notes_button.click(
|
602 |
search_notes,
|
603 |
-
inputs=[
|
604 |
outputs=[notes_list]
|
605 |
)
|
606 |
|
@@ -664,7 +920,7 @@ def create_rag_qa_notes_management_tab():
|
|
664 |
# Reset state
|
665 |
state_value["selected_note_id"] = None
|
666 |
# Update notes list
|
667 |
-
updated_notes = search_notes("")
|
668 |
return updated_notes, gr.update(value="Note deleted successfully."), state_value
|
669 |
else:
|
670 |
return gr.update(), gr.update(value="No note selected."), state_value
|
@@ -702,7 +958,20 @@ def create_rag_qa_chat_management_tab():
|
|
702 |
with gr.Row():
|
703 |
with gr.Column(scale=1):
|
704 |
# Search Conversations
|
705 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
706 |
search_conversations_button = gr.Button("Search Conversations")
|
707 |
conversations_list = gr.Dropdown(label="Conversations", choices=[])
|
708 |
new_conversation_button = gr.Button("New Conversation")
|
@@ -716,26 +985,40 @@ def create_rag_qa_chat_management_tab():
|
|
716 |
status_message = gr.HTML()
|
717 |
|
718 |
# Function Definitions
|
719 |
-
def search_conversations(keywords):
|
720 |
-
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
725 |
|
726 |
-
|
727 |
-
|
728 |
-
|
729 |
-
|
730 |
-
|
731 |
-
|
732 |
-
|
|
|
|
|
733 |
|
734 |
-
|
735 |
|
|
|
|
|
|
|
|
|
|
|
736 |
search_conversations_button.click(
|
737 |
search_conversations,
|
738 |
-
inputs=[
|
739 |
outputs=[conversations_list, conversation_mapping]
|
740 |
)
|
741 |
|
@@ -892,19 +1175,18 @@ def create_rag_qa_chat_management_tab():
|
|
892 |
]
|
893 |
)
|
894 |
|
895 |
-
def
|
896 |
-
"""
|
897 |
try:
|
898 |
-
|
899 |
logging.info(f"Messages in conversation '{conversation_id}' deleted successfully.")
|
900 |
except Exception as e:
|
901 |
logging.error(f"Error deleting messages in conversation '{conversation_id}': {e}")
|
902 |
raise
|
903 |
|
904 |
-
def
|
905 |
"""Helper function to get the conversation title."""
|
906 |
-
|
907 |
-
result = execute_query(query, (conversation_id,))
|
908 |
if result:
|
909 |
return result[0][0]
|
910 |
else:
|
@@ -1034,19 +1316,6 @@ def create_export_data_tab():
|
|
1034 |
)
|
1035 |
|
1036 |
|
1037 |
-
|
1038 |
-
|
1039 |
-
def update_conversation_title(conversation_id, new_title):
|
1040 |
-
"""Update the title of a conversation."""
|
1041 |
-
try:
|
1042 |
-
query = "UPDATE conversation_metadata SET title = ? WHERE conversation_id = ?"
|
1043 |
-
execute_query(query, (new_title, conversation_id))
|
1044 |
-
logging.info(f"Conversation '{conversation_id}' title updated to '{new_title}'")
|
1045 |
-
except Exception as e:
|
1046 |
-
logging.error(f"Error updating conversation title: {e}")
|
1047 |
-
raise
|
1048 |
-
|
1049 |
-
|
1050 |
def convert_file_to_text(file_path):
|
1051 |
"""Convert various file types to plain text."""
|
1052 |
file_extension = os.path.splitext(file_path)[1].lower()
|
|
|
6 |
import logging
|
7 |
import json
|
8 |
import os
|
9 |
+
import re
|
10 |
from datetime import datetime
|
11 |
#
|
12 |
# External Imports
|
|
|
15 |
#
|
16 |
# Local Imports
|
17 |
from App_Function_Libraries.Books.Book_Ingestion_Lib import read_epub
|
18 |
+
from App_Function_Libraries.DB.Character_Chat_DB import search_character_chat, search_character_cards
|
19 |
+
from App_Function_Libraries.DB.DB_Manager import DatabaseError, get_paginated_files, add_media_with_keywords, \
|
20 |
+
get_all_conversations, get_note_by_id, get_notes_by_keywords, start_new_conversation, update_note, save_notes, \
|
21 |
+
clear_keywords_from_note, add_keywords_to_note, load_chat_history, save_message, add_keywords_to_conversation, \
|
22 |
+
get_keywords_for_note, delete_note, search_conversations_by_keywords, get_conversation_title, delete_conversation, \
|
23 |
+
update_conversation_title, fetch_all_conversations, fetch_all_notes, fetch_conversations_by_ids, fetch_notes_by_ids, \
|
24 |
+
search_media_db, search_notes_titles, list_prompts
|
25 |
+
from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_notes, delete_messages_in_conversation, search_rag_notes, \
|
26 |
+
search_rag_chat, get_conversation_rating, set_conversation_rating
|
27 |
+
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_user_prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_text_and_format_from_pdf
|
29 |
from App_Function_Libraries.RAG.RAG_Library_2 import generate_answer, enhanced_rag_pipeline
|
30 |
from App_Function_Libraries.RAG.RAG_QA_Chat import search_database, rag_qa_chat
|
31 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name, \
|
32 |
+
load_comprehensive_config
|
33 |
+
|
34 |
+
|
35 |
#
|
36 |
########################################################################################################################
|
37 |
#
|
38 |
# Functions:
|
39 |
|
40 |
def create_rag_qa_chat_tab():
|
41 |
+
try:
|
42 |
+
default_value = None
|
43 |
+
if default_api_endpoint:
|
44 |
+
if default_api_endpoint in global_api_endpoints:
|
45 |
+
default_value = format_api_name(default_api_endpoint)
|
46 |
+
else:
|
47 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
48 |
+
except Exception as e:
|
49 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
50 |
+
default_value = None
|
51 |
with gr.TabItem("RAG QA Chat", visible=True):
|
52 |
gr.Markdown("# RAG QA Chat")
|
53 |
|
|
|
55 |
"page": 1,
|
56 |
"context_source": "Entire Media Database",
|
57 |
"conversation_messages": [],
|
58 |
+
"conversation_id": None
|
59 |
})
|
60 |
|
61 |
note_state = gr.State({"note_id": None})
|
62 |
|
63 |
+
def auto_save_conversation(message, response, state_value, auto_save_enabled):
|
64 |
+
"""Automatically save the conversation if auto-save is enabled"""
|
65 |
+
try:
|
66 |
+
if not auto_save_enabled:
|
67 |
+
return state_value
|
68 |
+
|
69 |
+
conversation_id = state_value.get("conversation_id")
|
70 |
+
if not conversation_id:
|
71 |
+
# Create new conversation with default title
|
72 |
+
title = "Auto-saved Conversation " + datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
73 |
+
conversation_id = start_new_conversation(title=title)
|
74 |
+
state_value = state_value.copy()
|
75 |
+
state_value["conversation_id"] = conversation_id
|
76 |
+
|
77 |
+
# Save the messages
|
78 |
+
save_message(conversation_id, "user", message)
|
79 |
+
save_message(conversation_id, "assistant", response)
|
80 |
+
|
81 |
+
return state_value
|
82 |
+
except Exception as e:
|
83 |
+
logging.error(f"Error in auto-save: {str(e)}")
|
84 |
+
return state_value
|
85 |
+
|
86 |
# Update the conversation list function
|
87 |
def update_conversation_list():
|
88 |
conversations, total_pages, total_count = get_all_conversations()
|
89 |
+
choices = [
|
90 |
+
f"{conversation['title']} (ID: {conversation['conversation_id']}) - Rating: {conversation['rating'] or 'Not Rated'}"
|
91 |
+
for conversation in conversations
|
92 |
+
]
|
93 |
return choices
|
94 |
|
95 |
with gr.Row():
|
96 |
with gr.Column(scale=1):
|
97 |
+
# FIXME - Offer the user to search 2+ databases at once
|
98 |
+
database_types = ["Media DB", "RAG Chat", "RAG Notes", "Character Chat", "Character Cards"]
|
99 |
+
db_choice = gr.CheckboxGroup(
|
100 |
+
label="Select Database(s)",
|
101 |
+
choices=database_types,
|
102 |
+
value=["Media DB"],
|
103 |
+
interactive=True
|
104 |
+
)
|
105 |
context_source = gr.Radio(
|
106 |
["All Files in the Database", "Search Database", "Upload File"],
|
107 |
label="Context Source",
|
|
|
114 |
next_page_btn = gr.Button("Next Page")
|
115 |
page_info = gr.HTML("Page 1")
|
116 |
top_k_input = gr.Number(value=10, label="Maximum amount of results to use (Default: 10)", minimum=1, maximum=50, step=1, precision=0, interactive=True)
|
117 |
+
keywords_input = gr.Textbox(label="Keywords (comma-separated) to filter results by)", value="rag_qa_default_keyword" ,visible=True)
|
118 |
use_query_rewriting = gr.Checkbox(label="Use Query Rewriting", value=True)
|
119 |
use_re_ranking = gr.Checkbox(label="Use Re-ranking", value=True)
|
120 |
+
config = load_comprehensive_config()
|
121 |
+
auto_save_value = config.getboolean('auto-save', 'save_character_chats', fallback=False)
|
122 |
+
auto_save_checkbox = gr.Checkbox(
|
123 |
+
label="Save chats automatically",
|
124 |
+
value=auto_save_value,
|
125 |
+
info="When enabled, conversations will be saved automatically after each message"
|
126 |
+
)
|
127 |
+
|
128 |
+
initial_prompts, total_pages, current_page = list_prompts(page=1, per_page=10)
|
129 |
|
130 |
+
preset_prompt_checkbox = gr.Checkbox(
|
131 |
+
label="View Custom Prompts(have to copy/paste them)",
|
132 |
+
value=False,
|
133 |
+
visible=True
|
134 |
+
)
|
135 |
+
|
136 |
+
with gr.Row(visible=False) as preset_prompt_controls:
|
137 |
+
prev_prompt_page = gr.Button("Previous")
|
138 |
+
current_prompt_page_text = gr.Text(f"Page {current_page} of {total_pages}")
|
139 |
+
next_prompt_page = gr.Button("Next")
|
140 |
+
current_prompt_page_state = gr.State(value=1)
|
141 |
+
|
142 |
+
preset_prompt = gr.Dropdown(
|
143 |
+
label="Select Preset Prompt",
|
144 |
+
choices=initial_prompts,
|
145 |
+
visible=False
|
146 |
+
)
|
147 |
+
user_prompt = gr.Textbox(
|
148 |
+
label="Custom Prompt",
|
149 |
+
placeholder="Enter custom prompt here",
|
150 |
+
lines=3,
|
151 |
+
visible=False
|
152 |
+
)
|
153 |
+
|
154 |
+
system_prompt_input = gr.Textbox(
|
155 |
+
label="System Prompt",
|
156 |
+
lines=3,
|
157 |
+
visible=False
|
158 |
+
)
|
159 |
|
160 |
search_query = gr.Textbox(label="Search Query", visible=False)
|
161 |
search_button = gr.Button("Search", visible=False)
|
162 |
search_results = gr.Dropdown(label="Search Results", choices=[], visible=False)
|
|
|
163 |
file_upload = gr.File(
|
164 |
label="Upload File",
|
165 |
visible=False,
|
|
|
171 |
load_conversation = gr.Dropdown(
|
172 |
label="Load Conversation",
|
173 |
choices=update_conversation_list()
|
174 |
+
)
|
175 |
new_conversation = gr.Button("New Conversation")
|
176 |
save_conversation_button = gr.Button("Save Conversation")
|
177 |
conversation_title = gr.Textbox(
|
178 |
+
label="Conversation Title",
|
179 |
+
placeholder="Enter a title for the new conversation"
|
180 |
)
|
181 |
keywords = gr.Textbox(label="Keywords (comma-separated)", visible=True)
|
182 |
|
183 |
+
# Add the rating display and input
|
184 |
+
rating_display = gr.Markdown(value="", visible=False)
|
185 |
+
rating_input = gr.Radio(
|
186 |
+
choices=["1", "2", "3"],
|
187 |
+
label="Rate this Conversation (1-3 stars)",
|
188 |
+
visible=False
|
189 |
+
)
|
190 |
+
|
191 |
+
# Refactored API selection dropdown
|
192 |
api_choice = gr.Dropdown(
|
193 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
194 |
+
value=default_value,
|
195 |
+
label="API for Chat Response (Optional)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
)
|
197 |
|
198 |
with gr.Row():
|
|
|
215 |
clear_notes_btn = gr.Button("Clear Current Note text")
|
216 |
|
217 |
new_note_btn = gr.Button("New Note")
|
218 |
+
# FIXME - Change from only keywords to generalized search
|
219 |
+
search_notes_title = gr.Textbox(label="Search Notes by Title")
|
220 |
search_notes_by_keyword = gr.Textbox(label="Search Notes by Keyword")
|
221 |
search_notes_button = gr.Button("Search Notes")
|
222 |
note_results = gr.Dropdown(label="Notes", choices=[])
|
|
|
224 |
|
225 |
loading_indicator = gr.HTML("Loading...", visible=False)
|
226 |
status_message = gr.HTML()
|
227 |
+
auto_save_status = gr.HTML()
|
228 |
+
|
229 |
+
|
230 |
|
231 |
# Function Definitions
|
232 |
+
def update_prompt_page(direction, current_page_val):
|
233 |
+
new_page = max(1, min(total_pages, current_page_val + direction))
|
234 |
+
prompts, _, _ = list_prompts(page=new_page, per_page=10)
|
235 |
+
return (
|
236 |
+
gr.update(choices=prompts),
|
237 |
+
gr.update(value=f"Page {new_page} of {total_pages}"),
|
238 |
+
new_page
|
239 |
+
)
|
240 |
+
|
241 |
+
def update_prompts(preset_name):
|
242 |
+
prompts = update_user_prompt(preset_name)
|
243 |
+
return (
|
244 |
+
gr.update(value=prompts["user_prompt"], visible=True),
|
245 |
+
gr.update(value=prompts["system_prompt"], visible=True)
|
246 |
+
)
|
247 |
+
|
248 |
+
def toggle_preset_prompt(checkbox_value):
|
249 |
+
return (
|
250 |
+
gr.update(visible=checkbox_value),
|
251 |
+
gr.update(visible=checkbox_value),
|
252 |
+
gr.update(visible=False),
|
253 |
+
gr.update(visible=False)
|
254 |
+
)
|
255 |
+
|
256 |
+
prev_prompt_page.click(
|
257 |
+
lambda x: update_prompt_page(-1, x),
|
258 |
+
inputs=[current_prompt_page_state],
|
259 |
+
outputs=[preset_prompt, current_prompt_page_text, current_prompt_page_state]
|
260 |
+
)
|
261 |
+
|
262 |
+
next_prompt_page.click(
|
263 |
+
lambda x: update_prompt_page(1, x),
|
264 |
+
inputs=[current_prompt_page_state],
|
265 |
+
outputs=[preset_prompt, current_prompt_page_text, current_prompt_page_state]
|
266 |
+
)
|
267 |
+
|
268 |
+
preset_prompt.change(
|
269 |
+
update_prompts,
|
270 |
+
inputs=preset_prompt,
|
271 |
+
outputs=[user_prompt, system_prompt_input]
|
272 |
+
)
|
273 |
+
|
274 |
+
preset_prompt_checkbox.change(
|
275 |
+
toggle_preset_prompt,
|
276 |
+
inputs=[preset_prompt_checkbox],
|
277 |
+
outputs=[preset_prompt, preset_prompt_controls, user_prompt, system_prompt_input]
|
278 |
+
)
|
279 |
|
280 |
def update_state(state, **kwargs):
|
281 |
new_state = state.copy()
|
|
|
290 |
outputs=[note_title, notes, note_state]
|
291 |
)
|
292 |
|
293 |
+
def search_notes(search_notes_title, keywords):
|
294 |
if keywords:
|
295 |
keywords_list = [kw.strip() for kw in keywords.split(',')]
|
296 |
notes_data, total_pages, total_count = get_notes_by_keywords(keywords_list)
|
297 |
+
choices = [f"Note {note_id} - {title} ({timestamp})" for
|
298 |
+
note_id, title, content, timestamp, conversation_id in notes_data]
|
299 |
+
return gr.update(choices=choices, label=f"Found {total_count} notes")
|
300 |
+
elif search_notes_title:
|
301 |
+
notes_data, total_pages, total_count = search_notes_titles(search_notes_title)
|
302 |
+
choices = [f"Note {note_id} - {title} ({timestamp})" for
|
303 |
+
note_id, title, content, timestamp, conversation_id in notes_data]
|
304 |
+
return gr.update(choices=choices, label=f"Found {total_count} notes")
|
305 |
else:
|
306 |
+
# This will now return all notes, ordered by timestamp
|
307 |
+
notes_data, total_pages, total_count = search_notes_titles("")
|
308 |
+
choices = [f"Note {note_id} - {title} ({timestamp})" for
|
309 |
+
note_id, title, content, timestamp, conversation_id in notes_data]
|
310 |
+
return gr.update(choices=choices, label=f"All notes ({total_count} total)")
|
311 |
|
312 |
search_notes_button.click(
|
313 |
search_notes,
|
314 |
+
inputs=[search_notes_title, search_notes_by_keyword],
|
315 |
outputs=[note_results]
|
316 |
)
|
317 |
|
|
|
333 |
|
334 |
def save_notes_function(note_title_text, notes_content, keywords_content, note_state_value, state_value):
|
335 |
"""Save the notes and associated keywords to the database."""
|
336 |
+
logging.info(f"Starting save_notes_function with state: {state_value}")
|
337 |
+
logging.info(f"Note title: {note_title_text}")
|
338 |
+
logging.info(f"Notes content length: {len(notes_content) if notes_content else 0}")
|
339 |
+
|
340 |
+
try:
|
341 |
+
# Check current state
|
342 |
+
conversation_id = state_value.get("conversation_id")
|
343 |
+
logging.info(f"Current conversation_id: {conversation_id}")
|
344 |
+
|
345 |
+
# Create new conversation if none exists
|
346 |
+
if not conversation_id:
|
347 |
+
logging.info("No conversation ID found, creating new conversation")
|
348 |
+
conversation_title = note_title_text if note_title_text else "Untitled Conversation"
|
349 |
+
conversation_id = start_new_conversation(title=conversation_title)
|
350 |
+
state_value = state_value.copy() # Create a new copy of the state
|
351 |
+
state_value["conversation_id"] = conversation_id
|
352 |
+
logging.info(f"Created new conversation with ID: {conversation_id}")
|
353 |
+
|
354 |
+
if not notes_content:
|
355 |
+
logging.warning("No notes content provided")
|
356 |
+
return notes_content, note_state_value, state_value, gr.update(
|
357 |
+
value="<p style='color:red;'>Cannot save empty notes.</p>")
|
358 |
+
|
359 |
+
# Save or update note
|
360 |
+
note_id = note_state_value.get("note_id")
|
361 |
if note_id:
|
362 |
+
logging.info(f"Updating existing note with ID: {note_id}")
|
363 |
update_note(note_id, note_title_text, notes_content)
|
364 |
else:
|
365 |
+
logging.info(f"Creating new note for conversation: {conversation_id}")
|
366 |
+
note_id = save_notes(conversation_id, note_title_text or "Untitled Note", notes_content)
|
367 |
+
note_state_value = {"note_id": note_id}
|
368 |
+
logging.info(f"Created new note with ID: {note_id}")
|
369 |
+
|
370 |
+
# Handle keywords
|
371 |
if keywords_content:
|
372 |
+
logging.info("Processing keywords")
|
373 |
clear_keywords_from_note(note_id)
|
374 |
+
keywords = [kw.strip() for kw in keywords_content.split(',')]
|
375 |
+
add_keywords_to_note(note_id, keywords)
|
376 |
+
logging.info(f"Added keywords: {keywords}")
|
377 |
|
378 |
+
logging.info("Notes saved successfully")
|
379 |
+
return (
|
380 |
+
notes_content,
|
381 |
+
note_state_value,
|
382 |
+
state_value,
|
383 |
+
gr.update(value="<p style='color:green;'>Notes saved successfully!</p>")
|
384 |
+
)
|
385 |
+
|
386 |
+
except Exception as e:
|
387 |
+
logging.error(f"Error in save_notes_function: {str(e)}", exc_info=True)
|
388 |
+
return (
|
389 |
+
notes_content,
|
390 |
+
note_state_value,
|
391 |
+
state_value,
|
392 |
+
gr.update(value=f"<p style='color:red;'>Error saving notes: {str(e)}</p>")
|
393 |
+
)
|
394 |
|
395 |
save_notes_btn.click(
|
396 |
save_notes_function,
|
397 |
inputs=[note_title, notes, keywords_for_notes, note_state, state],
|
398 |
+
outputs=[notes, note_state, state, status_message]
|
399 |
)
|
400 |
|
401 |
def clear_notes_function():
|
|
|
407 |
outputs=[notes, note_state]
|
408 |
)
|
409 |
|
|
|
|
|
|
|
|
|
|
|
410 |
# Initialize the conversation list
|
411 |
load_conversation.choices = update_conversation_list()
|
412 |
|
413 |
def load_conversation_history(selected_conversation, state_value):
|
414 |
+
try:
|
415 |
+
if not selected_conversation:
|
416 |
+
return [], state_value, "", gr.update(value="", visible=False), gr.update(visible=False)
|
417 |
+
# Extract conversation ID
|
418 |
+
match = re.search(r'\(ID: ([0-9a-fA-F\-]+)\)', selected_conversation)
|
419 |
+
if not match:
|
420 |
+
logging.error(f"Invalid conversation format: {selected_conversation}")
|
421 |
+
return [], state_value, "", gr.update(value="", visible=False), gr.update(visible=False)
|
422 |
+
conversation_id = match.group(1)
|
423 |
chat_data, total_pages_val, _ = load_chat_history(conversation_id, 1, 50)
|
424 |
+
# Update state with valid conversation id
|
425 |
+
updated_state = state_value.copy()
|
426 |
+
updated_state["conversation_id"] = conversation_id
|
427 |
+
updated_state["conversation_messages"] = chat_data
|
428 |
+
# Format chat history
|
429 |
history = []
|
430 |
for role, content in chat_data:
|
431 |
if role == 'user':
|
432 |
history.append((content, ''))
|
433 |
+
elif history:
|
434 |
+
history[-1] = (history[-1][0], content)
|
435 |
+
# Fetch and display the conversation rating
|
436 |
+
rating = get_conversation_rating(conversation_id)
|
437 |
+
if rating is not None:
|
438 |
+
rating_text = f"**Current Rating:** {rating} star(s)"
|
439 |
+
rating_display_update = gr.update(value=rating_text, visible=True)
|
440 |
+
rating_input_update = gr.update(value=str(rating), visible=True)
|
441 |
+
else:
|
442 |
+
rating_display_update = gr.update(value="**Current Rating:** Not Rated", visible=True)
|
443 |
+
rating_input_update = gr.update(value=None, visible=True)
|
444 |
notes_content = get_notes(conversation_id)
|
445 |
+
return history, updated_state, "\n".join(
|
446 |
+
notes_content) if notes_content else "", rating_display_update, rating_input_update
|
447 |
+
except Exception as e:
|
448 |
+
logging.error(f"Error loading conversation: {str(e)}")
|
449 |
+
return [], state_value, "", gr.update(value="", visible=False), gr.update(visible=False)
|
450 |
|
451 |
load_conversation.change(
|
452 |
load_conversation_history,
|
453 |
inputs=[load_conversation, state],
|
454 |
+
outputs=[chatbot, state, notes, rating_display, rating_input]
|
455 |
)
|
456 |
|
457 |
# Modify save_conversation_function to use gr.update()
|
458 |
+
def save_conversation_function(conversation_title_text, keywords_text, rating_value, state_value):
|
459 |
conversation_messages = state_value.get("conversation_messages", [])
|
460 |
+
conversation_id = state_value.get("conversation_id")
|
461 |
if not conversation_messages:
|
462 |
return gr.update(
|
463 |
value="<p style='color:red;'>No conversation to save.</p>"
|
464 |
+
), state_value, gr.update(), gr.update(value="", visible=False), gr.update(visible=False)
|
465 |
+
# Start a new conversation in the database if not existing
|
466 |
+
if not conversation_id:
|
467 |
+
conversation_id = start_new_conversation(
|
468 |
+
conversation_title_text if conversation_title_text else "Untitled Conversation"
|
469 |
+
)
|
470 |
+
else:
|
471 |
+
# Update the conversation title if it has changed
|
472 |
+
update_conversation_title(conversation_id, conversation_title_text)
|
473 |
# Save the messages
|
474 |
for role, content in conversation_messages:
|
475 |
+
save_message(conversation_id, role, content)
|
476 |
# Save keywords if provided
|
477 |
if keywords_text:
|
478 |
+
add_keywords_to_conversation(conversation_id, [kw.strip() for kw in keywords_text.split(',')])
|
479 |
+
# Save the rating if provided
|
480 |
+
try:
|
481 |
+
if rating_value:
|
482 |
+
set_conversation_rating(conversation_id, int(rating_value))
|
483 |
+
except ValueError as ve:
|
484 |
+
logging.error(f"Invalid rating value: {ve}")
|
485 |
+
return gr.update(
|
486 |
+
value=f"<p style='color:red;'>Invalid rating: {ve}</p>"
|
487 |
+
), state_value, gr.update(), gr.update(value="", visible=False), gr.update(visible=False)
|
488 |
+
|
489 |
# Update state
|
490 |
+
updated_state = update_state(state_value, conversation_id=conversation_id)
|
491 |
# Update the conversation list
|
492 |
conversation_choices = update_conversation_list()
|
493 |
+
# Reset rating display and input
|
494 |
+
rating_display_update = gr.update(value=f"**Current Rating:** {rating_value} star(s)", visible=True)
|
495 |
+
rating_input_update = gr.update(value=rating_value, visible=True)
|
496 |
return gr.update(
|
497 |
value="<p style='color:green;'>Conversation saved successfully.</p>"
|
498 |
+
), updated_state, gr.update(choices=conversation_choices), rating_display_update, rating_input_update
|
499 |
|
500 |
save_conversation_button.click(
|
501 |
save_conversation_function,
|
502 |
+
inputs=[conversation_title, keywords, rating_input, state],
|
503 |
+
outputs=[status_message, state, load_conversation, rating_display, rating_input]
|
504 |
)
|
505 |
|
506 |
def start_new_conversation_wrapper(title, state_value):
    """Reset the session state and chat widgets for a brand-new conversation.

    Args:
        title: Value of the conversation title box; accepted because the click
            handler supplies it, but not read here.
        state_value: Current session state passed to ``update_state``.

    Returns:
        A 4-tuple: an empty chat history, the refreshed state, and two
        updates that clear and hide the rating display and rating input.
    """
    # Drop the active conversation id and any buffered messages.
    fresh_state = update_state(
        state_value,
        conversation_id=None,
        page=1,
        conversation_messages=[],
    )
    # A conversation that does not exist yet has no rating to show.
    hidden_rating_display = gr.update(value="", visible=False)
    hidden_rating_input = gr.update(value=None, visible=False)
    return [], fresh_state, hidden_rating_display, hidden_rating_input
|
|
|
511 |
|
512 |
new_conversation.click(
|
513 |
start_new_conversation_wrapper,
|
514 |
inputs=[conversation_title, state],
|
515 |
+
outputs=[chatbot, state, rating_display, rating_input]
|
516 |
)
|
517 |
|
518 |
def update_file_list(page):
|
|
|
527 |
return update_file_list(max(1, current_page - 1))
|
528 |
|
529 |
def update_context_source(choice):
|
530 |
+
# Update visibility based on context source choice
|
531 |
return {
|
532 |
existing_file: gr.update(visible=choice == "Existing File"),
|
533 |
+
prev_page_btn: gr.update(visible=choice == "Search Database"),
|
534 |
+
next_page_btn: gr.update(visible=choice == "Search Database"),
|
535 |
+
page_info: gr.update(visible=choice == "Search Database"),
|
536 |
search_query: gr.update(visible=choice == "Search Database"),
|
537 |
search_button: gr.update(visible=choice == "Search Database"),
|
538 |
search_results: gr.update(visible=choice == "Search Database"),
|
|
|
552 |
context_source.change(lambda choice: update_file_list(1) if choice == "Existing File" else (gr.update(), gr.update(), 1),
|
553 |
inputs=[context_source], outputs=[existing_file, page_info, file_page])
|
554 |
|
555 |
+
def perform_search(query, selected_databases, keywords):
    """Search every selected database and return the hits as dropdown choices.

    Args:
        query: Free-text search string.
        selected_databases: Iterable of database labels. Recognised values are
            "Media DB", "RAG Chat", "RAG Notes", "Character Chat" and
            "Character Cards"; any other label is ignored. ``None`` is treated
            as "nothing selected".
        keywords: Keyword filter; forwarded to the Media DB search only.

    Returns:
        A ``gr.update`` whose ``choices`` are the de-duplicated results in
        first-seen order, or an empty choice list if any search fails.
    """
    try:
        results = []

        # Iterate over selected database types and perform searches accordingly
        for database_type in selected_databases or []:
            if database_type == "Media DB":
                # FIXME - check for existence of keywords before setting as search field
                search_fields = ["title", "content", "keywords"]
                results += search_media_db(query, search_fields, keywords, page=1, results_per_page=25)
            elif database_type == "RAG Chat":
                results += search_rag_chat(query)
            elif database_type == "RAG Notes":
                results += search_rag_notes(query)
            elif database_type == "Character Chat":
                results += search_character_chat(query)
            elif database_type == "Character Cards":
                results += search_character_cards(query)

        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # the dropdown is stable between runs (a set has no defined order).
        results = list(dict.fromkeys(results))
        return gr.update(choices=results)
    except Exception as e:
        logging.error(f"Error performing search: {e}", exc_info=True)
        # gr.Error only reaches the UI when raised; raising here would skip
        # the dropdown reset below, so surface the failure as a warning toast.
        gr.Warning(f"Error performing search: {str(e)}")
        return gr.update(choices=[])
|
580 |
|
581 |
+
# Click Event for the DB Search Button
|
582 |
search_button.click(
|
583 |
perform_search,
|
584 |
+
inputs=[search_query, db_choice, keywords_input],
|
585 |
outputs=[search_results]
|
586 |
)
|
587 |
|
|
|
603 |
logging.info(f"Rephrased question: {rephrased_question}")
|
604 |
return rephrased_question.strip()
|
605 |
|
606 |
+
# FIXME - RAG DB selection
|
607 |
+
def rag_qa_chat_wrapper(
|
608 |
+
message, history, context_source, existing_file, search_results, file_upload,
|
609 |
+
convert_to_text, keywords, api_choice, use_query_rewriting, state_value,
|
610 |
+
keywords_input, top_k_input, use_re_ranking, db_choices, auto_save_enabled
|
611 |
+
):
|
612 |
try:
|
613 |
logging.info(f"Starting rag_qa_chat_wrapper with message: {message}")
|
614 |
logging.info(f"Context source: {context_source}")
|
615 |
logging.info(f"API choice: {api_choice}")
|
616 |
logging.info(f"Query rewriting: {'enabled' if use_query_rewriting else 'disabled'}")
|
617 |
+
logging.info(f"Selected DB Choices: {db_choices}")
|
618 |
|
619 |
# Show loading indicator
|
620 |
+
yield history, "", gr.update(visible=True), state_value, gr.update(visible=False), gr.update(
|
621 |
+
visible=False)
|
622 |
|
623 |
conversation_id = state_value.get("conversation_id")
|
624 |
conversation_messages = state_value.get("conversation_messages", [])
|
|
|
632 |
state_value["conversation_messages"] = conversation_messages
|
633 |
|
634 |
# Ensure api_choice is a string
|
635 |
+
api_choice_str = api_choice.value if isinstance(api_choice, gr.components.Dropdown) else api_choice
|
636 |
+
logging.info(f"Resolved API choice: {api_choice_str}")
|
637 |
|
638 |
# Only rephrase the question if it's not the first query and query rewriting is enabled
|
639 |
if len(history) > 0 and use_query_rewriting:
|
640 |
+
rephrased_question = rephrase_question(history, message, api_choice_str)
|
641 |
logging.info(f"Original question: {message}")
|
642 |
logging.info(f"Rephrased question: {rephrased_question}")
|
643 |
else:
|
|
|
645 |
logging.info(f"Using original question: {message}")
|
646 |
|
647 |
if context_source == "All Files in the Database":
|
648 |
+
# Use the enhanced_rag_pipeline to search the selected databases
|
649 |
+
context = enhanced_rag_pipeline(
|
650 |
+
rephrased_question, api_choice_str, keywords_input, top_k_input, use_re_ranking,
|
651 |
+
database_types=db_choices # Pass the list of selected databases
|
652 |
+
)
|
653 |
logging.info(f"Using enhanced_rag_pipeline for database search")
|
654 |
elif context_source == "Search Database":
|
655 |
context = f"media_id:{search_results.split('(ID: ')[1][:-1]}"
|
656 |
logging.info(f"Using search result with context: {context}")
|
657 |
+
else:
|
658 |
+
# Upload File
|
659 |
logging.info("Processing uploaded file")
|
660 |
if file_upload is None:
|
661 |
raise ValueError("No file uploaded")
|
|
|
662 |
# Process the uploaded file
|
663 |
file_path = file_upload.name
|
664 |
file_name = os.path.basename(file_path)
|
|
|
671 |
logging.info("Reading file content")
|
672 |
with open(file_path, 'r', encoding='utf-8') as f:
|
673 |
content = f.read()
|
|
|
674 |
logging.info(f"File content length: {len(content)} characters")
|
675 |
|
676 |
# Process keywords
|
|
|
692 |
author='Unknown',
|
693 |
ingestion_date=datetime.now().strftime('%Y-%m-%d')
|
694 |
)
|
|
|
695 |
logging.info(f"Result from add_media_with_keywords: {result}")
|
696 |
if isinstance(result, tuple):
|
697 |
media_id, _ = result
|
698 |
else:
|
699 |
media_id = result
|
|
|
700 |
context = f"media_id:{media_id}"
|
701 |
logging.info(f"Context for uploaded file: {context}")
|
702 |
|
703 |
logging.info("Calling rag_qa_chat function")
|
704 |
+
new_history, response = rag_qa_chat(rephrased_question, history, context, api_choice_str)
|
705 |
+
|
706 |
# Log first 100 chars of response
|
707 |
logging.info(f"Response received from rag_qa_chat: {response[:100]}...")
|
708 |
|
|
|
714 |
state_value["conversation_messages"] = conversation_messages
|
715 |
|
716 |
# Update the state
|
717 |
+
updated_state = auto_save_conversation(message, response, state_value, auto_save_enabled)
|
718 |
+
updated_state["conversation_messages"] = conversation_messages
|
719 |
|
720 |
# Safely update history
|
721 |
if new_history:
|
|
|
723 |
else:
|
724 |
new_history = [(message, response)]
|
725 |
|
726 |
+
# Get the current rating and update display
|
727 |
+
conversation_id = updated_state.get("conversation_id")
|
728 |
+
if conversation_id:
|
729 |
+
rating = get_conversation_rating(conversation_id)
|
730 |
+
if rating is not None:
|
731 |
+
rating_display_update = gr.update(value=f"**Current Rating:** {rating} star(s)", visible=True)
|
732 |
+
rating_input_update = gr.update(value=str(rating), visible=True)
|
733 |
+
else:
|
734 |
+
rating_display_update = gr.update(value="**Current Rating:** Not Rated", visible=True)
|
735 |
+
rating_input_update = gr.update(value=None, visible=True)
|
736 |
+
else:
|
737 |
+
rating_display_update = gr.update(value="", visible=False)
|
738 |
+
rating_input_update = gr.update(value=None, visible=False)
|
739 |
+
|
740 |
gr.Info("Response generated successfully")
|
741 |
logging.info("rag_qa_chat_wrapper completed successfully")
|
742 |
+
yield new_history, "", gr.update(
|
743 |
+
visible=False), updated_state, rating_display_update, rating_input_update
|
744 |
+
|
745 |
except ValueError as e:
|
746 |
logging.error(f"Input error in rag_qa_chat_wrapper: {str(e)}")
|
747 |
gr.Error(f"Input error: {str(e)}")
|
748 |
+
yield history, "", gr.update(visible=False), state_value, gr.update(visible=False), gr.update(
|
749 |
+
visible=False)
|
750 |
except DatabaseError as e:
|
751 |
logging.error(f"Database error in rag_qa_chat_wrapper: {str(e)}")
|
752 |
gr.Error(f"Database error: {str(e)}")
|
753 |
+
yield history, "", gr.update(visible=False), state_value, gr.update(visible=False), gr.update(
|
754 |
+
visible=False)
|
755 |
except Exception as e:
|
756 |
logging.error(f"Unexpected error in rag_qa_chat_wrapper: {e}", exc_info=True)
|
757 |
gr.Error("An unexpected error occurred. Please try again later.")
|
758 |
+
yield history, "", gr.update(visible=False), state_value, gr.update(visible=False), gr.update(
|
759 |
+
visible=False)
|
760 |
|
761 |
def clear_chat_history():
    """Produce the values that wipe the chat panel.

    Returns:
        A 4-tuple: an empty history list, an empty message string, and two
        updates that clear and hide the rating display and rating input.
    """
    cleared_rating_display = gr.update(value="", visible=False)
    cleared_rating_input = gr.update(value=None, visible=False)
    return [], "", cleared_rating_display, cleared_rating_input
|
763 |
|
764 |
submit.click(
|
765 |
rag_qa_chat_wrapper,
|
|
|
776 |
use_query_rewriting,
|
777 |
state,
|
778 |
keywords_input,
|
779 |
+
top_k_input,
|
780 |
+
use_re_ranking,
|
781 |
+
db_choice,
|
782 |
+
auto_save_checkbox
|
783 |
],
|
784 |
+
outputs=[chatbot, msg, loading_indicator, state, rating_display, rating_input],
|
785 |
)
|
786 |
|
787 |
clear_chat.click(
|
788 |
clear_chat_history,
|
789 |
+
outputs=[chatbot, msg, rating_display, rating_input]
|
790 |
)
|
791 |
|
792 |
return (
|
|
|
807 |
)
|
808 |
|
809 |
|
|
|
810 |
def create_rag_qa_notes_management_tab():
|
811 |
# New Management Tab
|
812 |
with gr.TabItem("Notes Management", visible=True):
|
813 |
gr.Markdown("# RAG QA Notes Management")
|
|
|
814 |
management_state = gr.State({
|
815 |
"selected_conversation_id": None,
|
816 |
"selected_note_id": None,
|
|
|
819 |
with gr.Row():
|
820 |
with gr.Column(scale=1):
|
821 |
# Search Notes
|
822 |
+
search_notes_title = gr.Textbox(label="Search Notes by Title")
|
823 |
+
search_notes_by_keyword = gr.Textbox(label="Search Notes by Keywords")
|
824 |
search_notes_button = gr.Button("Search Notes")
|
825 |
notes_list = gr.Dropdown(label="Notes", choices=[])
|
826 |
|
|
|
829 |
delete_note_button = gr.Button("Delete Note")
|
830 |
note_title_input = gr.Textbox(label="Note Title")
|
831 |
note_content_input = gr.TextArea(label="Note Content", lines=20)
|
832 |
+
note_keywords_input = gr.Textbox(label="Note Keywords (comma-separated)", value="default_note_keyword")
|
833 |
save_note_button = gr.Button("Save Note")
|
834 |
create_new_note_button = gr.Button("Create New Note")
|
835 |
status_message = gr.HTML()
|
836 |
|
837 |
# Function Definitions
|
838 |
+
def search_notes(search_notes_title, keywords):
    """Look up notes and return them as choices for the notes dropdown.

    Keywords take precedence over the title: when ``keywords`` is non-empty
    the notes are fetched by keyword, otherwise by title, and with neither
    given every note is listed (empty title search).

    Args:
        search_notes_title: Title text to search for.
        keywords: Comma-separated keyword string.

    Returns:
        A ``gr.update`` carrying the formatted choices and a label that
        reports how many notes were found.
    """
    if keywords:
        wanted_keywords = [kw.strip() for kw in keywords.split(',')]
        notes_data, total_pages, total_count = get_notes_by_keywords(wanted_keywords)
        dropdown_label = f"Found {total_count} notes"
    else:
        # An empty title search returns all notes, ordered by timestamp.
        title_term = search_notes_title if search_notes_title else ""
        notes_data, total_pages, total_count = search_notes_titles(title_term)
        if search_notes_title:
            dropdown_label = f"Found {total_count} notes"
        else:
            dropdown_label = f"All notes ({total_count} total)"

    choices = []
    for note_id, title, content, timestamp, conversation_id in notes_data:
        choices.append(f"Note {note_id} - {title} ({timestamp})")
    return gr.update(choices=choices, label=dropdown_label)
|
856 |
|
857 |
search_notes_button.click(
|
858 |
search_notes,
|
859 |
+
inputs=[search_notes_title, search_notes_by_keyword],
|
860 |
outputs=[notes_list]
|
861 |
)
|
862 |
|
|
|
920 |
# Reset state
|
921 |
state_value["selected_note_id"] = None
|
922 |
# Update notes list
|
923 |
+
updated_notes = search_notes("", "")
|
924 |
return updated_notes, gr.update(value="Note deleted successfully."), state_value
|
925 |
else:
|
926 |
return gr.update(), gr.update(value="No note selected."), state_value
|
|
|
958 |
with gr.Row():
|
959 |
with gr.Column(scale=1):
|
960 |
# Search Conversations
|
961 |
+
with gr.Group():
|
962 |
+
gr.Markdown("## Search Conversations")
|
963 |
+
title_search = gr.Textbox(
|
964 |
+
label="Search by Title",
|
965 |
+
placeholder="Enter title to search..."
|
966 |
+
)
|
967 |
+
content_search = gr.Textbox(
|
968 |
+
label="Search in Chat Content",
|
969 |
+
placeholder="Enter text to search in messages..."
|
970 |
+
)
|
971 |
+
keyword_search = gr.Textbox(
|
972 |
+
label="Filter by Keywords (comma-separated)",
|
973 |
+
placeholder="keyword1, keyword2, ..."
|
974 |
+
)
|
975 |
search_conversations_button = gr.Button("Search Conversations")
|
976 |
conversations_list = gr.Dropdown(label="Conversations", choices=[])
|
977 |
new_conversation_button = gr.Button("New Conversation")
|
|
|
985 |
status_message = gr.HTML()
|
986 |
|
987 |
# Function Definitions
|
988 |
+
def search_conversations(title_query, content_query, keywords):
    """Search conversations by title, message content and/or keywords.

    Args:
        title_query: Text to match against conversation titles; blank or
            ``None`` disables the title filter.
        content_query: Text to match inside messages; blank or ``None``
            disables the content filter.
        keywords: Comma-separated keyword string; blank or ``None`` disables
            the keyword filter.

    Returns:
        A 2-tuple ``(dropdown_update, mapping)`` where ``mapping`` maps each
        display title to its conversation id. On any error the dropdown is
        emptied and the mapping is ``{}``.
    """
    try:
        # Guard against None as well as blank strings: calling .strip() on
        # None would raise and throw away an otherwise valid search.
        title_term = title_query if title_query and title_query.strip() else None
        content_term = content_query if content_query and content_query.strip() else None

        # Parse keywords if provided, skipping empty fragments left by stray
        # or trailing commas.
        keywords_list = None
        if keywords and keywords.strip():
            keywords_list = [kw.strip() for kw in keywords.split(',') if kw.strip()] or None

        # Search using existing search_conversations_by_keywords function with all criteria
        results, total_pages, total_count = search_conversations_by_keywords(
            keywords=keywords_list,
            title_query=title_term,
            content_query=content_term
        )

        # Build choices as list of titles (ensure uniqueness)
        choices = []
        mapping = {}
        for conv in results:
            conversation_id = conv['conversation_id']
            title = conv['title']
            display_title = f"{title} (ID: {conversation_id[:8]})"
            # Only list a display title once; the mapping keeps the latest id.
            if display_title not in mapping:
                choices.append(display_title)
            mapping[display_title] = conversation_id

        return gr.update(choices=choices), mapping

    except Exception as e:
        logging.error(f"Error in search_conversations: {str(e)}", exc_info=True)
        return gr.update(choices=[]), {}
|
1017 |
+
|
1018 |
+
# Update the search button click event
|
1019 |
search_conversations_button.click(
|
1020 |
search_conversations,
|
1021 |
+
inputs=[title_search, content_search, keyword_search],
|
1022 |
outputs=[conversations_list, conversation_mapping]
|
1023 |
)
|
1024 |
|
|
|
1175 |
]
|
1176 |
)
|
1177 |
|
1178 |
+
def delete_messages_in_conversation_wrapper(conversation_id):
    """Delete all messages of one conversation and log the outcome.

    Args:
        conversation_id: Identifier of the conversation to empty.

    Raises:
        Exception: Whatever the underlying delete raises; it is logged and
            then re-raised unchanged.
    """
    try:
        delete_messages_in_conversation(conversation_id)
        logging.info(f"Messages in conversation '{conversation_id}' deleted successfully.")
    except Exception as exc:
        logging.error(f"Error deleting messages in conversation '{conversation_id}': {exc}")
        # Re-raise so the caller can react to the failure.
        raise
|
1186 |
|
1187 |
+
def get_conversation_title_wrapper(conversation_id):
|
1188 |
"""Helper function to get the conversation title."""
|
1189 |
+
result = get_conversation_title(conversation_id)
|
|
|
1190 |
if result:
|
1191 |
return result[0][0]
|
1192 |
else:
|
|
|
1316 |
)
|
1317 |
|
1318 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1319 |
def convert_file_to_text(file_path):
|
1320 |
"""Convert various file types to plain text."""
|
1321 |
file_extension = os.path.splitext(file_path)[1].lower()
|
App_Function_Libraries/Gradio_UI/Re_summarize_tab.py
CHANGED
@@ -10,19 +10,33 @@ import gradio as gr
|
|
10 |
#
|
11 |
# Local Imports
|
12 |
from App_Function_Libraries.Chunk_Lib import improved_chunking_process
|
13 |
-
from App_Function_Libraries.DB.DB_Manager import update_media_content,
|
14 |
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
|
15 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import fetch_item_details, fetch_items_by_keyword, \
|
16 |
fetch_items_by_content, fetch_items_by_title_or_url
|
17 |
from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize_chunk
|
18 |
-
from App_Function_Libraries.Utils.Utils import load_comprehensive_config
|
19 |
-
|
20 |
#
|
21 |
######################################################################################################################
|
22 |
#
|
23 |
# Functions:
|
24 |
|
25 |
def create_resummary_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
with gr.TabItem("Re-Summarize", visible=True):
|
27 |
gr.Markdown("# Re-Summarize Existing Content")
|
28 |
with gr.Row():
|
@@ -36,9 +50,10 @@ def create_resummary_tab():
|
|
36 |
|
37 |
with gr.Row():
|
38 |
api_name_input = gr.Dropdown(
|
39 |
-
choices=["
|
40 |
-
|
41 |
-
|
|
|
42 |
api_key_input = gr.Textbox(label="API Key", placeholder="Enter your API key here", type="password")
|
43 |
|
44 |
chunking_options_checkbox = gr.Checkbox(label="Use Chunking", value=False)
|
@@ -55,9 +70,17 @@ def create_resummary_tab():
|
|
55 |
preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
|
56 |
value=False,
|
57 |
visible=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
with gr.Row():
|
59 |
preset_prompt = gr.Dropdown(label="Select Preset Prompt",
|
60 |
-
choices=
|
61 |
visible=False)
|
62 |
with gr.Row():
|
63 |
custom_prompt_input = gr.Textbox(label="Custom Prompt",
|
@@ -86,6 +109,15 @@ def create_resummary_tab():
|
|
86 |
lines=3,
|
87 |
visible=False)
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
def update_prompts(preset_name):
|
90 |
prompts = update_user_prompt(preset_name)
|
91 |
return (
|
@@ -93,6 +125,19 @@ def create_resummary_tab():
|
|
93 |
gr.update(value=prompts["system_prompt"], visible=True)
|
94 |
)
|
95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
preset_prompt.change(
|
97 |
update_prompts,
|
98 |
inputs=preset_prompt,
|
@@ -109,9 +154,9 @@ def create_resummary_tab():
|
|
109 |
outputs=[custom_prompt_input, system_prompt_input]
|
110 |
)
|
111 |
preset_prompt_checkbox.change(
|
112 |
-
fn=lambda x: gr.update(visible=x),
|
113 |
inputs=[preset_prompt_checkbox],
|
114 |
-
outputs=[preset_prompt]
|
115 |
)
|
116 |
|
117 |
# Connect the UI elements
|
@@ -140,7 +185,12 @@ def create_resummary_tab():
|
|
140 |
outputs=result_output
|
141 |
)
|
142 |
|
143 |
-
return
|
|
|
|
|
|
|
|
|
|
|
144 |
|
145 |
|
146 |
def update_resummarize_dropdown(search_query, search_type):
|
|
|
10 |
#
|
11 |
# Local Imports
|
12 |
from App_Function_Libraries.Chunk_Lib import improved_chunking_process
|
13 |
+
from App_Function_Libraries.DB.DB_Manager import update_media_content, list_prompts
|
14 |
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
|
15 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import fetch_item_details, fetch_items_by_keyword, \
|
16 |
fetch_items_by_content, fetch_items_by_title_or_url
|
17 |
from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize_chunk
|
18 |
+
from App_Function_Libraries.Utils.Utils import load_comprehensive_config, default_api_endpoint, global_api_endpoints, \
|
19 |
+
format_api_name
|
20 |
#
|
21 |
######################################################################################################################
|
22 |
#
|
23 |
# Functions:
|
24 |
|
25 |
def create_resummary_tab():
|
26 |
+
try:
|
27 |
+
default_value = None
|
28 |
+
if default_api_endpoint:
|
29 |
+
if default_api_endpoint in global_api_endpoints:
|
30 |
+
default_value = format_api_name(default_api_endpoint)
|
31 |
+
else:
|
32 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
33 |
+
except Exception as e:
|
34 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
35 |
+
default_value = None
|
36 |
+
|
37 |
+
# Get initial prompts for first page
|
38 |
+
initial_prompts, total_pages, current_page = list_prompts(page=1, per_page=20)
|
39 |
+
|
40 |
with gr.TabItem("Re-Summarize", visible=True):
|
41 |
gr.Markdown("# Re-Summarize Existing Content")
|
42 |
with gr.Row():
|
|
|
50 |
|
51 |
with gr.Row():
|
52 |
api_name_input = gr.Dropdown(
|
53 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
54 |
+
value=default_value,
|
55 |
+
label="API for Summarization/Analysis (Optional)"
|
56 |
+
)
|
57 |
api_key_input = gr.Textbox(label="API Key", placeholder="Enter your API key here", type="password")
|
58 |
|
59 |
chunking_options_checkbox = gr.Checkbox(label="Use Chunking", value=False)
|
|
|
70 |
preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
|
71 |
value=False,
|
72 |
visible=True)
|
73 |
+
|
74 |
+
# Add pagination controls for preset prompts
|
75 |
+
with gr.Row(visible=False) as preset_prompt_controls:
|
76 |
+
prev_page = gr.Button("Previous")
|
77 |
+
current_page_text = gr.Text(f"Page {current_page} of {total_pages}")
|
78 |
+
next_page = gr.Button("Next")
|
79 |
+
current_page_state = gr.State(value=1)
|
80 |
+
|
81 |
with gr.Row():
|
82 |
preset_prompt = gr.Dropdown(label="Select Preset Prompt",
|
83 |
+
choices=initial_prompts,
|
84 |
visible=False)
|
85 |
with gr.Row():
|
86 |
custom_prompt_input = gr.Textbox(label="Custom Prompt",
|
|
|
109 |
lines=3,
|
110 |
visible=False)
|
111 |
|
112 |
+
def update_prompt_page(direction, current_page_val):
    """Move the preset-prompt dropdown one page forward or back.

    Args:
        direction: Page offset to apply (the pagination buttons pass -1 or 1).
        current_page_val: Page number currently held in the page state.

    Returns:
        A 3-tuple: an update with the new page's prompt choices, an update
        for the "Page X of Y" label, and the new page number.
    """
    # Clamp into [1, total_pages]; total_pages comes from the enclosing tab
    # builder's initial list_prompts(page=1, per_page=20) call.
    new_page = max(1, min(total_pages, current_page_val + direction))
    # The page size must match that initial load (20), otherwise the page
    # numbers and total_pages would refer to different slices of the list.
    prompts, _, _ = list_prompts(page=new_page, per_page=20)
    return (
        gr.update(choices=prompts),
        gr.update(value=f"Page {new_page} of {total_pages}"),
        new_page
    )
|
120 |
+
|
121 |
def update_prompts(preset_name):
|
122 |
prompts = update_user_prompt(preset_name)
|
123 |
return (
|
|
|
125 |
gr.update(value=prompts["system_prompt"], visible=True)
|
126 |
)
|
127 |
|
128 |
+
# Connect pagination buttons
|
129 |
+
prev_page.click(
|
130 |
+
lambda x: update_prompt_page(-1, x),
|
131 |
+
inputs=[current_page_state],
|
132 |
+
outputs=[preset_prompt, current_page_text, current_page_state]
|
133 |
+
)
|
134 |
+
|
135 |
+
next_page.click(
|
136 |
+
lambda x: update_prompt_page(1, x),
|
137 |
+
inputs=[current_page_state],
|
138 |
+
outputs=[preset_prompt, current_page_text, current_page_state]
|
139 |
+
)
|
140 |
+
|
141 |
preset_prompt.change(
|
142 |
update_prompts,
|
143 |
inputs=preset_prompt,
|
|
|
154 |
outputs=[custom_prompt_input, system_prompt_input]
|
155 |
)
|
156 |
preset_prompt_checkbox.change(
|
157 |
+
fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
158 |
inputs=[preset_prompt_checkbox],
|
159 |
+
outputs=[preset_prompt, preset_prompt_controls]
|
160 |
)
|
161 |
|
162 |
# Connect the UI elements
|
|
|
185 |
outputs=result_output
|
186 |
)
|
187 |
|
188 |
+
return (
|
189 |
+
search_query_input, search_type_input, search_button, items_output,
|
190 |
+
item_mapping, api_name_input, api_key_input, chunking_options_checkbox,
|
191 |
+
chunking_options_box, chunk_method, max_chunk_size, chunk_overlap,
|
192 |
+
custom_prompt_checkbox, custom_prompt_input, resummarize_button, result_output
|
193 |
+
)
|
194 |
|
195 |
|
196 |
def update_resummarize_dropdown(search_query, search_type):
|
App_Function_Libraries/Gradio_UI/Search_Tab.py
CHANGED
@@ -11,8 +11,8 @@ import gradio as gr
|
|
11 |
#
|
12 |
# Local Imports
|
13 |
from App_Function_Libraries.DB.DB_Manager import view_database, search_and_display_items, get_all_document_versions, \
|
14 |
-
fetch_item_details_single, fetch_paginated_data, fetch_item_details, get_latest_transcription
|
15 |
-
|
16 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_detailed_view
|
17 |
from App_Function_Libraries.Utils.Utils import get_database_path, format_text_with_line_breaks
|
18 |
#
|
@@ -80,8 +80,8 @@ def format_as_html(content, title):
|
|
80 |
"""
|
81 |
|
82 |
def create_search_tab():
|
83 |
-
with gr.TabItem("Search / Detailed View", visible=True):
|
84 |
-
gr.Markdown("# Search across all ingested items in the Database")
|
85 |
with gr.Row():
|
86 |
with gr.Column(scale=1):
|
87 |
gr.Markdown("by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
|
@@ -150,8 +150,8 @@ def display_search_results(query):
|
|
150 |
|
151 |
|
152 |
def create_search_summaries_tab():
|
153 |
-
with gr.TabItem("Search/View Title+Summary", visible=True):
|
154 |
-
gr.Markdown("# Search across all ingested items in the Database and review their summaries")
|
155 |
gr.Markdown("Search by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
|
156 |
with gr.Row():
|
157 |
with gr.Column():
|
|
|
11 |
#
|
12 |
# Local Imports
|
13 |
from App_Function_Libraries.DB.DB_Manager import view_database, search_and_display_items, get_all_document_versions, \
|
14 |
+
fetch_item_details_single, fetch_paginated_data, fetch_item_details, get_latest_transcription, search_prompts, \
|
15 |
+
get_document_version
|
16 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_detailed_view
|
17 |
from App_Function_Libraries.Utils.Utils import get_database_path, format_text_with_line_breaks
|
18 |
#
|
|
|
80 |
"""
|
81 |
|
82 |
def create_search_tab():
|
83 |
+
with gr.TabItem("Media DB Search / Detailed View", visible=True):
|
84 |
+
gr.Markdown("# Search across all ingested items in the Media Database")
|
85 |
with gr.Row():
|
86 |
with gr.Column(scale=1):
|
87 |
gr.Markdown("by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
|
|
|
150 |
|
151 |
|
152 |
def create_search_summaries_tab():
|
153 |
+
with gr.TabItem("Media DB Search/View Title+Summary", visible=True):
|
154 |
+
gr.Markdown("# Search across all ingested items in the Media Database and review their summaries")
|
155 |
gr.Markdown("Search by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
|
156 |
with gr.Row():
|
157 |
with gr.Column():
|
App_Function_Libraries/Gradio_UI/Semantic_Scholar_tab.py
ADDED
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Semantic_Scholar_tab.py
|
2 |
+
# Description: contains the code to create the Semantic Scholar tab in the Gradio UI.
|
3 |
+
#
|
4 |
+
# Imports
|
5 |
+
#
|
6 |
+
# External Libraries
|
7 |
+
import gradio as gr
|
8 |
+
#
|
9 |
+
# Internal Libraries
|
10 |
+
from App_Function_Libraries.Third_Party.Semantic_Scholar import search_and_display, FIELDS_OF_STUDY, PUBLICATION_TYPES
|
11 |
+
|
12 |
+
|
13 |
+
#
|
14 |
+
######################################################################################################################
|
15 |
+
# Functions
|
16 |
+
def create_semantic_scholar_tab():
|
17 |
+
"""Create the Semantic Scholar tab for the Gradio UI"""
|
18 |
+
with gr.Tab("Semantic Scholar Search"):
|
19 |
+
with gr.Row():
|
20 |
+
with gr.Column(scale=2):
|
21 |
+
gr.Markdown("""
|
22 |
+
## Semantic Scholar Paper Search
|
23 |
+
|
24 |
+
This interface allows you to search for academic papers using the Semantic Scholar API with advanced filtering options:
|
25 |
+
|
26 |
+
### Search Options
|
27 |
+
- **Keywords**: Search across titles, abstracts, and other paper content
|
28 |
+
- **Year Range**: Filter papers by publication year (e.g., "2020-2023" or "2020")
|
29 |
+
- **Venue**: Filter by publication venue (journal or conference)
|
30 |
+
- **Minimum Citations**: Filter papers by minimum citation count
|
31 |
+
- **Fields of Study**: Filter papers by academic field
|
32 |
+
- **Publication Types**: Filter by type of publication
|
33 |
+
- **Open Access**: Option to show only papers with free PDF access
|
34 |
+
|
35 |
+
### Results Include
|
36 |
+
- Paper title
|
37 |
+
- Author list
|
38 |
+
- Publication year and venue
|
39 |
+
- Citation count
|
40 |
+
- Publication types
|
41 |
+
- Abstract
|
42 |
+
- Links to PDF (when available) and Semantic Scholar page
|
43 |
+
""")
|
44 |
+
with gr.Column(scale=2):
|
45 |
+
gr.Markdown("""
|
46 |
+
### Pagination
|
47 |
+
- 10 results per page
|
48 |
+
- Navigate through results using Previous/Next buttons
|
49 |
+
- Current page number and total results displayed
|
50 |
+
|
51 |
+
### Usage Tips
|
52 |
+
- Combine multiple filters for more specific results
|
53 |
+
- Use specific terms for more focused results
|
54 |
+
- Try different combinations of filters if you don't find what you're looking for
|
55 |
+
""")
|
56 |
+
with gr.Row():
|
57 |
+
with gr.Column(scale=2):
|
58 |
+
search_input = gr.Textbox(
|
59 |
+
label="Search Query",
|
60 |
+
placeholder="Enter keywords to search for papers...",
|
61 |
+
lines=1
|
62 |
+
)
|
63 |
+
|
64 |
+
# Advanced search options
|
65 |
+
with gr.Row():
|
66 |
+
year_range = gr.Textbox(
|
67 |
+
label="Year Range",
|
68 |
+
placeholder="e.g., 2020-2023 or 2020",
|
69 |
+
lines=1
|
70 |
+
)
|
71 |
+
venue = gr.Textbox(
|
72 |
+
label="Venue",
|
73 |
+
placeholder="e.g., Nature, Science",
|
74 |
+
lines=1
|
75 |
+
)
|
76 |
+
min_citations = gr.Number(
|
77 |
+
label="Minimum Citations",
|
78 |
+
value=0,
|
79 |
+
minimum=0,
|
80 |
+
step=1
|
81 |
+
)
|
82 |
+
|
83 |
+
with gr.Row():
|
84 |
+
fields_of_study = gr.Dropdown(
|
85 |
+
choices=FIELDS_OF_STUDY,
|
86 |
+
label="Fields of Study",
|
87 |
+
multiselect=True,
|
88 |
+
value=[]
|
89 |
+
)
|
90 |
+
publication_types = gr.Dropdown(
|
91 |
+
choices=PUBLICATION_TYPES,
|
92 |
+
label="Publication Types",
|
93 |
+
multiselect=True,
|
94 |
+
value=[]
|
95 |
+
)
|
96 |
+
|
97 |
+
open_access_only = gr.Checkbox(
|
98 |
+
label="Open Access Only",
|
99 |
+
value=False
|
100 |
+
)
|
101 |
+
|
102 |
+
with gr.Column(scale=1):
|
103 |
+
search_button = gr.Button("Search", variant="primary")
|
104 |
+
|
105 |
+
# Pagination controls
|
106 |
+
with gr.Row():
|
107 |
+
prev_button = gr.Button("← Previous")
|
108 |
+
current_page = gr.Number(value=0, label="Page", minimum=0, step=1)
|
109 |
+
max_page = gr.Number(value=0, label="Max Page", visible=False)
|
110 |
+
next_button = gr.Button("Next →")
|
111 |
+
|
112 |
+
total_results = gr.Textbox(
|
113 |
+
label="Total Results",
|
114 |
+
value="0",
|
115 |
+
interactive=False
|
116 |
+
)
|
117 |
+
|
118 |
+
output_text = gr.Markdown(
|
119 |
+
label="Results",
|
120 |
+
value="Use the search options above to find papers."
|
121 |
+
)
|
122 |
+
|
123 |
+
def update_page(direction, current, maximum):
    """Return the page index after moving ``direction`` pages, clamped to [0, maximum].

    Args:
        direction: Page offset to apply (the buttons pass -1 or 1).
        current: Current page index; ``None`` (an emptied number box) is
            treated as 0.
        maximum: Highest valid page index; ``None`` is treated as 0.

    Returns:
        The new page index, never below 0 and never above ``maximum``.
    """
    # A cleared Number field arrives as None; None + int would raise TypeError.
    current = 0 if current is None else current
    maximum = 0 if maximum is None else maximum

    new_page = current + direction
    if new_page < 0:
        return 0
    if new_page > maximum:
        return maximum
    return new_page
|
130 |
+
|
131 |
+
# Handle search and pagination
|
132 |
+
def search_from_button(query, fields_of_study, publication_types, year_range, venue, min_citations,
|
133 |
+
open_access_only):
|
134 |
+
"""Wrapper to always search from page 0 when search button is clicked"""
|
135 |
+
return search_and_display(
|
136 |
+
query=query,
|
137 |
+
page=0, # Force page 0 for new searches
|
138 |
+
fields_of_study=fields_of_study,
|
139 |
+
publication_types=publication_types,
|
140 |
+
year_range=year_range,
|
141 |
+
venue=venue,
|
142 |
+
min_citations=min_citations,
|
143 |
+
open_access_only=open_access_only
|
144 |
+
)
|
145 |
+
normal_search = search_and_display
|
146 |
+
|
147 |
+
search_button.click(
|
148 |
+
fn=search_from_button,
|
149 |
+
inputs=[
|
150 |
+
search_input, fields_of_study, publication_types,
|
151 |
+
year_range, venue, min_citations, open_access_only
|
152 |
+
],
|
153 |
+
outputs=[output_text, current_page, max_page, total_results]
|
154 |
+
)
|
155 |
+
|
156 |
+
prev_button.click(
|
157 |
+
fn=lambda curr, max_p: update_page(-1, curr, max_p),
|
158 |
+
inputs=[current_page, max_page],
|
159 |
+
outputs=current_page
|
160 |
+
).then(
|
161 |
+
fn=normal_search,
|
162 |
+
inputs=[
|
163 |
+
search_input, current_page, fields_of_study, publication_types,
|
164 |
+
year_range, venue, min_citations, open_access_only
|
165 |
+
],
|
166 |
+
outputs=[output_text, current_page, max_page, total_results]
|
167 |
+
)
|
168 |
+
|
169 |
+
next_button.click(
|
170 |
+
fn=lambda curr, max_p: update_page(1, curr, max_p),
|
171 |
+
inputs=[current_page, max_page],
|
172 |
+
outputs=current_page
|
173 |
+
).then(
|
174 |
+
fn=normal_search,
|
175 |
+
inputs=[
|
176 |
+
search_input, current_page, fields_of_study, publication_types,
|
177 |
+
year_range, venue, min_citations, open_access_only
|
178 |
+
],
|
179 |
+
outputs=[output_text, current_page, max_page, total_results]
|
180 |
+
)
|
181 |
+
|
182 |
+
#
|
183 |
+
# End of Semantic_Scholar_tab.py
|
184 |
+
######################################################################################################################
|
App_Function_Libraries/Gradio_UI/Video_transcription_tab.py
CHANGED
@@ -6,22 +6,23 @@ import json
|
|
6 |
import logging
|
7 |
import os
|
8 |
from datetime import datetime
|
9 |
-
from typing import Dict, Any
|
10 |
-
|
11 |
#
|
12 |
# External Imports
|
13 |
import gradio as gr
|
14 |
import yt_dlp
|
|
|
|
|
15 |
#
|
16 |
# Local Imports
|
17 |
-
from App_Function_Libraries.DB.DB_Manager import
|
18 |
-
check_media_and_whisper_model, check_existing_media, update_media_content_with_version
|
19 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
|
20 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import error_handler
|
21 |
from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_transcription, perform_summarization, \
|
22 |
save_transcription_and_summary
|
23 |
from App_Function_Libraries.Utils.Utils import convert_to_seconds, safe_read_file, format_transcription, \
|
24 |
-
create_download_directory, generate_unique_identifier, extract_text_from_segments
|
|
|
25 |
from App_Function_Libraries.Video_DL_Ingestion_Lib import parse_and_expand_urls, extract_metadata, download_video
|
26 |
from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
|
27 |
# Import metrics logging
|
@@ -32,6 +33,16 @@ from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histo
|
|
32 |
# Functions:
|
33 |
|
34 |
def create_video_transcription_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
with gr.TabItem("Video Transcription + Summarization", visible=True):
|
36 |
gr.Markdown("# Transcribe & Summarize Videos from URLs")
|
37 |
with gr.Row():
|
@@ -56,15 +67,20 @@ def create_video_transcription_tab():
|
|
56 |
preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
|
57 |
value=False,
|
58 |
visible=True)
|
|
|
|
|
|
|
|
|
|
|
59 |
with gr.Row():
|
|
|
60 |
preset_prompt = gr.Dropdown(label="Select Preset Prompt",
|
61 |
-
choices=
|
62 |
visible=False)
|
63 |
with gr.Row():
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
visible=False)
|
68 |
with gr.Row():
|
69 |
system_prompt_input = gr.Textbox(label="System Prompt",
|
70 |
value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
@@ -87,22 +103,75 @@ def create_video_transcription_tab():
|
|
87 |
lines=3,
|
88 |
visible=False,
|
89 |
interactive=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
custom_prompt_checkbox.change(
|
91 |
-
fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
92 |
inputs=[custom_prompt_checkbox],
|
93 |
outputs=[custom_prompt_input, system_prompt_input]
|
94 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
preset_prompt_checkbox.change(
|
96 |
-
fn=
|
97 |
inputs=[preset_prompt_checkbox],
|
98 |
-
outputs=[preset_prompt]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
)
|
100 |
|
101 |
def update_prompts(preset_name):
|
102 |
prompts = update_user_prompt(preset_name)
|
103 |
return (
|
104 |
-
gr.update(value=prompts["user_prompt"], visible=True),
|
105 |
-
gr.update(value=prompts["system_prompt"], visible=True)
|
106 |
)
|
107 |
|
108 |
preset_prompt.change(
|
@@ -111,11 +180,12 @@ def create_video_transcription_tab():
|
|
111 |
outputs=[custom_prompt_input, system_prompt_input]
|
112 |
)
|
113 |
|
|
|
114 |
api_name_input = gr.Dropdown(
|
115 |
-
choices=[None
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
api_key_input = gr.Textbox(label="API Key (Optional - Set in Config.txt)", placeholder="Enter your API key here",
|
120 |
type="password")
|
121 |
keywords_input = gr.Textbox(label="Keywords", placeholder="Enter keywords here (comma-separated)",
|
@@ -198,8 +268,7 @@ def create_video_transcription_tab():
|
|
198 |
progress: gr.Progress = gr.Progress()) -> tuple:
|
199 |
try:
|
200 |
# Start overall processing timer
|
201 |
-
proc_start_time = datetime.
|
202 |
-
# FIXME - summarize_recursively is not being used...
|
203 |
logging.info("Entering process_videos_with_error_handling")
|
204 |
logging.info(f"Received inputs: {inputs}")
|
205 |
|
@@ -251,8 +320,7 @@ def create_video_transcription_tab():
|
|
251 |
all_summaries = ""
|
252 |
|
253 |
# Start timing
|
254 |
-
|
255 |
-
start_proc = datetime.utcnow()
|
256 |
|
257 |
for i in range(0, len(all_inputs), batch_size):
|
258 |
batch = all_inputs[i:i + batch_size]
|
@@ -260,7 +328,7 @@ def create_video_transcription_tab():
|
|
260 |
|
261 |
for input_item in batch:
|
262 |
# Start individual video processing timer
|
263 |
-
video_start_time = datetime.
|
264 |
try:
|
265 |
start_seconds = convert_to_seconds(start_time)
|
266 |
end_seconds = convert_to_seconds(end_time) if end_time else None
|
@@ -313,7 +381,7 @@ def create_video_transcription_tab():
|
|
313 |
input_item, 2, whisper_model,
|
314 |
custom_prompt,
|
315 |
start_seconds, api_name, api_key,
|
316 |
-
vad_use, False, False,
|
317 |
end_time=end_seconds,
|
318 |
include_timestamps=timestamp_option,
|
319 |
metadata=video_metadata,
|
@@ -365,7 +433,7 @@ def create_video_transcription_tab():
|
|
365 |
)
|
366 |
|
367 |
# Calculate processing time
|
368 |
-
video_end_time = datetime.
|
369 |
processing_time = (video_end_time - video_start_time).total_seconds()
|
370 |
log_histogram(
|
371 |
metric_name="video_processing_time_seconds",
|
@@ -473,7 +541,7 @@ def create_video_transcription_tab():
|
|
473 |
total_inputs = len(all_inputs)
|
474 |
|
475 |
# End overall processing timer
|
476 |
-
proc_end_time = datetime.
|
477 |
total_processing_time = (proc_end_time - proc_start_time).total_seconds()
|
478 |
log_histogram(
|
479 |
metric_name="total_processing_time_seconds",
|
@@ -702,8 +770,9 @@ def create_video_transcription_tab():
|
|
702 |
|
703 |
# Perform transcription
|
704 |
logging.info("process_url_with_metadata: Starting transcription...")
|
|
|
705 |
audio_file_path, segments = perform_transcription(video_file_path, offset, whisper_model,
|
706 |
-
vad_filter, diarize)
|
707 |
|
708 |
if audio_file_path is None or segments is None:
|
709 |
logging.error("process_url_with_metadata: Transcription failed or segments not available.")
|
@@ -771,7 +840,54 @@ def create_video_transcription_tab():
|
|
771 |
# API key resolution handled at base of function if none provided
|
772 |
api_key = api_key if api_key else None
|
773 |
logging.info(f"process_url_with_metadata: Starting summarization with {api_name}...")
|
774 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
775 |
if summary_text is None:
|
776 |
logging.error("Summarization failed.")
|
777 |
return None, None, None, None, None, None
|
@@ -859,3 +975,7 @@ def create_video_transcription_tab():
|
|
859 |
],
|
860 |
outputs=[progress_output, error_output, results_output, download_transcription, download_summary, confabulation_output]
|
861 |
)
|
|
|
|
|
|
|
|
|
|
6 |
import logging
|
7 |
import os
|
8 |
from datetime import datetime
|
|
|
|
|
9 |
#
|
10 |
# External Imports
|
11 |
import gradio as gr
|
12 |
import yt_dlp
|
13 |
+
|
14 |
+
from App_Function_Libraries.Chunk_Lib import improved_chunking_process
|
15 |
#
|
16 |
# Local Imports
|
17 |
+
from App_Function_Libraries.DB.DB_Manager import add_media_to_database, \
|
18 |
+
check_media_and_whisper_model, check_existing_media, update_media_content_with_version, list_prompts
|
19 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
|
20 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import error_handler
|
21 |
from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_transcription, perform_summarization, \
|
22 |
save_transcription_and_summary
|
23 |
from App_Function_Libraries.Utils.Utils import convert_to_seconds, safe_read_file, format_transcription, \
|
24 |
+
create_download_directory, generate_unique_identifier, extract_text_from_segments, default_api_endpoint, \
|
25 |
+
global_api_endpoints, format_api_name
|
26 |
from App_Function_Libraries.Video_DL_Ingestion_Lib import parse_and_expand_urls, extract_metadata, download_video
|
27 |
from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
|
28 |
# Import metrics logging
|
|
|
33 |
# Functions:
|
34 |
|
35 |
def create_video_transcription_tab():
|
36 |
+
try:
|
37 |
+
default_value = None
|
38 |
+
if default_api_endpoint:
|
39 |
+
if default_api_endpoint in global_api_endpoints:
|
40 |
+
default_value = format_api_name(default_api_endpoint)
|
41 |
+
else:
|
42 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
43 |
+
except Exception as e:
|
44 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
45 |
+
default_value = None
|
46 |
with gr.TabItem("Video Transcription + Summarization", visible=True):
|
47 |
gr.Markdown("# Transcribe & Summarize Videos from URLs")
|
48 |
with gr.Row():
|
|
|
67 |
preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
|
68 |
value=False,
|
69 |
visible=True)
|
70 |
+
|
71 |
+
# Initialize state variables for pagination
|
72 |
+
current_page_state = gr.State(value=1)
|
73 |
+
total_pages_state = gr.State(value=1)
|
74 |
+
|
75 |
with gr.Row():
|
76 |
+
# Add pagination controls
|
77 |
preset_prompt = gr.Dropdown(label="Select Preset Prompt",
|
78 |
+
choices=[],
|
79 |
visible=False)
|
80 |
with gr.Row():
|
81 |
+
prev_page_button = gr.Button("Previous Page", visible=False)
|
82 |
+
page_display = gr.Markdown("Page 1 of X", visible=False)
|
83 |
+
next_page_button = gr.Button("Next Page", visible=False)
|
|
|
84 |
with gr.Row():
|
85 |
system_prompt_input = gr.Textbox(label="System Prompt",
|
86 |
value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
|
|
103 |
lines=3,
|
104 |
visible=False,
|
105 |
interactive=True)
|
106 |
+
with gr.Row():
|
107 |
+
custom_prompt_input = gr.Textbox(label="Custom Prompt",
|
108 |
+
placeholder="Enter custom prompt here",
|
109 |
+
lines=3,
|
110 |
+
visible=False)
|
111 |
+
|
112 |
custom_prompt_checkbox.change(
|
113 |
+
fn=lambda x: (gr.update(visible=x, interactive=x), gr.update(visible=x, interactive=x)),
|
114 |
inputs=[custom_prompt_checkbox],
|
115 |
outputs=[custom_prompt_input, system_prompt_input]
|
116 |
)
|
117 |
+
|
118 |
+
def on_preset_prompt_checkbox_change(is_checked):
|
119 |
+
if is_checked:
|
120 |
+
prompts, total_pages, current_page = list_prompts(page=1, per_page=20)
|
121 |
+
page_display_text = f"Page {current_page} of {total_pages}"
|
122 |
+
return (
|
123 |
+
gr.update(visible=True, interactive=True, choices=prompts), # preset_prompt
|
124 |
+
gr.update(visible=True), # prev_page_button
|
125 |
+
gr.update(visible=True), # next_page_button
|
126 |
+
gr.update(value=page_display_text, visible=True), # page_display
|
127 |
+
current_page, # current_page_state
|
128 |
+
total_pages # total_pages_state
|
129 |
+
)
|
130 |
+
else:
|
131 |
+
return (
|
132 |
+
gr.update(visible=False, interactive=False), # preset_prompt
|
133 |
+
gr.update(visible=False), # prev_page_button
|
134 |
+
gr.update(visible=False), # next_page_button
|
135 |
+
gr.update(visible=False), # page_display
|
136 |
+
1, # current_page_state
|
137 |
+
1 # total_pages_state
|
138 |
+
)
|
139 |
+
|
140 |
preset_prompt_checkbox.change(
|
141 |
+
fn=on_preset_prompt_checkbox_change,
|
142 |
inputs=[preset_prompt_checkbox],
|
143 |
+
outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
|
144 |
+
)
|
145 |
+
|
146 |
+
def on_prev_page_click(current_page, total_pages):
|
147 |
+
new_page = max(current_page - 1, 1)
|
148 |
+
prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
|
149 |
+
page_display_text = f"Page {current_page} of {total_pages}"
|
150 |
+
return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
|
151 |
+
|
152 |
+
prev_page_button.click(
|
153 |
+
fn=on_prev_page_click,
|
154 |
+
inputs=[current_page_state, total_pages_state],
|
155 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
156 |
+
)
|
157 |
+
|
158 |
+
def on_next_page_click(current_page, total_pages):
|
159 |
+
new_page = min(current_page + 1, total_pages)
|
160 |
+
prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
|
161 |
+
page_display_text = f"Page {current_page} of {total_pages}"
|
162 |
+
return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
|
163 |
+
|
164 |
+
next_page_button.click(
|
165 |
+
fn=on_next_page_click,
|
166 |
+
inputs=[current_page_state, total_pages_state],
|
167 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
168 |
)
|
169 |
|
170 |
def update_prompts(preset_name):
|
171 |
prompts = update_user_prompt(preset_name)
|
172 |
return (
|
173 |
+
gr.update(value=prompts["user_prompt"], visible=True, interactive=True),
|
174 |
+
gr.update(value=prompts["system_prompt"], visible=True, interactive=True)
|
175 |
)
|
176 |
|
177 |
preset_prompt.change(
|
|
|
180 |
outputs=[custom_prompt_input, system_prompt_input]
|
181 |
)
|
182 |
|
183 |
+
# Refactored API selection dropdown
|
184 |
api_name_input = gr.Dropdown(
|
185 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
186 |
+
value=default_value,
|
187 |
+
label="API for Summarization/Analysis (Optional)"
|
188 |
+
)
|
189 |
api_key_input = gr.Textbox(label="API Key (Optional - Set in Config.txt)", placeholder="Enter your API key here",
|
190 |
type="password")
|
191 |
keywords_input = gr.Textbox(label="Keywords", placeholder="Enter keywords here (comma-separated)",
|
|
|
268 |
progress: gr.Progress = gr.Progress()) -> tuple:
|
269 |
try:
|
270 |
# Start overall processing timer
|
271 |
+
proc_start_time = datetime.now()
|
|
|
272 |
logging.info("Entering process_videos_with_error_handling")
|
273 |
logging.info(f"Received inputs: {inputs}")
|
274 |
|
|
|
320 |
all_summaries = ""
|
321 |
|
322 |
# Start timing
|
323 |
+
start_proc = datetime.now()
|
|
|
324 |
|
325 |
for i in range(0, len(all_inputs), batch_size):
|
326 |
batch = all_inputs[i:i + batch_size]
|
|
|
328 |
|
329 |
for input_item in batch:
|
330 |
# Start individual video processing timer
|
331 |
+
video_start_time = datetime.now()
|
332 |
try:
|
333 |
start_seconds = convert_to_seconds(start_time)
|
334 |
end_seconds = convert_to_seconds(end_time) if end_time else None
|
|
|
381 |
input_item, 2, whisper_model,
|
382 |
custom_prompt,
|
383 |
start_seconds, api_name, api_key,
|
384 |
+
vad_use, False, False, summarize_recursively, 0.01, None, keywords, None, diarize,
|
385 |
end_time=end_seconds,
|
386 |
include_timestamps=timestamp_option,
|
387 |
metadata=video_metadata,
|
|
|
433 |
)
|
434 |
|
435 |
# Calculate processing time
|
436 |
+
video_end_time = datetime.now()
|
437 |
processing_time = (video_end_time - video_start_time).total_seconds()
|
438 |
log_histogram(
|
439 |
metric_name="video_processing_time_seconds",
|
|
|
541 |
total_inputs = len(all_inputs)
|
542 |
|
543 |
# End overall processing timer
|
544 |
+
proc_end_time = datetime.now()
|
545 |
total_processing_time = (proc_end_time - proc_start_time).total_seconds()
|
546 |
log_histogram(
|
547 |
metric_name="total_processing_time_seconds",
|
|
|
770 |
|
771 |
# Perform transcription
|
772 |
logging.info("process_url_with_metadata: Starting transcription...")
|
773 |
+
logging.info(f"process_url_with_metadata: overwrite existing?: {overwrite_existing}")
|
774 |
audio_file_path, segments = perform_transcription(video_file_path, offset, whisper_model,
|
775 |
+
vad_filter, diarize, overwrite_existing)
|
776 |
|
777 |
if audio_file_path is None or segments is None:
|
778 |
logging.error("process_url_with_metadata: Transcription failed or segments not available.")
|
|
|
840 |
# API key resolution handled at base of function if none provided
|
841 |
api_key = api_key if api_key else None
|
842 |
logging.info(f"process_url_with_metadata: Starting summarization with {api_name}...")
|
843 |
+
|
844 |
+
# Perform Chunking if enabled
|
845 |
+
# FIXME - Setup a proper prompt for Recursive Summarization
|
846 |
+
if use_chunking:
|
847 |
+
logging.info("process_url_with_metadata: Chunking enabled. Starting chunking...")
|
848 |
+
chunked_texts = improved_chunking_process(full_text_with_metadata, chunk_options)
|
849 |
+
|
850 |
+
if chunked_texts is None:
|
851 |
+
logging.warning("Chunking failed, falling back to full text summarization")
|
852 |
+
summary_text = perform_summarization(api_name, full_text_with_metadata, custom_prompt,
|
853 |
+
api_key)
|
854 |
+
else:
|
855 |
+
logging.debug(
|
856 |
+
f"process_url_with_metadata: Chunking completed. Processing {len(chunked_texts)} chunks...")
|
857 |
+
summaries = []
|
858 |
+
|
859 |
+
if rolling_summarization:
|
860 |
+
# Perform recursive summarization on each chunk
|
861 |
+
for chunk in chunked_texts:
|
862 |
+
chunk_summary = perform_summarization(api_name, chunk['text'], custom_prompt,
|
863 |
+
api_key)
|
864 |
+
if chunk_summary:
|
865 |
+
summaries.append(
|
866 |
+
f"Chunk {chunk['metadata']['chunk_index']}/{chunk['metadata']['total_chunks']}: {chunk_summary}")
|
867 |
+
summary_text = "\n\n".join(summaries)
|
868 |
+
else:
|
869 |
+
logging.error("All chunk summarizations failed")
|
870 |
+
summary_text = None
|
871 |
+
|
872 |
+
for chunk in chunked_texts:
|
873 |
+
# Perform Non-recursive summarization on each chunk
|
874 |
+
chunk_summary = perform_summarization(api_name, chunk['text'], custom_prompt,
|
875 |
+
api_key)
|
876 |
+
if chunk_summary:
|
877 |
+
summaries.append(
|
878 |
+
f"Chunk {chunk['metadata']['chunk_index']}/{chunk['metadata']['total_chunks']}: {chunk_summary}")
|
879 |
+
|
880 |
+
if summaries:
|
881 |
+
summary_text = "\n\n".join(summaries)
|
882 |
+
logging.info(f"Successfully summarized {len(summaries)} chunks")
|
883 |
+
else:
|
884 |
+
logging.error("All chunk summarizations failed")
|
885 |
+
summary_text = None
|
886 |
+
else:
|
887 |
+
# Regular summarization without chunking
|
888 |
+
summary_text = perform_summarization(api_name, full_text_with_metadata, custom_prompt,
|
889 |
+
api_key) if api_name else None
|
890 |
+
|
891 |
if summary_text is None:
|
892 |
logging.error("Summarization failed.")
|
893 |
return None, None, None, None, None, None
|
|
|
975 |
],
|
976 |
outputs=[progress_output, error_output, results_output, download_transcription, download_summary, confabulation_output]
|
977 |
)
|
978 |
+
|
979 |
+
#
|
980 |
+
# End of Video_transcription_tab.py
|
981 |
+
#######################################################################################################################
|
App_Function_Libraries/Gradio_UI/View_DB_Items_tab.py
CHANGED
@@ -3,131 +3,26 @@
|
|
3 |
#
|
4 |
# Imports
|
5 |
import html
|
|
|
|
|
6 |
#
|
7 |
# External Imports
|
8 |
import gradio as gr
|
9 |
#
|
10 |
# Local Imports
|
11 |
from App_Function_Libraries.DB.DB_Manager import view_database, get_all_document_versions, \
|
12 |
-
fetch_paginated_data, fetch_item_details, get_latest_transcription, list_prompts, fetch_prompt_details
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
15 |
#
|
16 |
####################################################################################################
|
17 |
#
|
18 |
# Functions
|
19 |
|
20 |
-
def create_prompt_view_tab():
|
21 |
-
with gr.TabItem("View Prompt Database", visible=True):
|
22 |
-
gr.Markdown("# View Prompt Database Entries")
|
23 |
-
with gr.Row():
|
24 |
-
with gr.Column():
|
25 |
-
entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
|
26 |
-
page_number = gr.Number(value=1, label="Page Number", precision=0)
|
27 |
-
view_button = gr.Button("View Page")
|
28 |
-
next_page_button = gr.Button("Next Page")
|
29 |
-
previous_page_button = gr.Button("Previous Page")
|
30 |
-
pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
|
31 |
-
prompt_selector = gr.Dropdown(label="Select Prompt to View", choices=[])
|
32 |
-
with gr.Column():
|
33 |
-
results_table = gr.HTML()
|
34 |
-
selected_prompt_display = gr.HTML()
|
35 |
-
|
36 |
-
def view_database(page, entries_per_page):
|
37 |
-
try:
|
38 |
-
prompts, total_pages, current_page = list_prompts(page, entries_per_page)
|
39 |
-
|
40 |
-
table_html = "<table style='width:100%; border-collapse: collapse;'>"
|
41 |
-
table_html += "<tr><th style='border: 1px solid black; padding: 8px;'>Title</th><th style='border: 1px solid black; padding: 8px;'>Author</th></tr>"
|
42 |
-
prompt_choices = []
|
43 |
-
for prompt_name in prompts:
|
44 |
-
details = fetch_prompt_details(prompt_name)
|
45 |
-
if details:
|
46 |
-
title, _, _, _, _, _ = details
|
47 |
-
author = "Unknown" # Assuming author is not stored in the current schema
|
48 |
-
table_html += f"<tr><td style='border: 1px solid black; padding: 8px;'>{html.escape(title)}</td><td style='border: 1px solid black; padding: 8px;'>{html.escape(author)}</td></tr>"
|
49 |
-
prompt_choices.append((title, title)) # Using title as both label and value
|
50 |
-
table_html += "</table>"
|
51 |
-
|
52 |
-
total_prompts = len(load_preset_prompts()) # This might be inefficient for large datasets
|
53 |
-
pagination = f"Page {current_page} of {total_pages} (Total prompts: {total_prompts})"
|
54 |
-
|
55 |
-
return table_html, pagination, total_pages, prompt_choices
|
56 |
-
except Exception as e:
|
57 |
-
return f"<p>Error fetching prompts: {e}</p>", "Error", 0, []
|
58 |
-
|
59 |
-
def update_page(page, entries_per_page):
|
60 |
-
results, pagination, total_pages, prompt_choices = view_database(page, entries_per_page)
|
61 |
-
next_disabled = page >= total_pages
|
62 |
-
prev_disabled = page <= 1
|
63 |
-
return results, pagination, page, gr.update(interactive=not next_disabled), gr.update(
|
64 |
-
interactive=not prev_disabled), gr.update(choices=prompt_choices)
|
65 |
-
|
66 |
-
def go_to_next_page(current_page, entries_per_page):
|
67 |
-
next_page = current_page + 1
|
68 |
-
return update_page(next_page, entries_per_page)
|
69 |
-
|
70 |
-
def go_to_previous_page(current_page, entries_per_page):
|
71 |
-
previous_page = max(1, current_page - 1)
|
72 |
-
return update_page(previous_page, entries_per_page)
|
73 |
-
|
74 |
-
def display_selected_prompt(prompt_name):
|
75 |
-
details = fetch_prompt_details(prompt_name)
|
76 |
-
if details:
|
77 |
-
title, author, description, system_prompt, user_prompt, keywords = details
|
78 |
-
# Handle None values by converting them to empty strings
|
79 |
-
description = description or ""
|
80 |
-
system_prompt = system_prompt or ""
|
81 |
-
user_prompt = user_prompt or ""
|
82 |
-
author = author or "Unknown"
|
83 |
-
keywords = keywords or ""
|
84 |
-
|
85 |
-
html_content = f"""
|
86 |
-
<div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 20px;">
|
87 |
-
<h3>{html.escape(title)}</h3> <h4>by {html.escape(author)}</h4>
|
88 |
-
<p><strong>Description:</strong> {html.escape(description)}</p>
|
89 |
-
<div style="margin-top: 10px;">
|
90 |
-
<strong>System Prompt:</strong>
|
91 |
-
<pre style="white-space: pre-wrap; word-wrap: break-word;">{html.escape(system_prompt)}</pre>
|
92 |
-
</div>
|
93 |
-
<div style="margin-top: 10px;">
|
94 |
-
<strong>User Prompt:</strong>
|
95 |
-
<pre style="white-space: pre-wrap; word-wrap: break-word;">{html.escape(user_prompt)}</pre>
|
96 |
-
</div>
|
97 |
-
<p><strong>Keywords:</strong> {html.escape(keywords)}</p>
|
98 |
-
</div>
|
99 |
-
"""
|
100 |
-
return html_content
|
101 |
-
else:
|
102 |
-
return "<p>Prompt not found.</p>"
|
103 |
-
|
104 |
-
view_button.click(
|
105 |
-
fn=update_page,
|
106 |
-
inputs=[page_number, entries_per_page],
|
107 |
-
outputs=[results_table, pagination_info, page_number, next_page_button, previous_page_button,
|
108 |
-
prompt_selector]
|
109 |
-
)
|
110 |
-
|
111 |
-
next_page_button.click(
|
112 |
-
fn=go_to_next_page,
|
113 |
-
inputs=[page_number, entries_per_page],
|
114 |
-
outputs=[results_table, pagination_info, page_number, next_page_button, previous_page_button,
|
115 |
-
prompt_selector]
|
116 |
-
)
|
117 |
-
|
118 |
-
previous_page_button.click(
|
119 |
-
fn=go_to_previous_page,
|
120 |
-
inputs=[page_number, entries_per_page],
|
121 |
-
outputs=[results_table, pagination_info, page_number, next_page_button, previous_page_button,
|
122 |
-
prompt_selector]
|
123 |
-
)
|
124 |
-
|
125 |
-
prompt_selector.change(
|
126 |
-
fn=display_selected_prompt,
|
127 |
-
inputs=[prompt_selector],
|
128 |
-
outputs=[selected_prompt_display]
|
129 |
-
)
|
130 |
-
|
131 |
def format_as_html(content, title):
|
132 |
escaped_content = html.escape(content)
|
133 |
formatted_content = escaped_content.replace('\n', '<br>')
|
@@ -149,9 +44,9 @@ def extract_prompt_and_summary(content: str):
|
|
149 |
return prompt, summary
|
150 |
|
151 |
|
152 |
-
def
|
153 |
-
with gr.TabItem("View All Items", visible=True):
|
154 |
-
gr.Markdown("# View All Database Entries with Version Selection")
|
155 |
with gr.Row():
|
156 |
with gr.Column(scale=1):
|
157 |
entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
|
@@ -280,9 +175,143 @@ def create_view_all_with_versions_tab():
|
|
280 |
)
|
281 |
|
282 |
|
283 |
-
def
|
284 |
-
with gr.TabItem("
|
285 |
-
gr.Markdown("#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
with gr.Row():
|
287 |
with gr.Column():
|
288 |
entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
|
@@ -327,5 +356,461 @@ def create_viewing_tab():
|
|
327 |
outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
|
328 |
)
|
329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
330 |
#
|
331 |
-
|
|
|
3 |
#
|
4 |
# Imports
|
5 |
import html
|
6 |
+
import logging
|
7 |
+
|
8 |
#
|
9 |
# External Imports
|
10 |
import gradio as gr
|
11 |
#
|
12 |
# Local Imports
|
13 |
from App_Function_Libraries.DB.DB_Manager import view_database, get_all_document_versions, \
|
14 |
+
fetch_paginated_data, fetch_item_details, get_latest_transcription, list_prompts, fetch_prompt_details
|
15 |
+
from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_keywords_for_note, search_conversations_by_keywords, \
|
16 |
+
get_notes_by_keywords, get_keywords_for_conversation, get_db_connection, get_all_conversations, load_chat_history, \
|
17 |
+
get_notes
|
18 |
+
from App_Function_Libraries.DB.SQLite_DB import get_document_version, fetch_items_by_keyword, fetch_all_keywords
|
19 |
+
|
20 |
+
|
21 |
#
|
22 |
####################################################################################################
|
23 |
#
|
24 |
# Functions
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def format_as_html(content, title):
|
27 |
escaped_content = html.escape(content)
|
28 |
formatted_content = escaped_content.replace('\n', '<br>')
|
|
|
44 |
return prompt, summary
|
45 |
|
46 |
|
47 |
+
def create_view_all_mediadb_with_versions_tab():
|
48 |
+
with gr.TabItem("View All MediaDB Items", visible=True):
|
49 |
+
gr.Markdown("# View All Media Database Entries with Version Selection")
|
50 |
with gr.Row():
|
51 |
with gr.Column(scale=1):
|
52 |
entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
|
|
|
175 |
)
|
176 |
|
177 |
|
178 |
+
def create_mediadb_keyword_search_tab():
    """Build the "Search MediaDB by Keyword" Gradio tab.

    The tab offers a keyword dropdown, a page-size selector and a page number,
    and renders the media items tagged with the chosen keyword as an HTML
    table. "Next Page" / "Previous Page" re-run the search and write the new
    page number back into the page-number field.

    The function returns nothing; it only creates components and registers
    their event handlers in whatever Gradio layout context is active.
    """
    cell_style = "border: 1px solid black; padding: 8px;"

    with gr.TabItem("Search MediaDB by Keyword", visible=True):
        gr.Markdown("# List Media Database Items by Keyword")

        with gr.Row():
            with gr.Column(scale=1):
                # Keyword selection dropdown, populated from the DB at build time
                keyword_dropdown = gr.Dropdown(
                    label="Select Keyword",
                    choices=fetch_all_keywords(),
                    value=None
                )
                entries_per_page = gr.Dropdown(
                    choices=[10, 20, 50, 100],
                    label="Entries per Page",
                    value=10
                )
                page_number = gr.Number(
                    value=1,
                    label="Page Number",
                    precision=0
                )

                # Navigation buttons
                refresh_keywords_button = gr.Button("Refresh Keywords")
                view_button = gr.Button("View Results")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")

                # Pagination information
                pagination_info = gr.Textbox(
                    label="Pagination Info",
                    interactive=False
                )

            with gr.Column(scale=2):
                # Results area
                results_table = gr.HTML(
                    label="Search Results"
                )
                # NOTE(review): no handler in this tab writes to item_details;
                # kept so the rendered layout is unchanged.
                item_details = gr.HTML(
                    label="Item Details",
                    visible=True
                )

    def update_keyword_choices():
        """Reload the keyword list; on failure log it and clear the dropdown."""
        try:
            return gr.update(choices=fetch_all_keywords())
        except Exception as e:
            logging.error(f"Error refreshing MediaDB keywords: {str(e)}", exc_info=True)
            return gr.update(choices=[], value=None)

    def as_cell(value):
        """Render one table cell; None becomes an empty cell instead of raising."""
        text = "" if value is None else str(value)
        return f"<td style='{cell_style}'>{html.escape(text)}</td>"

    def search_items(keyword, page, entries_per_page):
        """Return (table_html, pagination_text, next_btn_update, prev_btn_update).

        All items for ``keyword`` are fetched and sliced in Python, so
        pagination happens here rather than in the database query.
        """
        if not keyword:
            return (
                "<p>Please select a keyword.</p>",
                "No results",
                gr.update(interactive=False),
                gr.update(interactive=False)
            )

        try:
            # A cleared number field arrives as None; treat it (and anything < 1) as page 1
            page = max(1, int(page or 1))
            offset = (page - 1) * entries_per_page

            items = fetch_items_by_keyword(keyword)
            total_items = len(items)
            total_pages = (total_items + entries_per_page - 1) // entries_per_page
            paginated_items = items[offset:offset + entries_per_page]

            rows = "".join(
                f"<tr>{as_cell(title)}{as_cell(url)}</tr>"
                for _item_id, title, url in paginated_items
            )
            table_html = (
                "<table style='width:100%; border-collapse: collapse;'>"
                f"<tr><th style='{cell_style}'>Title</th>"
                f"<th style='{cell_style}'>URL</th></tr>"
                f"{rows}</table>"
            )

            pagination = f"Page {page} of {total_pages} (Total items: {total_items})"

            return (
                table_html,
                pagination,
                gr.update(interactive=page < total_pages),
                gr.update(interactive=page > 1)
            )
        except Exception as e:
            logging.error(f"Error in search_items: {str(e)}", exc_info=True)
            # Escape the message: it is rendered inside a gr.HTML component
            return (
                f"<p>Error: {html.escape(str(e))}</p>",
                "Error in pagination",
                gr.update(interactive=False),
                gr.update(interactive=False)
            )

    def go_to_next_page(keyword, current_page, entries_per_page):
        """Search the following page and also return its number for the page field."""
        next_page = int(current_page or 1) + 1
        return search_items(keyword, next_page, entries_per_page) + (next_page,)

    def go_to_previous_page(keyword, current_page, entries_per_page):
        """Search the preceding page (never below 1) and return its number."""
        previous_page = max(1, int(current_page or 1) - 1)
        return search_items(keyword, previous_page, entries_per_page) + (previous_page,)

    # Event handlers
    refresh_keywords_button.click(
        fn=update_keyword_choices,
        inputs=[],
        outputs=[keyword_dropdown]
    )

    view_button.click(
        fn=search_items,
        inputs=[keyword_dropdown, page_number, entries_per_page],
        outputs=[results_table, pagination_info, next_page_button, previous_page_button]
    )

    next_page_button.click(
        fn=go_to_next_page,
        inputs=[keyword_dropdown, page_number, entries_per_page],
        outputs=[results_table, pagination_info, next_page_button, previous_page_button, page_number]
    )

    previous_page_button.click(
        fn=go_to_previous_page,
        inputs=[keyword_dropdown, page_number, entries_per_page],
        outputs=[results_table, pagination_info, next_page_button, previous_page_button, page_number]
    )
|
310 |
+
|
311 |
+
|
312 |
+
def create_viewing_mediadb_tab():
|
313 |
+
with gr.TabItem("View Media Database Entries", visible=True):
|
314 |
+
gr.Markdown("# View Media Database Entries")
|
315 |
with gr.Row():
|
316 |
with gr.Column():
|
317 |
entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
|
|
|
356 |
outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
|
357 |
)
|
358 |
|
359 |
+
#####################################################################
|
360 |
+
#
|
361 |
+
# RAG DB Viewing Functions:
|
362 |
+
|
363 |
+
def create_viewing_ragdb_tab():
    """Build the "View RAG Database Entries" Gradio tab.

    Shows one page of conversations at a time as an HTML table with the
    columns Title, Keywords, Notes (count) and Rating, plus Next/Previous
    paging. The function returns nothing; it only creates components and
    registers their event handlers.
    """
    cell_style = "border: 1px solid black; padding: 8px;"

    with gr.TabItem("View RAG Database Entries", visible=True):
        gr.Markdown("# View RAG Database Entries")
        with gr.Row():
            with gr.Column():
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
                view_button = gr.Button("View Page")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
            with gr.Column():
                results_display = gr.HTML()

    def as_cell(text):
        """Render one escaped table cell."""
        return f"<td style='{cell_style}'>{html.escape(str(text))}</td>"

    def format_conversations_table(conversations):
        """Render conversation dicts as an HTML table.

        Keywords and notes are looked up per conversation, i.e. two extra
        lookups for every row on the page.
        """
        header = "".join(
            f"<th style='{cell_style}'>{label}</th>"
            for label in ("Title", "Keywords", "Notes", "Rating")
        )
        rows = []
        for conversation in conversations:
            conv_id = conversation['conversation_id']
            # A missing or NULL rating is shown as an empty cell rather than "None"
            rating = conversation.get('rating')
            rating_text = '' if rating is None else rating

            # Guard against lookups that return None instead of an empty list
            keywords = get_keywords_for_conversation(conv_id) or []
            notes = get_notes(conv_id) or []

            rows.append(
                "<tr>"
                + as_cell(conversation['title'])
                + as_cell(', '.join(str(k) for k in keywords))
                + as_cell(f"{len(notes)} note(s)")
                + as_cell(rating_text)
                + "</tr>"
            )
        return (
            "<table style='width:100%; border-collapse: collapse;'>"
            f"<tr>{header}</tr>{''.join(rows)}</table>"
        )

    def update_page(page, entries_per_page):
        """Return (html, pagination_text, page, next_btn_update, prev_btn_update)."""
        try:
            conversations, total_pages, total_count = get_all_conversations(page, entries_per_page)
            results_html = format_conversations_table(conversations)
            pagination = f"Page {page} of {total_pages} (Total conversations: {total_count})"

            return (
                results_html,
                pagination,
                page,
                gr.update(interactive=page < total_pages),
                gr.update(interactive=page > 1)
            )
        except Exception as e:
            logging.error(f"Error in update_page: {str(e)}", exc_info=True)
            # Escape the message: it is rendered inside a gr.HTML component
            return (
                f"<p>Error: {html.escape(str(e))}</p>",
                "Error in pagination",
                page,
                gr.update(interactive=False),
                gr.update(interactive=False)
            )

    def go_to_next_page(current_page, entries_per_page):
        """Show the page after ``current_page``."""
        return update_page(current_page + 1, entries_per_page)

    def go_to_previous_page(current_page, entries_per_page):
        """Show the page before ``current_page``, never going below page 1."""
        return update_page(max(1, current_page - 1), entries_per_page)

    page_outputs = [results_display, pagination_info, page_number, next_page_button, previous_page_button]

    view_button.click(
        fn=update_page,
        inputs=[page_number, entries_per_page],
        outputs=page_outputs
    )

    next_page_button.click(
        fn=go_to_next_page,
        inputs=[page_number, entries_per_page],
        outputs=page_outputs
    )

    previous_page_button.click(
        fn=go_to_previous_page,
        inputs=[page_number, entries_per_page],
        outputs=page_outputs
    )
|
455 |
+
|
456 |
+
|
457 |
+
def create_view_all_rag_notes_tab():
    """Build the "View All RAG notes/Conversation Items" Gradio tab.

    One page of conversations is offered in a dropdown; selecting an entry
    shows its title, keywords, chat history and associated notes. A
    ``gr.State`` holds the mapping from dropdown label to conversation id for
    the page currently shown. The function returns nothing; it only creates
    components and registers their event handlers.
    """
    with gr.TabItem("View All RAG notes/Conversation Items", visible=True):
        gr.Markdown("# View All RAG Notes/Conversation Entries")
        with gr.Row():
            with gr.Column(scale=1):
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
                view_button = gr.Button("View Page")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
            with gr.Column(scale=2):
                items_output = gr.Dropdown(label="Select Conversation to View Details", choices=[])
                conversation_title = gr.Textbox(label="Conversation Title", visible=True)
        with gr.Row():
            with gr.Column(scale=1):
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
            with gr.Column(scale=2):
                keywords_output = gr.Textbox(label="Keywords", visible=True)
                chat_history_output = gr.HTML(label="Chat History", visible=True)
                notes_output = gr.HTML(label="Associated Notes", visible=True)

        # Maps "<title> (ID: <conversation_id>)" labels to conversation ids
        item_mapping = gr.State({})

    def update_page(page, entries_per_page):
        """Load one page of conversations and reset the detail panes.

        Returns a 10-tuple matching the ``outputs`` lists wired below:
        dropdown update, pagination text, page, next/prev button updates,
        four cleared detail fields, and the new label -> id mapping.
        """
        try:
            conversations, total_pages, total_count = get_all_conversations(page, entries_per_page)
            pagination = f"Page {page} of {total_pages} (Total conversations: {total_count})"

            new_item_mapping = {
                f"{conv['title']} (ID: {conv['conversation_id']})": conv['conversation_id']
                for conv in conversations
            }
            # NOTE(review): two conversations with an identical title and id
            # label would collapse into one entry here, as in the original mapping.
            choices = [f"{conv['title']} (ID: {conv['conversation_id']})" for conv in conversations]

            return (
                gr.update(choices=choices, value=None),
                pagination,
                page,
                gr.update(interactive=page < total_pages),
                gr.update(interactive=page > 1),
                "",  # conversation_title
                "",  # keywords_output
                "",  # chat_history_output
                "",  # notes_output
                new_item_mapping
            )
        except Exception as e:
            logging.error(f"Error in update_page: {str(e)}", exc_info=True)
            return (
                gr.update(choices=[], value=None),
                f"Error: {str(e)}",
                page,
                gr.update(interactive=False),
                gr.update(interactive=False),
                "", "", "", "",
                {}
            )

    def format_as_html(content, title):
        """Wrap ``content`` in a bordered, scrollable box headed by ``title``."""
        if content is None:
            content = "No content available."
        formatted_content = html.escape(str(content)).replace('\n', '<br>')
        return f"""
        <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 10px;">
            <h3>{html.escape(str(title))}</h3>
            <div style="max-height: 700px; overflow-y: auto;">
                {formatted_content}
            </div>
        </div>
        """

    def format_chat_history(messages):
        """Render ``(role, content)`` pairs as colored message boxes."""
        parts = ["<div style='max-height: 500px; overflow-y: auto;'>"]
        for role, content in messages:
            # str() so a None or non-string role/content cannot raise here
            role_text = "" if role is None else str(role)
            content_text = "" if content is None else str(content)
            role_class = "assistant" if role_text.lower() == "assistant" else "user"
            background = "#f0f0f0" if role_class == "user" else "#e3f2fd"
            # Newlines are collapsed by HTML rendering, so convert them as format_as_html does
            body = html.escape(content_text).replace('\n', '<br>')
            parts.append(f"""
            <div class='{role_class}-message' style='margin: 10px 0; padding: 10px; border-radius: 5px;
                        background-color: {background}'>
                <strong>{html.escape(role_text)}:</strong><br>
                {body}
            </div>
            """)
        parts.append("</div>")
        return "".join(parts)

    def display_conversation_details(selected_item, item_mapping):
        """Return (title, keywords_text, chat_html, notes_html) for the selection."""
        if not (selected_item and item_mapping and selected_item in item_mapping):
            return "", "", "", ""

        conv_id = item_mapping[selected_item]
        # rsplit: only the trailing " (ID: ...)" suffix added by update_page is
        # removed, so a title that itself contains " (ID:" stays intact.
        title = selected_item.rsplit(" (ID:", 1)[0]

        try:
            keywords = get_keywords_for_conversation(conv_id)
            keywords_text = ", ".join(keywords) if keywords else "No keywords"

            chat_messages, _, _ = load_chat_history(conv_id)
            chat_html = format_chat_history(chat_messages)

            notes = get_notes(conv_id)
            if notes:
                notes_html = "".join(format_as_html(note, "Note") for note in notes)
            else:
                notes_html = "<p>No notes associated with this conversation.</p>"

            return title, keywords_text, chat_html, notes_html
        except Exception as e:
            logging.error(f"Error in display_conversation_details: {str(e)}", exc_info=True)
            return title, "", f"<p>Error: {html.escape(str(e))}</p>", ""

    page_outputs = [
        items_output,
        pagination_info,
        page_number,
        next_page_button,
        previous_page_button,
        conversation_title,
        keywords_output,
        chat_history_output,
        notes_output,
        item_mapping
    ]

    view_button.click(
        fn=update_page,
        inputs=[page_number, entries_per_page],
        outputs=page_outputs
    )

    next_page_button.click(
        fn=lambda page, entries: update_page(page + 1, entries),
        inputs=[page_number, entries_per_page],
        outputs=page_outputs
    )

    previous_page_button.click(
        fn=lambda page, entries: update_page(max(1, page - 1), entries),
        inputs=[page_number, entries_per_page],
        outputs=page_outputs
    )

    items_output.change(
        fn=display_conversation_details,
        inputs=[items_output, item_mapping],
        outputs=[conversation_title, keywords_output, chat_history_output, notes_output]
    )
|
611 |
+
|
612 |
+
|
613 |
+
def create_ragdb_keyword_items_tab():
    """Build the "View RAG Notes/Conversations by Keyword" Gradio tab.

    The user selects one or more keywords; matching notes and conversations
    are shown in two sub-tabs and paged together with Next/Previous buttons.
    The function returns nothing; it only creates components and registers
    their event handlers.
    """

    def fetch_rag_keywords():
        """Return every keyword in ``rag_qa_keywords`` in alphabetical order."""
        query = "SELECT keyword FROM rag_qa_keywords ORDER BY keyword"
        with get_db_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            return [row[0] for row in cursor.fetchall()]

    # Load the keywords before the dropdown exists so they can be passed as
    # its initial choices. Assigning a gr.update payload to `.value` after
    # construction does not populate the choices.
    try:
        initial_keywords = fetch_rag_keywords()
    except Exception as e:
        logging.error(f"Error loading RAG keywords: {str(e)}", exc_info=True)
        initial_keywords = []

    with gr.TabItem("View RAG Notes/Conversations by Keyword", visible=True):
        gr.Markdown("# View RAG Notes and Conversations by Keyword")

        with gr.Row():
            with gr.Column(scale=1):
                # Keyword selection
                keyword_dropdown = gr.Dropdown(
                    label="Select Keyword",
                    choices=initial_keywords,
                    value=None,
                    multiselect=True
                )
                entries_per_page = gr.Dropdown(
                    choices=[10, 20, 50, 100],
                    label="Entries per Page",
                    value=10
                )
                page_number = gr.Number(
                    value=1,
                    label="Page Number",
                    precision=0
                )

                # Navigation buttons
                refresh_keywords_button = gr.Button("Refresh Keywords")
                view_button = gr.Button("View Items")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
                pagination_info = gr.Textbox(
                    label="Pagination Info",
                    interactive=False
                )

            with gr.Column(scale=2):
                # Results tabs for conversations and notes
                with gr.Tabs():
                    with gr.Tab("Notes"):
                        notes_results = gr.HTML()
                    with gr.Tab("Conversations"):
                        conversation_results = gr.HTML()

    def update_keyword_choices():
        """Fetch all available keywords for the dropdown."""
        try:
            return gr.update(choices=fetch_rag_keywords())
        except Exception as e:
            logging.error(f"Error refreshing RAG keywords: {str(e)}", exc_info=True)
            return gr.update(choices=[], value=None)

    def escaped(value):
        """HTML-escape any value; None becomes an empty string."""
        return html.escape("" if value is None else str(value))

    def format_conversations_html(conversations_data):
        """Format ``(conversation_id, title)`` rows as HTML cards."""
        if not conversations_data:
            return "<p>No conversations found for selected keywords.</p>"

        cards = []
        for conv_id, title in conversations_data:
            keywords = get_keywords_for_conversation(conv_id) or []
            cards.append(f"""
            <div style='border: 1px solid #ddd; padding: 10px; margin-bottom: 10px;'>
                <h3>{escaped(title)}</h3>
                <p>Conversation ID: {escaped(conv_id)}</p>
                <p><strong>Keywords:</strong> {', '.join(escaped(k) for k in keywords)}</p>
            </div>
            """)
        return "<div class='results-container'>" + "".join(cards) + "</div>"

    def format_notes_html(notes_data):
        """Format ``(note_id, title, content, timestamp)`` rows as HTML cards."""
        if not notes_data:
            return "<p>No notes found for selected keywords.</p>"

        cards = []
        for note_id, title, content, timestamp in notes_data:
            keywords = get_keywords_for_note(note_id) or []
            cards.append(f"""
            <div style='border: 1px solid #ddd; padding: 10px; margin-bottom: 10px;'>
                <h3>{escaped(title)}</h3>
                <p><strong>Created:</strong> {escaped(timestamp)}</p>
                <p><strong>Keywords:</strong> {', '.join(escaped(k) for k in keywords)}</p>
                <div style='background: #f5f5f5; padding: 10px; margin-top: 10px;'>
                    {escaped(content)}
                </div>
            </div>
            """)
        return "<div class='results-container'>" + "".join(cards) + "</div>"

    def view_items(keywords, page, entries_per_page):
        """Return (conversations_html, notes_html, pagination, next_update, prev_update)."""
        if not keywords:
            return (
                "<p>Please select at least one keyword.</p>",
                "<p>Please select at least one keyword.</p>",
                "No results",
                gr.update(interactive=False),
                gr.update(interactive=False)
            )

        try:
            # Ensure keywords is a list
            keywords_list = keywords if isinstance(keywords, list) else [keywords]

            conversations, conv_total_pages, conv_count = search_conversations_by_keywords(
                keywords_list, page, entries_per_page
            )
            notes, notes_total_pages, notes_count = get_notes_by_keywords(
                keywords_list, page, entries_per_page
            )

            conv_html = format_conversations_html(conversations)
            notes_html = format_notes_html(notes)

            # Both result sets share one page counter, so the longer one
            # decides how far "Next Page" can go.
            max_pages = max(conv_total_pages, notes_total_pages)
            pagination = f"Page {page} of {max_pages} "
            pagination += f"(Conversations: {conv_count}, Notes: {notes_count})"

            return (
                conv_html,
                notes_html,
                pagination,
                gr.update(interactive=page < max_pages),
                gr.update(interactive=page > 1)
            )
        except Exception as e:
            logging.error(f"Error in view_items: {str(e)}", exc_info=True)
            # Escape the message: it is rendered inside gr.HTML components
            error_html = f"<p>Error: {html.escape(str(e))}</p>"
            return (
                error_html,
                error_html,
                "Error in retrieval",
                gr.update(interactive=False),
                gr.update(interactive=False)
            )

    def go_to_next_page(keywords, current_page, entries_per_page):
        """Show the next page and return its number for the page field."""
        next_page = int(current_page or 1) + 1
        return view_items(keywords, next_page, entries_per_page) + (next_page,)

    def go_to_previous_page(keywords, current_page, entries_per_page):
        """Show the previous page (never below 1) and return its number."""
        previous_page = max(1, int(current_page or 1) - 1)
        return view_items(keywords, previous_page, entries_per_page) + (previous_page,)

    result_outputs = [
        conversation_results,
        notes_results,
        pagination_info,
        next_page_button,
        previous_page_button
    ]

    # Event handlers
    refresh_keywords_button.click(
        fn=update_keyword_choices,
        inputs=[],
        outputs=[keyword_dropdown]
    )

    view_button.click(
        fn=view_items,
        inputs=[keyword_dropdown, page_number, entries_per_page],
        outputs=result_outputs
    )

    # The paging handlers also write page_number, so repeated clicks keep
    # advancing instead of re-requesting the same page.
    next_page_button.click(
        fn=go_to_next_page,
        inputs=[keyword_dropdown, page_number, entries_per_page],
        outputs=result_outputs + [page_number]
    )

    previous_page_button.click(
        fn=go_to_previous_page,
        inputs=[keyword_dropdown, page_number, entries_per_page],
        outputs=result_outputs + [page_number]
    )
|
810 |
+
|
811 |
+
#
|
812 |
+
# End of RAG DB Viewing tabs
|
813 |
+
################################################################
|
814 |
+
|
815 |
#
|
816 |
+
#######################################################################################################################
|
App_Function_Libraries/Gradio_UI/Website_scraping_tab.py
CHANGED
@@ -1,554 +1,754 @@
|
|
1 |
-
# Website_scraping_tab.py
|
2 |
-
# Gradio UI for scraping websites
|
3 |
-
#
|
4 |
-
# Imports
|
5 |
-
import asyncio
|
6 |
-
import json
|
7 |
-
import logging
|
8 |
-
import os
|
9 |
-
import random
|
10 |
-
from concurrent.futures import ThreadPoolExecutor
|
11 |
-
from typing import Optional, List, Dict, Any
|
12 |
-
from urllib.parse import urlparse, urljoin
|
13 |
-
|
14 |
-
#
|
15 |
-
# External Imports
|
16 |
-
import gradio as gr
|
17 |
-
from playwright.async_api import TimeoutError, async_playwright
|
18 |
-
from playwright.sync_api import sync_playwright
|
19 |
-
|
20 |
-
|
21 |
-
#
|
22 |
-
|
23 |
-
from App_Function_Libraries.Web_Scraping.
|
24 |
-
|
25 |
-
from App_Function_Libraries.
|
26 |
-
from App_Function_Libraries.
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
#
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
asyncio.
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
)
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
'
|
67 |
-
'
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
'
|
174 |
-
'extraction_successful':
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
page
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
'
|
202 |
-
'extraction_successful':
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
if any(pattern in path for pattern in
|
229 |
-
return
|
230 |
-
|
231 |
-
#
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
label="
|
279 |
-
|
280 |
-
|
281 |
-
)
|
282 |
-
|
283 |
-
minimum=1,
|
284 |
-
maximum=10,
|
285 |
-
step=1,
|
286 |
-
label="
|
287 |
-
value=
|
288 |
-
visible=False
|
289 |
-
)
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
)
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
gr.
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Website_scraping_tab.py
|
2 |
+
# Gradio UI for scraping websites
|
3 |
+
#
|
4 |
+
# Imports
|
5 |
+
import asyncio
|
6 |
+
import json
|
7 |
+
import logging
|
8 |
+
import os
|
9 |
+
import random
|
10 |
+
from concurrent.futures import ThreadPoolExecutor
|
11 |
+
from typing import Optional, List, Dict, Any
|
12 |
+
from urllib.parse import urlparse, urljoin
|
13 |
+
|
14 |
+
#
|
15 |
+
# External Imports
|
16 |
+
import gradio as gr
|
17 |
+
from playwright.async_api import TimeoutError, async_playwright
|
18 |
+
from playwright.sync_api import sync_playwright
|
19 |
+
|
20 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
|
21 |
+
#
|
22 |
+
# Local Imports
|
23 |
+
from App_Function_Libraries.Web_Scraping.Article_Extractor_Lib import scrape_from_sitemap, scrape_by_url_level, \
|
24 |
+
scrape_article, collect_bookmarks, scrape_and_summarize_multiple, collect_urls_from_file
|
25 |
+
from App_Function_Libraries.DB.DB_Manager import list_prompts
|
26 |
+
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
|
27 |
+
from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize
|
28 |
+
|
29 |
+
|
30 |
+
#
|
31 |
+
########################################################################################################################
|
32 |
+
#
|
33 |
+
# Functions:
|
34 |
+
|
35 |
+
def get_url_depth(url: str) -> int:
    """Return the number of non-empty path segments in *url*.

    Only the path component is considered; scheme, host, query string and
    fragment do not contribute to the depth.

    Args:
        url (str): Absolute or relative URL.

    Returns:
        int: 0 for a root URL (``https://host`` or ``https://host/``),
        1 for ``https://host/a``, 2 for ``https://host/a/b`` and so on.
    """
    path = urlparse(url).path
    # Skip empty segments: ''.split('/') yields [''], which would otherwise
    # report the site root as depth 1 and count '//' as an extra level.
    return sum(1 for segment in path.split('/') if segment)
|
37 |
+
|
38 |
+
|
39 |
+
def sync_recursive_scrape(url_input, max_pages, max_depth, progress_callback, delay=1.0, custom_cookies=None):
    """Run :func:`recursive_scrape` to completion from synchronous code.

    The coroutine is executed on a dedicated worker thread with its own event
    loop, and this call blocks until the scrape finishes.

    Args:
        url_input: Base URL handed to ``recursive_scrape``.
        max_pages: Maximum number of pages to scrape.
        max_depth: Maximum link depth to follow.
        progress_callback: Callable invoked with progress messages.
        delay: Base delay (seconds) between page requests.
        custom_cookies: Optional list of cookie dicts for the browser context.

    Returns:
        Whatever ``recursive_scrape`` returns (a list of article dicts).
    """
    def run_async_scrape():
        # asyncio.run creates a fresh loop for this thread and, unlike a bare
        # new_event_loop()/run_until_complete() pair, closes it afterwards.
        return asyncio.run(
            recursive_scrape(url_input, max_pages, max_depth, progress_callback, delay, custom_cookies=custom_cookies)
        )

    with ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(run_async_scrape)
        return future.result()
|
50 |
+
|
51 |
+
|
52 |
+
async def recursive_scrape(
        base_url: str,
        max_pages: int,
        max_depth: int,
        progress_callback: callable,
        delay: float = 1.0,
        resume_file: str = 'scrape_progress.json',
        user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        custom_cookies: Optional[List[Dict[str, Any]]] = None
) -> List[Dict]:
    """Crawl pages that start with *base_url*, following links up to *max_depth*.

    URLs are taken from a FIFO queue seeded with ``(base_url, 0)``. Each page is
    fetched with ``scrape_article_async``; when the current depth is below
    *max_depth* the page's ``a[href]`` links are queued if they start with
    *base_url*, have not been visited and pass ``should_scrape_url``.

    Crawl state is checkpointed to *resume_file*. If that file exists when the
    function starts, the crawl resumes from it. The file is removed only after
    the crawl finishes without an exception; on failure the latest state is
    written so a later call can resume.

    Args:
        base_url (str): Starting URL and required prefix for followed links.
        max_pages (int): Stop once this many pages were scraped successfully.
        max_depth (int): Maximum link depth (the base URL is depth 0).
        progress_callback (callable): Called with a status string per page and
            once at the end.
        delay (float): Base delay in seconds; the actual sleep is randomised
            between 80% and 120% of it.
        resume_file (str): Path of the JSON checkpoint file.
        user_agent (str): User-Agent for the browser context.
        custom_cookies: Optional cookie dicts added to the browser context.

    Returns:
        List[Dict]: The successfully scraped article dicts.
    """
    async def save_progress():
        # Write to a temp file first so a crash mid-write never corrupts the checkpoint.
        temp_file = resume_file + ".tmp"
        with open(temp_file, 'w') as f:
            json.dump({
                'visited': list(visited),
                'to_visit': to_visit,
                'scraped_articles': scraped_articles,
                'pages_scraped': pages_scraped
            }, f)
        os.replace(temp_file, resume_file)  # Atomic replace

    def is_valid_url(url: str) -> bool:
        return url.startswith("http") and len(url) > 0

    # Load progress if resume file exists
    if os.path.exists(resume_file):
        with open(resume_file, 'r') as f:
            progress_data = json.load(f)
            visited = set(progress_data['visited'])
            to_visit = progress_data['to_visit']
            scraped_articles = progress_data['scraped_articles']
            pages_scraped = progress_data['pages_scraped']
    else:
        visited = set()
        to_visit = [(base_url, 0)]  # (url, depth)
        scraped_articles = []
        pages_scraped = 0

    completed = False
    # Page count at the last checkpoint; prevents re-saving on every iteration
    # while the count sits at 0 or at the same multiple of 10.
    last_checkpoint = pages_scraped

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                context = await browser.new_context(user_agent=user_agent)

                # Set custom cookies if provided
                if custom_cookies:
                    await context.add_cookies(custom_cookies)

                while to_visit and pages_scraped < max_pages:
                    current_url, current_depth = to_visit.pop(0)

                    if current_url in visited or current_depth > max_depth:
                        continue

                    visited.add(current_url)

                    # Update progress
                    progress_callback(f"Scraping page {pages_scraped + 1}/{max_pages}: {current_url}")

                    try:
                        await asyncio.sleep(random.uniform(delay * 0.8, delay * 1.2))

                        article_data = await scrape_article_async(context, current_url)

                        if article_data and article_data['extraction_successful']:
                            scraped_articles.append(article_data)
                            pages_scraped += 1

                        # If we haven't reached max depth, add child links to to_visit
                        if current_depth < max_depth:
                            page = await context.new_page()
                            try:
                                await page.goto(current_url)
                                await page.wait_for_load_state("networkidle")

                                links = await page.eval_on_selector_all('a[href]',
                                                                        "(elements) => elements.map(el => el.href)")
                            finally:
                                # Close the page even when navigation fails so pages do not pile up.
                                await page.close()

                            for link in links:
                                child_url = urljoin(base_url, link)
                                if is_valid_url(child_url) and child_url.startswith(
                                        base_url) and child_url not in visited and should_scrape_url(child_url):
                                    to_visit.append((child_url, current_depth + 1))

                    except Exception as e:
                        # A single failing page must not abort the whole crawl.
                        logging.error(f"Error scraping {current_url}: {str(e)}")

                    # Save progress periodically (every 10 newly scraped pages)
                    if pages_scraped % 10 == 0 and pages_scraped != last_checkpoint:
                        await save_progress()
                        last_checkpoint = pages_scraped

            finally:
                await browser.close()

        completed = True

    finally:
        if completed:
            # Remove the progress file only when scraping completed successfully
            if os.path.exists(resume_file):
                os.remove(resume_file)
        else:
            # Interrupted: keep the latest state on disk so the crawl can be resumed
            await save_progress()

    # Final progress update
    progress_callback(f"Scraping completed. Total pages scraped: {pages_scraped}")

    return scraped_articles
|
159 |
+
|
160 |
+
|
161 |
+
async def scrape_article_async(context, url: str) -> Dict[str, Any]:
    """Load *url* in a new page of *context* and return its title and HTML.

    Args:
        context: Object exposing an awaitable ``new_page()`` (a Playwright
            browser context in this module).
        url (str): Address to load.

    Returns:
        Dict[str, Any]: ``url``, ``title``, ``content`` and
        ``extraction_successful=True`` on success; ``url``,
        ``extraction_successful=False`` and ``error`` when anything raised.
        The page is closed in both cases.
    """
    tab = await context.new_page()
    try:
        await tab.goto(url)
        await tab.wait_for_load_state("networkidle")

        page_title = await tab.title()
        html = await tab.content()
    except Exception as e:
        logging.error(f"Error scraping article {url}: {str(e)}")
        return {
            'url': url,
            'extraction_successful': False,
            'error': str(e)
        }
    else:
        return {
            'url': url,
            'title': page_title,
            'content': html,
            'extraction_successful': True
        }
    finally:
        await tab.close()
|
185 |
+
|
186 |
+
|
187 |
+
def scrape_article_sync(url: str) -> Dict[str, Any]:
    """Blocking counterpart of ``scrape_article_async``.

    Launches a headless Chromium through the synchronous Playwright API,
    loads *url*, waits for the network to go idle and captures the page.

    Args:
        url (str): Address to load.

    Returns:
        Dict[str, Any]: ``url``, ``title``, ``content`` and
        ``extraction_successful=True`` on success; ``url``,
        ``extraction_successful=False`` and ``error`` when loading raised.
        The browser is closed in both cases.
    """
    with sync_playwright() as playwright:
        chromium = playwright.chromium.launch(headless=True)
        tab = chromium.new_page()
        try:
            tab.goto(url)
            tab.wait_for_load_state("networkidle")

            outcome = {
                'url': url,
                'title': tab.title(),
                'content': tab.content(),
                'extraction_successful': True
            }
        except Exception as e:
            logging.error(f"Error scraping article {url}: {str(e)}")
            outcome = {
                'url': url,
                'extraction_successful': False,
                'error': str(e)
            }
        finally:
            chromium.close()
    return outcome
|
213 |
+
|
214 |
+
|
215 |
+
def should_scrape_url(url: str) -> bool:
    """Decide whether *url* is worth crawling.

    The lower-cased path of the URL is checked for substrings that mark
    taxonomy/listing pages, WordPress internals, account/shop pages and a few
    binary file extensions.

    Args:
        url (str): URL to test.

    Returns:
        bool: False when the path contains any excluded substring, True otherwise.
    """
    blocked_fragments = (
        '/tag/', '/category/', '/author/', '/search/', '/page/',
        'wp-content', 'wp-includes', 'wp-json', 'wp-admin',
        'login', 'register', 'cart', 'checkout', 'account',
        '.jpg', '.png', '.gif', '.pdf', '.zip',
    )
    lowered_path = urlparse(url).path.lower()

    for fragment in blocked_fragments:
        if fragment in lowered_path:
            return False

    # Paths containing '/article/', '/post/' or '/blog/' are accepted, and so
    # is everything else that was not excluded above, so the answer is True
    # for every remaining URL.
    return True
|
239 |
+
|
240 |
+
|
241 |
+
async def scrape_with_retry(url: str, max_retries: int = 3, retry_delay: float = 5.0):
    """Call ``scrape_article`` for *url*, retrying when it times out.

    Args:
        url (str): Address to scrape.
        max_retries (int): Total number of attempts.
        retry_delay (float): Seconds to wait between attempts.

    Returns:
        The result of ``scrape_article`` on the first successful attempt, or
        None when every attempt timed out or any other exception was raised.
    """
    attempt = 0
    while attempt < max_retries:
        is_last_attempt = attempt >= max_retries - 1
        try:
            return await scrape_article(url)
        except TimeoutError:
            if is_last_attempt:
                logging.error(f"Failed to scrape {url} after {max_retries} attempts.")
            else:
                logging.warning(f"Timeout error scraping {url}. Retrying in {retry_delay} seconds...")
                await asyncio.sleep(retry_delay)
        except Exception as e:
            # Non-timeout failures are not retried.
            logging.error(f"Error scraping {url}: {str(e)}")
            return None
        attempt += 1
    return None
|
255 |
+
|
256 |
+
|
257 |
+
def create_website_scraping_tab():
|
258 |
+
try:
|
259 |
+
default_value = None
|
260 |
+
if default_api_endpoint:
|
261 |
+
if default_api_endpoint in global_api_endpoints:
|
262 |
+
default_value = format_api_name(default_api_endpoint)
|
263 |
+
else:
|
264 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
265 |
+
except Exception as e:
|
266 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
267 |
+
default_value = None
|
268 |
+
with gr.TabItem("Website Scraping", visible=True):
|
269 |
+
gr.Markdown("# Scrape Websites & Summarize Articles")
|
270 |
+
with gr.Row():
|
271 |
+
with gr.Column():
|
272 |
+
scrape_method = gr.Radio(
|
273 |
+
["Individual URLs", "Sitemap", "URL Level", "Recursive Scraping"],
|
274 |
+
label="Scraping Method",
|
275 |
+
value="Individual URLs"
|
276 |
+
)
|
277 |
+
url_input = gr.Textbox(
|
278 |
+
label="Article URLs or Base URL",
|
279 |
+
placeholder="Enter article URLs here, one per line, or base URL for sitemap/URL level/recursive scraping",
|
280 |
+
lines=5
|
281 |
+
)
|
282 |
+
url_level = gr.Slider(
|
283 |
+
minimum=1,
|
284 |
+
maximum=10,
|
285 |
+
step=1,
|
286 |
+
label="URL Level (for URL Level scraping)",
|
287 |
+
value=2,
|
288 |
+
visible=False
|
289 |
+
)
|
290 |
+
max_pages = gr.Slider(
|
291 |
+
minimum=1,
|
292 |
+
maximum=100,
|
293 |
+
step=1,
|
294 |
+
label="Maximum Pages to Scrape (for Recursive Scraping)",
|
295 |
+
value=10,
|
296 |
+
visible=False
|
297 |
+
)
|
298 |
+
max_depth = gr.Slider(
|
299 |
+
minimum=1,
|
300 |
+
maximum=10,
|
301 |
+
step=1,
|
302 |
+
label="Maximum Depth (for Recursive Scraping)",
|
303 |
+
value=3,
|
304 |
+
visible=False
|
305 |
+
)
|
306 |
+
custom_article_title_input = gr.Textbox(
|
307 |
+
label="Custom Article Titles (Optional, one per line)",
|
308 |
+
placeholder="Enter custom titles for the articles, one per line",
|
309 |
+
lines=5
|
310 |
+
)
|
311 |
+
with gr.Row():
|
312 |
+
summarize_checkbox = gr.Checkbox(label="Summarize/Analyze Articles", value=False)
|
313 |
+
custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt", value=False, visible=True)
|
314 |
+
preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt", value=False, visible=True)
|
315 |
+
with gr.Row():
|
316 |
+
temp_slider = gr.Slider(0.1, 2.0, 0.7, label="Temperature")
|
317 |
+
|
318 |
+
# Initialize state variables for pagination
|
319 |
+
current_page_state = gr.State(value=1)
|
320 |
+
total_pages_state = gr.State(value=1)
|
321 |
+
with gr.Row():
|
322 |
+
# Add pagination controls
|
323 |
+
preset_prompt = gr.Dropdown(
|
324 |
+
label="Select Preset Prompt",
|
325 |
+
choices=[],
|
326 |
+
visible=False
|
327 |
+
)
|
328 |
+
with gr.Row():
|
329 |
+
prev_page_button = gr.Button("Previous Page", visible=False)
|
330 |
+
page_display = gr.Markdown("Page 1 of X", visible=False)
|
331 |
+
next_page_button = gr.Button("Next Page", visible=False)
|
332 |
+
|
333 |
+
with gr.Row():
|
334 |
+
website_custom_prompt_input = gr.Textbox(
|
335 |
+
label="Custom Prompt",
|
336 |
+
placeholder="Enter custom prompt here",
|
337 |
+
lines=3,
|
338 |
+
visible=False
|
339 |
+
)
|
340 |
+
with gr.Row():
|
341 |
+
system_prompt_input = gr.Textbox(
|
342 |
+
label="System Prompt",
|
343 |
+
value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
|
344 |
+
**Bulleted Note Creation Guidelines**
|
345 |
+
|
346 |
+
**Headings**:
|
347 |
+
- Based on referenced topics, not categories like quotes or terms
|
348 |
+
- Surrounded by **bold** formatting
|
349 |
+
- Not listed as bullet points
|
350 |
+
- No space between headings and list items underneath
|
351 |
+
|
352 |
+
**Emphasis**:
|
353 |
+
- **Important terms** set in bold font
|
354 |
+
- **Text ending in a colon**: also bolded
|
355 |
+
|
356 |
+
**Review**:
|
357 |
+
- Ensure adherence to specified format
|
358 |
+
- Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
|
359 |
+
""",
|
360 |
+
lines=3,
|
361 |
+
visible=False
|
362 |
+
)
|
363 |
+
|
364 |
+
# Refactored API selection dropdown
|
365 |
+
api_name_input = gr.Dropdown(
|
366 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
367 |
+
value=default_value,
|
368 |
+
label="API for Summarization/Analysis (Optional)"
|
369 |
+
)
|
370 |
+
api_key_input = gr.Textbox(
|
371 |
+
label="API Key (Mandatory if API Name is specified)",
|
372 |
+
placeholder="Enter your API key here; Ignore if using Local API or Built-in API",
|
373 |
+
type="password"
|
374 |
+
)
|
375 |
+
custom_cookies_input = gr.Textbox(
|
376 |
+
label="Custom Cookies (JSON format)",
|
377 |
+
placeholder="Enter custom cookies in JSON format",
|
378 |
+
lines=3,
|
379 |
+
visible=True
|
380 |
+
)
|
381 |
+
keywords_input = gr.Textbox(
|
382 |
+
label="Keywords",
|
383 |
+
placeholder="Enter keywords here (comma-separated)",
|
384 |
+
value="default,no_keyword_set",
|
385 |
+
visible=True
|
386 |
+
)
|
387 |
+
bookmarks_file_input = gr.File(
|
388 |
+
label="Upload Bookmarks File/CSV",
|
389 |
+
type="filepath",
|
390 |
+
file_types=[".json", ".html", ".csv"], # Added .csv
|
391 |
+
visible=True
|
392 |
+
)
|
393 |
+
gr.Markdown("""
|
394 |
+
Supported file formats:
|
395 |
+
- Chrome/Edge bookmarks (JSON)
|
396 |
+
- Firefox bookmarks (HTML)
|
397 |
+
- CSV file with 'url' column (optionally 'title' or 'name' column)
|
398 |
+
""")
|
399 |
+
parsed_urls_output = gr.Textbox(
|
400 |
+
label="Parsed URLs",
|
401 |
+
placeholder="URLs will be displayed here after uploading a file.",
|
402 |
+
lines=10,
|
403 |
+
interactive=False,
|
404 |
+
visible=False
|
405 |
+
)
|
406 |
+
|
407 |
+
scrape_button = gr.Button("Scrape and Summarize")
|
408 |
+
|
409 |
+
with gr.Column():
|
410 |
+
progress_output = gr.Textbox(label="Progress", lines=3)
|
411 |
+
result_output = gr.Textbox(label="Result", lines=20)
|
412 |
+
|
413 |
+
def update_ui_for_scrape_method(method):
    """Show only the controls that apply to the chosen scraping method.

    Args:
        method: Selected value of the "Scraping Method" radio.

    Returns:
        Tuple of four updates, in order: URL-level slider, max-pages slider,
        max-depth slider and the URL textbox (label and placeholder).
    """
    is_recursive = method == "Recursive Scraping"
    if method == "Individual URLs":
        box_label = "Article URLs"
        box_placeholder = "Enter article URLs here, one per line"
    else:
        box_label = "Base URL"
        box_placeholder = "Enter the base URL for scraping"

    return (
        gr.update(visible=(method == "URL Level")),
        gr.update(visible=is_recursive),
        gr.update(visible=is_recursive),
        gr.update(label=box_label, placeholder=box_placeholder),
    )
|
422 |
+
|
423 |
+
scrape_method.change(
|
424 |
+
fn=update_ui_for_scrape_method,
|
425 |
+
inputs=[scrape_method],
|
426 |
+
outputs=[url_level, max_pages, max_depth, url_input]
|
427 |
+
)
|
428 |
+
|
429 |
+
custom_prompt_checkbox.change(
|
430 |
+
fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
|
431 |
+
inputs=[custom_prompt_checkbox],
|
432 |
+
outputs=[website_custom_prompt_input, system_prompt_input]
|
433 |
+
)
|
434 |
+
|
435 |
+
def on_preset_prompt_checkbox_change(is_checked):
    """Show or hide the preset-prompt dropdown and its pagination controls.

    Args:
        is_checked: Current state of the "Use a pre-set Prompt" checkbox.

    Returns:
        Tuple of six values, in order: dropdown update, previous-button update,
        next-button update, page-label update, current page, total pages.
        When checked, the first page (20 entries) of prompts is loaded via
        ``list_prompts``; when unchecked everything is hidden and both page
        counters are reset to 1.
    """
    if not is_checked:
        return (
            gr.update(visible=False, interactive=False),  # preset_prompt
            gr.update(visible=False),  # prev_page_button
            gr.update(visible=False),  # next_page_button
            gr.update(visible=False),  # page_display
            1,  # current_page_state
            1  # total_pages_state
        )

    prompt_choices, page_count, page_number = list_prompts(page=1, per_page=20)
    return (
        gr.update(visible=True, interactive=True, choices=prompt_choices),  # preset_prompt
        gr.update(visible=True),  # prev_page_button
        gr.update(visible=True),  # next_page_button
        gr.update(value=f"Page {page_number} of {page_count}", visible=True),  # page_display
        page_number,  # current_page_state
        page_count  # total_pages_state
    )
|
456 |
+
|
457 |
+
preset_prompt_checkbox.change(
|
458 |
+
fn=on_preset_prompt_checkbox_change,
|
459 |
+
inputs=[preset_prompt_checkbox],
|
460 |
+
outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
|
461 |
+
)
|
462 |
+
|
463 |
+
def on_prev_page_click(current_page, total_pages):
    """Load the previous page of preset prompts (never going below page 1).

    Args:
        current_page: Page currently shown.
        total_pages: Page count held in state; not read here, the fresh count
            returned by ``list_prompts`` is used for the label instead.

    Returns:
        Tuple of dropdown update, page-label update and the new current page.
    """
    target_page = max(current_page - 1, 1)
    prompt_choices, page_count, page_number = list_prompts(page=target_page, per_page=20)
    dropdown_update = gr.update(choices=prompt_choices)
    label_update = gr.update(value=f"Page {page_number} of {page_count}")
    return dropdown_update, label_update, page_number
|
468 |
+
|
469 |
+
prev_page_button.click(
|
470 |
+
fn=on_prev_page_click,
|
471 |
+
inputs=[current_page_state, total_pages_state],
|
472 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
473 |
+
)
|
474 |
+
|
475 |
+
def on_next_page_click(current_page, total_pages):
    """Load the next page of preset prompts (never going past *total_pages*).

    Args:
        current_page: Page currently shown.
        total_pages: Page count held in state; used as the upper bound.

    Returns:
        Tuple of dropdown update, page-label update and the new current page.
    """
    target_page = min(current_page + 1, total_pages)
    prompt_choices, page_count, page_number = list_prompts(page=target_page, per_page=20)
    dropdown_update = gr.update(choices=prompt_choices)
    label_update = gr.update(value=f"Page {page_number} of {page_count}")
    return dropdown_update, label_update, page_number
|
480 |
+
|
481 |
+
next_page_button.click(
|
482 |
+
fn=on_next_page_click,
|
483 |
+
inputs=[current_page_state, total_pages_state],
|
484 |
+
outputs=[preset_prompt, page_display, current_page_state]
|
485 |
+
)
|
486 |
+
|
487 |
+
def update_prompts(preset_name):
    """Fill the custom-prompt and system-prompt boxes from a preset.

    Args:
        preset_name: Name selected in the preset-prompt dropdown.

    Returns:
        Tuple of two updates (custom prompt box, system prompt box); both set
        the value from ``update_user_prompt`` and make the box visible.
    """
    selected = update_user_prompt(preset_name)
    user_box = gr.update(value=selected["user_prompt"], visible=True)
    system_box = gr.update(value=selected["system_prompt"], visible=True)
    return user_box, system_box
|
493 |
+
|
494 |
+
preset_prompt.change(
|
495 |
+
update_prompts,
|
496 |
+
inputs=[preset_prompt],
|
497 |
+
outputs=[website_custom_prompt_input, system_prompt_input]
|
498 |
+
)
|
499 |
+
|
500 |
+
def parse_bookmarks(file_path):
    """
    Extract the URLs contained in an uploaded bookmarks file.

    Args:
        file_path (str): Path to the uploaded bookmarks file.

    Returns:
        str: The URLs joined by newlines, a "no URLs" notice when none were
        found, or an error message when parsing failed.
    """
    try:
        collected = []
        # Each bookmark entry may hold a single URL or a list of them;
        # values of any other type are ignored.
        for entry in collect_bookmarks(file_path).values():
            if isinstance(entry, list):
                collected += entry
            elif isinstance(entry, str):
                collected.append(entry)

        if collected:
            return "\n".join(collected)
        return "No URLs found in the bookmarks file."
    except Exception as e:
        logging.error(f"Error parsing bookmarks file: {str(e)}")
        return f"Error parsing bookmarks file: {str(e)}"
|
527 |
+
|
528 |
+
def show_parsed_urls(urls_file):
    """
    Build the content of the "Parsed URLs" box for an uploaded file.

    Args:
        urls_file: Uploaded file value (its ``.name`` is used as the path),
            or None when the upload was cleared.

    Returns:
        Tuple of (visibility update, text). The box is hidden with empty text
        when no file is present; otherwise it is shown with one
        ``name: url`` line per parsed URL, or with an error message.
    """
    if urls_file is None:
        return gr.update(visible=False), ""

    file_path = urls_file.name
    try:
        display_lines = []
        for name, urls in collect_urls_from_file(file_path).items():
            # Normalise single values to a one-element list so both shapes
            # are rendered the same way.
            targets = urls if isinstance(urls, list) else [urls]
            display_lines.extend(f"{name}: {target}" for target in targets)

        return gr.update(visible=True), "\n".join(display_lines)
    except Exception as e:
        return gr.update(visible=True), f"Error parsing file: {str(e)}"
|
558 |
+
|
559 |
+
# Connect the parsing function to the file upload event
|
560 |
+
bookmarks_file_input.change(
|
561 |
+
fn=show_parsed_urls,
|
562 |
+
inputs=[bookmarks_file_input],
|
563 |
+
outputs=[parsed_urls_output, parsed_urls_output]
|
564 |
+
)
|
565 |
+
|
566 |
+
async def scrape_and_summarize_wrapper(
        scrape_method: str,
        url_input: str,
        url_level: Optional[int],
        max_pages: int,
        max_depth: int,
        summarize_checkbox: bool,
        custom_prompt: Optional[str],
        api_name: Optional[str],
        api_key: Optional[str],
        keywords: str,
        custom_titles: Optional[str],
        system_prompt: Optional[str],
        temperature: float,
        custom_cookies: Optional[str],
        bookmarks_file,
        progress: gr.Progress = gr.Progress()
) -> str:
    """Scrape with the selected method, optionally summarize, and render markdown.

    Args:
        scrape_method: One of "Individual URLs", "Sitemap", "URL Level",
            "Recursive Scraping".
        url_input: Newline-separated URLs (individual mode) or a base URL.
        url_level: Level for "URL Level" scraping.
        max_pages: Page limit for recursive scraping.
        max_depth: Depth limit for recursive scraping.
        summarize_checkbox: Whether to summarize each scraped article.
        custom_prompt: Prompt passed to the summarizer.
        api_name: API used for summarization.
        api_key: Key for that API.
        keywords: Keywords recorded in the output metadata.
        custom_titles: Custom titles passed to individual-URL scraping.
        system_prompt: System prompt passed to the summarizer.
        temperature: Sampling temperature passed to the summarizer.
        custom_cookies: JSON text holding one cookie object or a list of them.
        bookmarks_file: Optional uploaded file; when present its URLs replace
            *url_input* (all of them in individual mode, the first otherwise).
        progress: Gradio progress tracker.

    Returns:
        str: Markdown produced by ``convert_json_to_markdown``; any failure is
        rendered as a markdown error section instead of being raised.
    """
    try:
        result: List[Dict[str, Any]] = []

        # Handle bookmarks file if provided
        if bookmarks_file is not None:
            bookmarks = collect_bookmarks(bookmarks_file.name)
            # Extract URLs from bookmarks
            urls_from_bookmarks = []
            for value in bookmarks.values():
                if isinstance(value, list):
                    urls_from_bookmarks.extend(value)
                elif isinstance(value, str):
                    urls_from_bookmarks.append(value)
            # Report an empty file for every method instead of silently
            # replacing the URL input with an empty string in individual mode.
            if not urls_from_bookmarks:
                return convert_json_to_markdown(json.dumps({"error": "No URLs found in the bookmarks file."}))
            if scrape_method == "Individual URLs":
                url_input = "\n".join(urls_from_bookmarks)
            else:
                url_input = urls_from_bookmarks[0]

        # Handle custom cookies
        custom_cookies_list = None
        if custom_cookies:
            try:
                custom_cookies_list = json.loads(custom_cookies)
                # A single cookie object is accepted and wrapped into a list
                if not isinstance(custom_cookies_list, list):
                    custom_cookies_list = [custom_cookies_list]
            except json.JSONDecodeError as e:
                return convert_json_to_markdown(json.dumps({"error": f"Invalid JSON format for custom cookies: {e}"}))

        if scrape_method == "Individual URLs":
            result = await scrape_and_summarize_multiple(url_input, custom_prompt, api_name, api_key, keywords,
                                                         custom_titles, system_prompt, summarize_checkbox,
                                                         custom_cookies=custom_cookies_list)
        elif scrape_method == "Sitemap":
            result = await asyncio.to_thread(scrape_from_sitemap, url_input)
        elif scrape_method == "URL Level":
            if url_level is None:
                return convert_json_to_markdown(
                    json.dumps({"error": "URL level is required for URL Level scraping."}))
            result = await asyncio.to_thread(scrape_by_url_level, url_input, url_level)
        elif scrape_method == "Recursive Scraping":
            # NOTE(review): progress.update is passed as the message callback;
            # confirm gr.Progress.update accepts status strings.
            result = await recursive_scrape(url_input, max_pages, max_depth, progress.update, delay=1.0,
                                            custom_cookies=custom_cookies_list)
        else:
            return convert_json_to_markdown(json.dumps({"error": f"Unknown scraping method: {scrape_method}"}))

        # Ensure result is always a list of dictionaries
        if isinstance(result, dict):
            result = [result]
        elif isinstance(result, list):
            if all(isinstance(item, str) for item in result):
                # Convert list of strings to list of dictionaries
                result = [{"content": item} for item in result]
            elif not all(isinstance(item, dict) for item in result):
                raise ValueError("Not all items in result are dictionaries or strings")
        else:
            raise ValueError(f"Unexpected result type: {type(result)}")

        if summarize_checkbox:
            # NOTE(review): individual-URL scraping already receives
            # summarize_checkbox; verify articles are not summarized twice.
            total_articles = len(result)
            for i, article in enumerate(result):
                progress.update(f"Summarizing article {i + 1}/{total_articles}")
                content = article.get('content', '')
                if content:
                    summary = await asyncio.to_thread(summarize, content, custom_prompt, api_name, api_key,
                                                      temperature, system_prompt)
                    article['summary'] = summary
                else:
                    article['summary'] = "No content available to summarize."

        # Concatenate all content
        all_content = "\n\n".join(
            [f"# {article.get('title', 'Untitled')}\n\n{article.get('content', '')}\n\n" +
             (f"Summary: {article.get('summary', '')}" if summarize_checkbox else "")
             for article in result])

        # Collect all unique URLs; dict.fromkeys keeps first-seen order so the
        # rendered list is stable between runs (a set would shuffle it).
        all_urls = list(dict.fromkeys(article.get('url', '') for article in result if article.get('url')))

        # Structure the output for the entire website collection
        website_collection = {
            "base_url": url_input,
            "scrape_method": scrape_method,
            "summarization_performed": summarize_checkbox,
            "api_used": api_name if summarize_checkbox else None,
            "keywords": keywords if summarize_checkbox else None,
            "url_level": url_level if scrape_method == "URL Level" else None,
            "max_pages": max_pages if scrape_method == "Recursive Scraping" else None,
            "max_depth": max_depth if scrape_method == "Recursive Scraping" else None,
            "total_articles_scraped": len(result),
            "urls_scraped": all_urls,
            "content": all_content
        }

        # Convert the JSON to markdown and return
        return convert_json_to_markdown(json.dumps(website_collection, indent=2))
    except Exception as e:
        return convert_json_to_markdown(json.dumps({"error": f"An error occurred: {str(e)}"}))
|
687 |
+
|
688 |
+
# Update the scrape_button.click to include the temperature parameter
|
689 |
+
scrape_button.click(
|
690 |
+
fn=lambda *args: asyncio.run(scrape_and_summarize_wrapper(*args)),
|
691 |
+
inputs=[scrape_method, url_input, url_level, max_pages, max_depth, summarize_checkbox,
|
692 |
+
website_custom_prompt_input, api_name_input, api_key_input, keywords_input,
|
693 |
+
custom_article_title_input, system_prompt_input, temp_slider,
|
694 |
+
custom_cookies_input, bookmarks_file_input],
|
695 |
+
outputs=[result_output]
|
696 |
+
)
|
697 |
+
|
698 |
+
|
699 |
+
def convert_json_to_markdown(json_str: str) -> str:
    """
    Render the JSON produced by the scraping process as markdown.

    Args:
        json_str (str): JSON-formatted string containing the website collection data

    Returns:
        str: Markdown with a title, a metadata list, the scraped URLs and the
        content. If the JSON has an "error" entry, or the string cannot be
        parsed, or a required key is missing, a "# Error" section is returned
        instead.
    """
    try:
        payload = json.loads(json_str)

        # An error payload short-circuits the normal rendering
        if "error" in payload:
            return f"# Error\n\n{payload['error']}"

        pieces = [
            f"# Website Collection: {payload['base_url']}\n\n",
            "## Metadata\n\n",
            f"- **Scrape Method:** {payload['scrape_method']}\n",
            f"- **API Used:** {payload['api_used']}\n",
            f"- **Keywords:** {payload['keywords']}\n",
        ]

        # Method-specific settings are listed only when they were set
        optional_fields = (
            ('url_level', "- **URL Level:** "),
            ('max_pages', "- **Maximum Pages:** "),
            ('max_depth', "- **Maximum Depth:** "),
        )
        for key, prefix in optional_fields:
            if payload.get(key) is not None:
                pieces.append(f"{prefix}{payload[key]}\n")

        pieces.append(f"- **Total Articles Scraped:** {payload['total_articles_scraped']}\n\n")

        pieces.append("## URLs Scraped\n\n")
        pieces.extend(f"- {url}\n" for url in payload['urls_scraped'])
        pieces.append("\n")

        pieces.append("## Content\n\n")
        return "".join(pieces) + payload['content']

    except json.JSONDecodeError:
        return "# Error\n\nInvalid JSON string provided."
    except KeyError as e:
        return f"# Error\n\nMissing key in JSON data: {str(e)}"
    except Exception as e:
        return f"# Error\n\nAn unexpected error occurred: {str(e)}"
|
751 |
+
|
752 |
+
#
|
753 |
+
# End of File
|
754 |
+
########################################################################################################################
|
App_Function_Libraries/Gradio_UI/Workflows_tab.py
ADDED
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Workflows_tab.py
|
2 |
+
# Description: Gradio UI for Chat Workflows
|
3 |
+
#
|
4 |
+
# Imports
|
5 |
+
import json
|
6 |
+
import logging
|
7 |
+
from pathlib import Path
|
8 |
+
#
|
9 |
+
# External Imports
|
10 |
+
import gradio as gr
|
11 |
+
#
|
12 |
+
# Local Imports
|
13 |
+
from App_Function_Libraries.Gradio_UI.Chat_ui import chat_wrapper, search_conversations, \
|
14 |
+
load_conversation
|
15 |
+
from App_Function_Libraries.Chat.Chat_Functions import save_chat_history_to_db_wrapper
|
16 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
|
17 |
+
#
|
18 |
+
############################################################################################################
|
19 |
+
#
|
20 |
+
# Functions:
|
21 |
+
|
22 |
+
# Load workflows from a JSON file.
# The path is relative, so it is resolved against the process's current working directory.
json_path = Path('./Helper_Scripts/Workflows/Workflows.json')
try:
    # JSON text is UTF-8; be explicit instead of relying on the platform's locale encoding.
    with json_path.open('r', encoding='utf-8') as f:
        workflows = json.load(f)
except (OSError, ValueError) as e:
    # OSError covers a missing/unreadable file; ValueError covers json.JSONDecodeError and
    # UnicodeDecodeError. A broken workflow file should not make this module un-importable:
    # fall back to "no workflows" so the selector is simply empty.
    logging.error(f"Could not load workflows from '{json_path}': {e}")
    workflows = []
|
26 |
+
|
27 |
+
|
28 |
+
def chat_workflows_tab():
    """Build the "Chat Workflows" tab and wire up its event handlers.

    Each entry of the module-level ``workflows`` list is read as a dict with a
    ``name``, an ordered list of ``prompts`` and an optional ``context``. The tab
    walks the user through the prompts one step at a time; every step is sent to
    ``chat_wrapper`` together with the context and the user's input.

    Returns:
        tuple: ``(workflow_selector, api_selector, api_key_input, context_input,
        chatbot, msg, submit_btn, clear_btn, save_btn)``.
    """

    def _initial_workflow_state():
        # Build a fresh dict on every call so no two events share one mutable object.
        return {"current_step": 0, "max_steps": 0, "conversation_id": None}

    # Pre-select the configured default API only when it is a known endpoint.
    try:
        default_value = None
        if default_api_endpoint:
            if default_api_endpoint in global_api_endpoints:
                default_value = format_api_name(default_api_endpoint)
            else:
                logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
    except Exception as e:
        logging.error(f"Error setting default API endpoint: {str(e)}")
        default_value = None

    with gr.TabItem("Chat Workflows", visible=True):
        gr.Markdown("# Workflows using LLMs")
        chat_history = gr.State([])
        media_content = gr.State({})
        selected_parts = gr.State([])
        conversation_id = gr.State(None)
        workflow_state = gr.State(_initial_workflow_state())

        with gr.Row():
            with gr.Column():
                workflow_selector = gr.Dropdown(label="Select Workflow", choices=[wf['name'] for wf in workflows])
                # Refactored API selection dropdown
                api_selector = gr.Dropdown(
                    choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
                    value=default_value,
                    label="API for Interaction (Optional)"
                )
                api_key_input = gr.Textbox(label="API Key (optional)", type="password")
                temperature = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
                save_conversation = gr.Checkbox(label="Save Conversation", value=False)
            with gr.Column():
                gr.Markdown("Placeholder")
        with gr.Row():
            with gr.Column():
                conversation_search = gr.Textbox(label="Search Conversations")
                search_conversations_btn = gr.Button("Search Conversations")
            with gr.Column():
                previous_conversations = gr.Dropdown(label="Select Conversation", choices=[], interactive=True)
                load_conversations_btn = gr.Button("Load Selected Conversation")
        with gr.Row():
            with gr.Column():
                context_input = gr.Textbox(label="Initial Context", lines=5)
                chatbot = gr.Chatbot(label="Workflow Chat")
                msg = gr.Textbox(label="Your Input")
                submit_btn = gr.Button("Submit")
                clear_btn = gr.Button("Clear Chat")
                chat_media_name = gr.Textbox(label="Custom Chat Name(optional)")
                save_btn = gr.Button("Save Chat to Database")
                save_status = gr.Textbox(label="Save Status", interactive=False)

        def update_workflow_ui(workflow_name):
            """Reset the tab for the newly selected workflow.

            Returns values for ``workflow_state``, ``context_input``, ``chatbot``
            and ``msg``. The input box is always re-enabled here because a
            previously completed workflow leaves it disabled.
            """
            enable_input = gr.update(interactive=True)
            if not workflow_name:
                return _initial_workflow_state(), "", [], enable_input
            selected_workflow = next((wf for wf in workflows if wf['name'] == workflow_name), None)
            if not selected_workflow:
                logging.error(f"Selected workflow not found: {workflow_name}")
                return _initial_workflow_state(), "", [], enable_input

            prompts = selected_workflow.get('prompts') or []
            context = selected_workflow.get('context', '')
            if not prompts:
                # Nothing to step through; avoid indexing into an empty list.
                logging.warning(f"Workflow '{workflow_name}' has no prompts")
                return _initial_workflow_state(), context, [], enable_input

            num_prompts = len(prompts)
            initial_chat = [(None, f"{prompts[0]}")]
            logging.info(f"Initializing workflow: {workflow_name} with {num_prompts} steps")
            new_state = {"current_step": 0, "max_steps": num_prompts, "conversation_id": None}
            return new_state, context, initial_chat, enable_input

        def process_workflow_step(message, history, context, workflow_name, api_endpoint, api_key, workflow_state,
                                  save_conv, temp, media, parts):
            """Run the current workflow step and advance the workflow state.

            Returns values for ``chatbot``, ``workflow_state`` and ``msg``; the
            input box is disabled once the last step has been processed. On any
            error the history and state are returned unchanged.
            """
            logging.info(f"Process workflow step called with message: {message}")
            logging.info(f"Current workflow state: {workflow_state}")
            try:
                selected_workflow = next((wf for wf in workflows if wf['name'] == workflow_name), None)
                if not selected_workflow:
                    logging.error(f"Selected workflow not found: {workflow_name}")
                    return history, workflow_state, gr.update(interactive=True)

                current_step = workflow_state["current_step"]
                max_steps = workflow_state["max_steps"]

                logging.info(f"Current step: {current_step}, Max steps: {max_steps}")

                if current_step >= max_steps:
                    logging.info("Workflow completed, disabling input")
                    return history, workflow_state, gr.update(interactive=False)

                prompt = selected_workflow['prompts'][current_step]
                full_message = f"{context}\n\nStep {current_step + 1}: {prompt}\nUser: {message}"

                logging.info(f"Calling chat_wrapper with full_message: {full_message[:100]}...")
                # `media` and `parts` arrive as event inputs (session state) instead of
                # being read from the components' `.value`, which only holds the initial value.
                bot_message, new_history, new_conversation_id = chat_wrapper(
                    full_message, history, media, parts,
                    api_endpoint, api_key, "", workflow_state["conversation_id"],
                    save_conv, temp, "You are a helpful assistant guiding through a workflow."
                )

                # str() keeps a non-string reply from failing the step inside a log call.
                logging.info(f"Received bot_message: {str(bot_message)[:100]}...")

                next_step = current_step + 1
                new_workflow_state = {
                    "current_step": next_step,
                    "max_steps": max_steps,
                    "conversation_id": new_conversation_id
                }

                if next_step >= max_steps:
                    logging.info("Workflow completed after this step")
                    return new_history, new_workflow_state, gr.update(interactive=False)

                # Show the next step's prompt as a bot-side message.
                next_prompt = selected_workflow['prompts'][next_step]
                new_history.append((None, f"Step {next_step + 1}: {next_prompt}"))
                logging.info(f"Moving to next step: {next_step}")
                return new_history, new_workflow_state, gr.update(interactive=True)
            except Exception as e:
                logging.error(f"Error in process_workflow_step: {str(e)}")
                return history, workflow_state, gr.update(interactive=True)

        workflow_selector.change(
            update_workflow_ui,
            inputs=[workflow_selector],
            outputs=[workflow_state, context_input, chatbot, msg]
        )

        submit_btn.click(
            process_workflow_step,
            inputs=[msg, chatbot, context_input, workflow_selector, api_selector, api_key_input, workflow_state,
                    save_conversation, temperature, media_content, selected_parts],
            outputs=[chatbot, workflow_state, msg]
        ).then(
            # Clear the input box after every submission.
            lambda: gr.update(value=""),
            outputs=[msg]
        )

        clear_btn.click(
            # Also re-enable the input box, which a finished workflow leaves disabled.
            lambda: ([], _initial_workflow_state(), "", gr.update(interactive=True)),
            outputs=[chatbot, workflow_state, context_input, msg]
        )

        save_btn.click(
            save_chat_history_to_db_wrapper,
            inputs=[chatbot, conversation_id, media_content, chat_media_name],
            outputs=[conversation_id, save_status]
        )

        search_conversations_btn.click(
            search_conversations,
            inputs=[conversation_search],
            outputs=[previous_conversations]
        )

        load_conversations_btn.click(
            # Reset the workflow before loading the stored conversation.
            lambda: ([], _initial_workflow_state(), ""),
            outputs=[chatbot, workflow_state, context_input]
        ).then(
            load_conversation,
            inputs=[previous_conversations],
            outputs=[chatbot, conversation_id]
        )

    return workflow_selector, api_selector, api_key_input, context_input, chatbot, msg, submit_btn, clear_btn, save_btn
|
187 |
+
|
188 |
+
#
|
189 |
+
# End of script
|
190 |
+
############################################################################################################
|
App_Function_Libraries/Gradio_UI/Writing_tab.py
CHANGED
@@ -4,11 +4,16 @@
|
|
4 |
# Imports
|
5 |
#
|
6 |
# External Imports
|
|
|
|
|
7 |
import gradio as gr
|
8 |
import textstat
|
9 |
#
|
10 |
# Local Imports
|
11 |
from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
|
|
|
|
|
|
|
12 |
#
|
13 |
########################################################################################################################
|
14 |
#
|
@@ -42,6 +47,16 @@ def grammar_style_check(input_text, custom_prompt, api_name, api_key, system_pro
|
|
42 |
|
43 |
def create_grammar_style_check_tab():
|
44 |
with gr.TabItem("Grammar and Style Check", visible=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
with gr.Row():
|
46 |
with gr.Column():
|
47 |
gr.Markdown("# Grammar and Style Check")
|
@@ -74,11 +89,11 @@ def create_grammar_style_check_tab():
|
|
74 |
inputs=[custom_prompt_checkbox],
|
75 |
outputs=[custom_prompt_input, system_prompt_input]
|
76 |
)
|
|
|
77 |
api_name_input = gr.Dropdown(
|
78 |
-
choices=[None
|
79 |
-
|
80 |
-
|
81 |
-
label="API for Grammar Check"
|
82 |
)
|
83 |
api_key_input = gr.Textbox(label="API Key (if not set in Config_Files/config.txt)", placeholder="Enter your API key here",
|
84 |
type="password")
|
@@ -302,63 +317,63 @@ def create_document_feedback_tab():
|
|
302 |
with gr.Row():
|
303 |
compare_button = gr.Button("Compare Feedback")
|
304 |
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
|
363 |
return input_text, feedback_output, readability_output, feedback_history_display
|
364 |
|
|
|
4 |
# Imports
|
5 |
#
|
6 |
# External Imports
|
7 |
+
import logging
|
8 |
+
|
9 |
import gradio as gr
|
10 |
import textstat
|
11 |
#
|
12 |
# Local Imports
|
13 |
from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
|
14 |
+
from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
|
15 |
+
|
16 |
+
|
17 |
#
|
18 |
########################################################################################################################
|
19 |
#
|
|
|
47 |
|
48 |
def create_grammar_style_check_tab():
|
49 |
with gr.TabItem("Grammar and Style Check", visible=True):
|
50 |
+
try:
|
51 |
+
default_value = None
|
52 |
+
if default_api_endpoint:
|
53 |
+
if default_api_endpoint in global_api_endpoints:
|
54 |
+
default_value = format_api_name(default_api_endpoint)
|
55 |
+
else:
|
56 |
+
logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
|
57 |
+
except Exception as e:
|
58 |
+
logging.error(f"Error setting default API endpoint: {str(e)}")
|
59 |
+
default_value = None
|
60 |
with gr.Row():
|
61 |
with gr.Column():
|
62 |
gr.Markdown("# Grammar and Style Check")
|
|
|
89 |
inputs=[custom_prompt_checkbox],
|
90 |
outputs=[custom_prompt_input, system_prompt_input]
|
91 |
)
|
92 |
+
# Refactored API selection dropdown
|
93 |
api_name_input = gr.Dropdown(
|
94 |
+
choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
|
95 |
+
value=default_value,
|
96 |
+
label="API for Analysis (Optional)"
|
|
|
97 |
)
|
98 |
api_key_input = gr.Textbox(label="API Key (if not set in Config_Files/config.txt)", placeholder="Enter your API key here",
|
99 |
type="password")
|
|
|
317 |
with gr.Row():
|
318 |
compare_button = gr.Button("Compare Feedback")
|
319 |
|
320 |
+
feedback_history = gr.State([])
|
321 |
+
|
322 |
+
def add_custom_persona(name, description):
    """Register a user-defined persona and offer it in the persona dropdown.

    Stores a feedback prompt for ``name`` in ``persona_prompts`` and returns a
    Gradio update whose choices are the dropdown's choices plus ``name``.
    """
    choices_with_new = persona_dropdown.choices + [name]
    prompt_text = f"As {name}, {description}, provide feedback on the following text:"
    persona_prompts[name] = prompt_text
    return gr.update(choices=choices_with_new)
|
326 |
+
|
327 |
+
def update_feedback_history(current_text, persona, feedback):
    """Record one feedback entry and return the re-rendered history HTML.

    The entry is appended to ``feedback_history.value``, the history is trimmed
    to its 10 most recent entries, and the result is rendered with
    ``generate_feedback_history_html``.
    """
    # NOTE(review): this mutates the State component's `.value` directly, which
    # looks like it is shared rather than per-session - confirm.
    entries = feedback_history.value
    if entries is None:
        # First use: start from an empty history.
        entries = []
        feedback_history.value = entries

    entries.append({"text": current_text, "persona": persona, "feedback": feedback})

    # Keep only the 10 most recent entries.
    feedback_history.value = entries[-10:]

    return generate_feedback_history_html(feedback_history.value)
|
342 |
+
|
343 |
+
def compare_feedback(text, selected_personas, api_name, api_key):
    """Collect "Overall" feedback on ``text`` from every selected persona.

    Returns one markdown string with a ``### <persona>'s Feedback:`` section per
    persona, in the order the personas were given.
    """
    feedback_by_persona = (
        (persona, generate_writing_feedback(text, persona, "Overall", api_name, api_key))
        for persona in selected_personas
    )
    return "\n".join(
        f"### {who}'s Feedback:\n{body}\n\n" for who, body in feedback_by_persona
    )
|
349 |
+
|
350 |
+
add_custom_persona_button.click(
|
351 |
+
fn=add_custom_persona,
|
352 |
+
inputs=[custom_persona_name, custom_persona_description],
|
353 |
+
outputs=persona_dropdown
|
354 |
+
)
|
355 |
+
|
356 |
+
def _feedback_with_history(text, persona, aspect, api_name, api_key):
    """Generate feedback once and reuse it for both the display and the history.

    Returns values for ``feedback_output``, ``readability_output`` and
    ``feedback_history_display``.
    """
    # A single call: requesting the feedback twice doubles the API usage and can
    # record different feedback in the history than the one shown to the user.
    feedback = generate_writing_feedback(text, persona, aspect, api_name, api_key)
    readability = calculate_readability(text)
    history_html = update_feedback_history(text, persona, feedback)
    return feedback, readability, history_html

get_feedback_button.click(
    fn=_feedback_with_history,
    inputs=[input_text, persona_dropdown, aspect_dropdown, api_name_input, api_key_input],
    outputs=[feedback_output, readability_output, feedback_history_display]
)
|
365 |
+
|
366 |
+
compare_button.click(
|
367 |
+
fn=compare_feedback,
|
368 |
+
inputs=[input_text, compare_personas, api_name_input, api_key_input],
|
369 |
+
outputs=feedback_output
|
370 |
+
)
|
371 |
+
|
372 |
+
generate_prompt_button.click(
|
373 |
+
fn=generate_writing_prompt,
|
374 |
+
inputs=[persona_dropdown, api_name_input, api_key_input],
|
375 |
+
outputs=input_text
|
376 |
+
)
|
377 |
|
378 |
return input_text, feedback_output, readability_output, feedback_history_display
|
379 |
|