oceansweep committed on
Commit: c5b0bb7
1 Parent(s): 34fa93e

Upload 169 files

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete change set.
Files changed (50)
  1. App_Function_Libraries/Audio/Audio_Files.py +131 -226
  2. App_Function_Libraries/Audio/Audio_Transcription_Lib.py +1 -1
  3. App_Function_Libraries/Benchmarks_Evaluations/InfiniteBench/InifiniteBench/__pycache__/test_chat_API_Calls.cpython-312-pytest-7.2.1.pyc +0 -0
  4. App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py +1 -1
  5. App_Function_Libraries/Books/Book_Ingestion_Lib.py +226 -88
  6. App_Function_Libraries/Chat/Chat_Functions.py +453 -0
  7. App_Function_Libraries/Chat/__init__.py +0 -0
  8. App_Function_Libraries/Chunk_Lib.py +238 -60
  9. App_Function_Libraries/DB/Character_Chat_DB.py +1059 -701
  10. App_Function_Libraries/DB/DB_Backups.py +160 -0
  11. App_Function_Libraries/DB/DB_Manager.py +159 -54
  12. App_Function_Libraries/DB/Prompts_DB.py +626 -0
  13. App_Function_Libraries/DB/RAG_QA_Chat_DB.py +845 -54
  14. App_Function_Libraries/DB/SQLite_DB.py +139 -583
  15. App_Function_Libraries/Gradio_Related.py +600 -420
  16. App_Function_Libraries/Gradio_UI/Anki_tab.py +921 -0
  17. App_Function_Libraries/Gradio_UI/Audio_ingestion_tab.py +135 -43
  18. App_Function_Libraries/Gradio_UI/Backup_Functionality.py +2 -13
  19. App_Function_Libraries/Gradio_UI/Backup_RAG_Notes_Character_Chat_tab.py +195 -0
  20. App_Function_Libraries/Gradio_UI/Book_Ingestion_tab.py +84 -40
  21. App_Function_Libraries/Gradio_UI/Character_Chat_tab.py +281 -46
  22. App_Function_Libraries/Gradio_UI/Character_interaction_tab.py +34 -13
  23. App_Function_Libraries/Gradio_UI/Chat_ui.py +691 -366
  24. App_Function_Libraries/Gradio_UI/Embeddings_tab.py +281 -74
  25. App_Function_Libraries/Gradio_UI/Evaluations_Benchmarks_tab.py +17 -2
  26. App_Function_Libraries/Gradio_UI/Explain_summarize_tab.py +140 -30
  27. App_Function_Libraries/Gradio_UI/Export_Functionality.py +747 -119
  28. App_Function_Libraries/Gradio_UI/Gradio_Shared.py +0 -5
  29. App_Function_Libraries/Gradio_UI/Import_Functionality.py +466 -17
  30. App_Function_Libraries/Gradio_UI/Keywords.py +301 -8
  31. App_Function_Libraries/Gradio_UI/Live_Recording.py +19 -0
  32. App_Function_Libraries/Gradio_UI/Llamafile_tab.py +327 -0
  33. App_Function_Libraries/Gradio_UI/Media_edit.py +218 -20
  34. App_Function_Libraries/Gradio_UI/Media_wiki_tab.py +7 -0
  35. App_Function_Libraries/Gradio_UI/Mind_Map_tab.py +128 -0
  36. App_Function_Libraries/Gradio_UI/PDF_ingestion_tab.py +286 -75
  37. App_Function_Libraries/Gradio_UI/Plaintext_tab_import.py +79 -75
  38. App_Function_Libraries/Gradio_UI/Podcast_tab.py +167 -52
  39. App_Function_Libraries/Gradio_UI/Prompt_Suggestion_tab.py +19 -6
  40. App_Function_Libraries/Gradio_UI/Prompts_tab.py +297 -0
  41. App_Function_Libraries/Gradio_UI/RAG_Chat_tab.py +18 -3
  42. App_Function_Libraries/Gradio_UI/RAG_QA_Chat_tab.py +447 -178
  43. App_Function_Libraries/Gradio_UI/Re_summarize_tab.py +60 -10
  44. App_Function_Libraries/Gradio_UI/Search_Tab.py +6 -6
  45. App_Function_Libraries/Gradio_UI/Semantic_Scholar_tab.py +184 -0
  46. App_Function_Libraries/Gradio_UI/Video_transcription_tab.py +149 -29
  47. App_Function_Libraries/Gradio_UI/View_DB_Items_tab.py +606 -121
  48. App_Function_Libraries/Gradio_UI/Website_scraping_tab.py +754 -554
  49. App_Function_Libraries/Gradio_UI/Workflows_tab.py +190 -0
  50. App_Function_Libraries/Gradio_UI/Writing_tab.py +76 -61
App_Function_Libraries/Audio/Audio_Files.py CHANGED
@@ -117,16 +117,15 @@ def process_audio_files(audio_urls, audio_file, whisper_model, api_name, api_key
117
  progress = []
118
  all_transcriptions = []
119
  all_summaries = []
120
- #v2
 
121
  def format_transcription_with_timestamps(segments):
122
  if keep_timestamps:
123
  formatted_segments = []
124
  for segment in segments:
125
  start = segment.get('Time_Start', 0)
126
  end = segment.get('Time_End', 0)
127
- text = segment.get('Text', '').strip() # Ensure text is stripped of leading/trailing spaces
128
-
129
- # Add the formatted timestamp and text to the list, followed by a newline
130
  formatted_segments.append(f"[{start:.2f}-{end:.2f}] {text}")
131
 
132
  # Join the segments with a newline to ensure proper formatting
@@ -191,205 +190,64 @@ def process_audio_files(audio_urls, audio_file, whisper_model, api_name, api_key
191
  'language': chunk_language
192
  }
193
 
194
- # Process multiple URLs
195
- urls = [url.strip() for url in audio_urls.split('\n') if url.strip()]
196
-
197
- for i, url in enumerate(urls):
198
- update_progress(f"Processing URL {i + 1}/{len(urls)}: {url}")
199
-
200
- # Download and process audio file
201
- audio_file_path = download_audio_file(url, use_cookies, cookies)
202
- if not os.path.exists(audio_file_path):
203
- update_progress(f"Downloaded file not found: {audio_file_path}")
204
- failed_count += 1
205
- log_counter(
206
- metric_name="audio_files_failed_total",
207
- labels={"whisper_model": whisper_model, "api_name": api_name},
208
- value=1
209
- )
210
- continue
211
-
212
- temp_files.append(audio_file_path)
213
- update_progress("Audio file downloaded successfully.")
214
-
215
- # Re-encode MP3 to fix potential issues
216
- reencoded_mp3_path = reencode_mp3(audio_file_path)
217
- if not os.path.exists(reencoded_mp3_path):
218
- update_progress(f"Re-encoded file not found: {reencoded_mp3_path}")
219
- failed_count += 1
220
- log_counter(
221
- metric_name="audio_files_failed_total",
222
- labels={"whisper_model": whisper_model, "api_name": api_name},
223
- value=1
224
- )
225
- continue
226
-
227
- temp_files.append(reencoded_mp3_path)
228
-
229
- # Convert re-encoded MP3 to WAV
230
- wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
231
- if not os.path.exists(wav_file_path):
232
- update_progress(f"Converted WAV file not found: {wav_file_path}")
233
- failed_count += 1
234
- log_counter(
235
- metric_name="audio_files_failed_total",
236
- labels={"whisper_model": whisper_model, "api_name": api_name},
237
- value=1
238
- )
239
- continue
240
-
241
- temp_files.append(wav_file_path)
242
-
243
- # Initialize transcription
244
- transcription = ""
245
-
246
- # Transcribe audio
247
- if diarize:
248
- segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=True)
249
- else:
250
- segments = speech_to_text(wav_file_path, whisper_model=whisper_model)
251
-
252
- # Handle segments nested under 'segments' key
253
- if isinstance(segments, dict) and 'segments' in segments:
254
- segments = segments['segments']
255
-
256
- if isinstance(segments, list):
257
- # Log first 5 segments for debugging
258
- logging.debug(f"Segments before formatting: {segments[:5]}")
259
- transcription = format_transcription_with_timestamps(segments)
260
- logging.debug(f"Formatted transcription (first 500 chars): {transcription[:500]}")
261
- update_progress("Audio transcribed successfully.")
262
- else:
263
- update_progress("Unexpected segments format received from speech_to_text.")
264
- logging.error(f"Unexpected segments format: {segments}")
265
- failed_count += 1
266
- log_counter(
267
- metric_name="audio_files_failed_total",
268
- labels={"whisper_model": whisper_model, "api_name": api_name},
269
- value=1
270
- )
271
- continue
272
-
273
- if not transcription.strip():
274
- update_progress("Transcription is empty.")
275
- failed_count += 1
276
- log_counter(
277
- metric_name="audio_files_failed_total",
278
- labels={"whisper_model": whisper_model, "api_name": api_name},
279
- value=1
280
- )
281
- else:
282
- # Apply chunking
283
- chunked_text = improved_chunking_process(transcription, chunk_options)
284
-
285
- # Summarize
286
- logging.debug(f"Audio Transcription API Name: {api_name}")
287
- if api_name:
288
- try:
289
- summary = perform_summarization(api_name, chunked_text, custom_prompt_input, api_key)
290
- update_progress("Audio summarized successfully.")
291
- except Exception as e:
292
- logging.error(f"Error during summarization: {str(e)}")
293
- summary = "Summary generation failed"
294
- failed_count += 1
295
- log_counter(
296
- metric_name="audio_files_failed_total",
297
- labels={"whisper_model": whisper_model, "api_name": api_name},
298
- value=1
299
- )
300
- else:
301
- summary = "No summary available (API not provided)"
302
 
303
- all_transcriptions.append(transcription)
304
- all_summaries.append(summary)
 
 
305
 
306
- # Use custom_title if provided, otherwise use the original filename
307
- title = custom_title if custom_title else os.path.basename(wav_file_path)
308
-
309
- # Add to database
310
- add_media_with_keywords(
311
- url=url,
312
- title=title,
313
- media_type='audio',
314
- content=transcription,
315
- keywords=custom_keywords,
316
- prompt=custom_prompt_input,
317
- summary=summary,
318
- transcription_model=whisper_model,
319
- author="Unknown",
320
- ingestion_date=datetime.now().strftime('%Y-%m-%d')
321
- )
322
- update_progress("Audio file processed and added to database.")
323
- processed_count += 1
324
- log_counter(
325
- metric_name="audio_files_processed_total",
326
- labels={"whisper_model": whisper_model, "api_name": api_name},
327
- value=1
328
- )
329
-
330
- # Process uploaded file if provided
331
- if audio_file:
332
- url = generate_unique_id()
333
- if os.path.getsize(audio_file.name) > MAX_FILE_SIZE:
334
- update_progress(
335
- f"Uploaded file size exceeds the maximum limit of {MAX_FILE_SIZE / (1024 * 1024):.2f}MB. Skipping this file.")
336
- else:
337
- try:
338
- # Re-encode MP3 to fix potential issues
339
- reencoded_mp3_path = reencode_mp3(audio_file.name)
340
- if not os.path.exists(reencoded_mp3_path):
341
- update_progress(f"Re-encoded file not found: {reencoded_mp3_path}")
342
- return update_progress("Processing failed: Re-encoded file not found"), "", ""
343
 
 
 
344
  temp_files.append(reencoded_mp3_path)
345
 
346
- # Convert re-encoded MP3 to WAV
347
  wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
348
- if not os.path.exists(wav_file_path):
349
- update_progress(f"Converted WAV file not found: {wav_file_path}")
350
- return update_progress("Processing failed: Converted WAV file not found"), "", ""
351
-
352
  temp_files.append(wav_file_path)
353
 
354
- # Initialize transcription
355
- transcription = ""
356
-
357
- if diarize:
358
- segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=True)
359
- else:
360
- segments = speech_to_text(wav_file_path, whisper_model=whisper_model)
361
 
362
- # Handle segments nested under 'segments' key
363
  if isinstance(segments, dict) and 'segments' in segments:
364
  segments = segments['segments']
365
 
366
- if isinstance(segments, list):
367
- transcription = format_transcription_with_timestamps(segments)
368
- else:
369
- update_progress("Unexpected segments format received from speech_to_text.")
370
- logging.error(f"Unexpected segments format: {segments}")
371
 
372
- chunked_text = improved_chunking_process(transcription, chunk_options)
 
 
373
 
374
- logging.debug(f"Audio Transcription API Name: {api_name}")
375
- if api_name:
 
 
 
376
  try:
377
- summary = perform_summarization(api_name, chunked_text, custom_prompt_input, api_key)
 
 
 
378
  update_progress("Audio summarized successfully.")
379
  except Exception as e:
380
- logging.error(f"Error during summarization: {str(e)}")
381
  summary = "Summary generation failed"
382
- else:
383
- summary = "No summary available (API not provided)"
384
 
 
385
  all_transcriptions.append(transcription)
386
  all_summaries.append(summary)
387
 
388
- # Use custom_title if provided, otherwise use the original filename
389
  title = custom_title if custom_title else os.path.basename(wav_file_path)
390
-
391
  add_media_with_keywords(
392
- url="Uploaded File",
393
  title=title,
394
  media_type='audio',
395
  content=transcription,
@@ -400,65 +258,112 @@ def process_audio_files(audio_urls, audio_file, whisper_model, api_name, api_key
400
  author="Unknown",
401
  ingestion_date=datetime.now().strftime('%Y-%m-%d')
402
  )
403
- update_progress("Uploaded file processed and added to database.")
404
  processed_count += 1
405
- log_counter(
406
- metric_name="audio_files_processed_total",
407
- labels={"whisper_model": whisper_model, "api_name": api_name},
408
- value=1
409
- )
410
  except Exception as e:
411
- update_progress(f"Error processing uploaded file: {str(e)}")
412
- logging.error(f"Error processing uploaded file: {str(e)}")
413
  failed_count += 1
414
- log_counter(
415
- metric_name="audio_files_failed_total",
416
- labels={"whisper_model": whisper_model, "api_name": api_name},
417
- value=1
418
- )
419
- return update_progress("Processing failed: Error processing uploaded file"), "", ""
420
- # Final cleanup
421
- if not keep_original:
422
- cleanup_files()
423
 
424
- end_time = time.time()
425
- processing_time = end_time - start_time
426
- # Log processing time
427
- log_histogram(
428
- metric_name="audio_processing_time_seconds",
429
- value=processing_time,
430
- labels={"whisper_model": whisper_model, "api_name": api_name}
431
- )
432
 
433
- # Optionally, log total counts
434
- log_counter(
435
- metric_name="total_audio_files_processed",
436
- labels={"whisper_model": whisper_model, "api_name": api_name},
437
- value=processed_count
438
- )
439
 
440
- log_counter(
441
- metric_name="total_audio_files_failed",
442
- labels={"whisper_model": whisper_model, "api_name": api_name},
443
- value=failed_count
444
- )
445
446
 
447
- final_progress = update_progress("All processing complete.")
448
- final_transcriptions = "\n\n".join(all_transcriptions)
449
- final_summaries = "\n\n".join(all_summaries)
450
 
451
  return final_progress, final_transcriptions, final_summaries
452
 
453
  except Exception as e:
454
- logging.error(f"Error processing audio files: {str(e)}")
455
- log_counter(
456
- metric_name="audio_files_failed_total",
457
- labels={"whisper_model": whisper_model, "api_name": api_name},
458
- value=1
459
- )
460
- cleanup_files()
461
- return update_progress(f"Processing failed: {str(e)}"), "", ""
462
 
463
 
464
  def format_transcription_with_timestamps(segments, keep_timestamps):
 
117
  progress = []
118
  all_transcriptions = []
119
  all_summaries = []
120
+ temp_files = [] # Keep track of temporary files
121
+
122
  def format_transcription_with_timestamps(segments):
123
  if keep_timestamps:
124
  formatted_segments = []
125
  for segment in segments:
126
  start = segment.get('Time_Start', 0)
127
  end = segment.get('Time_End', 0)
128
+ text = segment.get('Text', '').strip()
 
 
129
  formatted_segments.append(f"[{start:.2f}-{end:.2f}] {text}")
130
 
131
  # Join the segments with a newline to ensure proper formatting
 
190
  'language': chunk_language
191
  }
192
 
193
+ # Process URLs if provided
194
+ if audio_urls:
195
+ urls = [url.strip() for url in audio_urls.split('\n') if url.strip()]
196
+ for i, url in enumerate(urls):
197
+ try:
198
+ update_progress(f"Processing URL {i + 1}/{len(urls)}: {url}")
199
 
200
+ # Download and process audio file
201
+ audio_file_path = download_audio_file(url, use_cookies, cookies)
202
+ if not audio_file_path:
203
+ raise FileNotFoundError(f"Failed to download audio from URL: {url}")
204
 
205
+ temp_files.append(audio_file_path)
206
 
207
+ # Process the audio file
208
+ reencoded_mp3_path = reencode_mp3(audio_file_path)
209
  temp_files.append(reencoded_mp3_path)
210
 
 
211
  wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
 
 
 
 
212
  temp_files.append(wav_file_path)
213
 
214
+ # Transcribe audio
215
+ segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=diarize)
216
 
217
+ # Handle segments format
218
  if isinstance(segments, dict) and 'segments' in segments:
219
  segments = segments['segments']
220
 
221
+ if not isinstance(segments, list):
222
+ raise ValueError("Unexpected segments format received from speech_to_text")
 
 
 
223
 
224
+ transcription = format_transcription_with_timestamps(segments)
225
+ if not transcription.strip():
226
+ raise ValueError("Empty transcription generated")
227
 
228
+ # Initialize summary with default value
229
+ summary = "No summary available"
230
+
231
+ # Attempt summarization if API is provided
232
+ if api_name and api_name.lower() != "none":
233
  try:
234
+ chunked_text = improved_chunking_process(transcription, chunk_options)
235
+ summary_result = perform_summarization(api_name, chunked_text, custom_prompt_input, api_key)
236
+ if summary_result:
237
+ summary = summary_result
238
  update_progress("Audio summarized successfully.")
239
  except Exception as e:
240
+ logging.error(f"Summarization failed: {str(e)}")
241
  summary = "Summary generation failed"
 
 
242
 
243
+ # Add to results
244
  all_transcriptions.append(transcription)
245
  all_summaries.append(summary)
246
 
247
+ # Add to database
248
  title = custom_title if custom_title else os.path.basename(wav_file_path)
 
249
  add_media_with_keywords(
250
+ url=url,
251
  title=title,
252
  media_type='audio',
253
  content=transcription,
 
258
  author="Unknown",
259
  ingestion_date=datetime.now().strftime('%Y-%m-%d')
260
  )
261
+
262
  processed_count += 1
263
+ update_progress(f"Successfully processed URL {i + 1}")
264
+ log_counter("audio_files_processed_total", 1, {"whisper_model": whisper_model, "api_name": api_name})
265
+
 
 
266
  except Exception as e:
 
 
267
  failed_count += 1
268
+ update_progress(f"Failed to process URL {i + 1}: {str(e)}")
269
+ log_counter("audio_files_failed_total", 1, {"whisper_model": whisper_model, "api_name": api_name})
270
+ continue
271
 
272
+ # Process uploaded file if provided
273
+ if audio_file:
274
+ try:
275
+ update_progress("Processing uploaded file...")
276
+ if os.path.getsize(audio_file.name) > MAX_FILE_SIZE:
277
+ raise ValueError(f"File size exceeds maximum limit of {MAX_FILE_SIZE / (1024 * 1024):.2f}MB")
 
 
278
 
279
+ reencoded_mp3_path = reencode_mp3(audio_file.name)
280
+ temp_files.append(reencoded_mp3_path)
 
 
 
 
281
 
282
+ wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
283
+ temp_files.append(wav_file_path)
284
+
285
+ # Transcribe audio
286
+ segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=diarize)
287
+
288
+ if isinstance(segments, dict) and 'segments' in segments:
289
+ segments = segments['segments']
290
+
291
+ if not isinstance(segments, list):
292
+ raise ValueError("Unexpected segments format received from speech_to_text")
293
 
294
+ transcription = format_transcription_with_timestamps(segments)
295
+ if not transcription.strip():
296
+ raise ValueError("Empty transcription generated")
297
+
298
+ # Initialize summary with default value
299
+ summary = "No summary available"
300
 
301
+ # Attempt summarization if API is provided
302
+ if api_name and api_name.lower() != "none":
303
+ try:
304
+ chunked_text = improved_chunking_process(transcription, chunk_options)
305
+ summary_result = perform_summarization(api_name, chunked_text, custom_prompt_input, api_key)
306
+ if summary_result:
307
+ summary = summary_result
308
+ update_progress("Audio summarized successfully.")
309
+ except Exception as e:
310
+ logging.error(f"Summarization failed: {str(e)}")
311
+ summary = "Summary generation failed"
312
+
313
+ # Add to results
314
+ all_transcriptions.append(transcription)
315
+ all_summaries.append(summary)
316
+
317
+ # Add to database
318
+ title = custom_title if custom_title else os.path.basename(wav_file_path)
319
+ add_media_with_keywords(
320
+ url="Uploaded File",
321
+ title=title,
322
+ media_type='audio',
323
+ content=transcription,
324
+ keywords=custom_keywords,
325
+ prompt=custom_prompt_input,
326
+ summary=summary,
327
+ transcription_model=whisper_model,
328
+ author="Unknown",
329
+ ingestion_date=datetime.now().strftime('%Y-%m-%d')
330
+ )
331
+
332
+ processed_count += 1
333
+ update_progress("Successfully processed uploaded file")
334
+ log_counter("audio_files_processed_total", 1, {"whisper_model": whisper_model, "api_name": api_name})
335
+
336
+ except Exception as e:
337
+ failed_count += 1
338
+ update_progress(f"Failed to process uploaded file: {str(e)}")
339
+ log_counter("audio_files_failed_total", 1, {"whisper_model": whisper_model, "api_name": api_name})
340
+
341
+ # Cleanup temporary files
342
+ if not keep_original:
343
+ cleanup_files()
344
+
345
+ # Log processing metrics
346
+ processing_time = time.time() - start_time
347
+ log_histogram("audio_processing_time_seconds", processing_time,
348
+ {"whisper_model": whisper_model, "api_name": api_name})
349
+ log_counter("total_audio_files_processed", processed_count,
350
+ {"whisper_model": whisper_model, "api_name": api_name})
351
+ log_counter("total_audio_files_failed", failed_count,
352
+ {"whisper_model": whisper_model, "api_name": api_name})
353
+
354
+ # Prepare final output
355
+ final_progress = update_progress(f"Processing complete. Processed: {processed_count}, Failed: {failed_count}")
356
+ final_transcriptions = "\n\n".join(all_transcriptions) if all_transcriptions else "No transcriptions available"
357
+ final_summaries = "\n\n".join(all_summaries) if all_summaries else "No summaries available"
358
 
359
  return final_progress, final_transcriptions, final_summaries
360
 
361
  except Exception as e:
362
+ logging.error(f"Error in process_audio_files: {str(e)}")
363
+ log_counter("audio_files_failed_total", 1, {"whisper_model": whisper_model, "api_name": api_name})
364
+ if not keep_original:
365
+ cleanup_files()
366
+ return update_progress(f"Processing failed: {str(e)}"), "No transcriptions available", "No summaries available"
 
 
 
367
 
368
 
369
  def format_transcription_with_timestamps(segments, keep_timestamps):
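For reference, the nested format_transcription_with_timestamps helper kept in both versions of process_audio_files expects Whisper-style segment dicts with Time_Start, Time_End, and Text keys. A minimal illustrative sketch of the formatting it applies when keep_timestamps is enabled (the sample segments below are invented):

# Hypothetical segments in the shape the helper above expects.
segments = [
    {"Time_Start": 0.0, "Time_End": 2.5, "Text": "Hello and welcome."},
    {"Time_Start": 2.5, "Time_End": 6.0, "Text": " Today: audio ingestion. "},
]

# Mirrors the per-segment formatting in the diff: strip the text and prefix it
# with a [start-end] timestamp, one segment per line.
formatted = "\n".join(
    f"[{seg.get('Time_Start', 0):.2f}-{seg.get('Time_End', 0):.2f}] {seg.get('Text', '').strip()}"
    for seg in segments
)
print(formatted)
# [0.00-2.50] Hello and welcome.
# [2.50-6.00] Today: audio ingestion.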
App_Function_Libraries/Audio/Audio_Transcription_Lib.py CHANGED
@@ -332,4 +332,4 @@ def save_audio_temp(audio_data, sample_rate=16000):
 
 #
 #
-#######################################################################################################################
+#######################################################################################################################
App_Function_Libraries/Benchmarks_Evaluations/InfiniteBench/InifiniteBench/__pycache__/test_chat_API_Calls.cpython-312-pytest-7.2.1.pyc ADDED
Binary file (7.15 kB).
App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py CHANGED
@@ -24,7 +24,7 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from App_Function_Libraries.Chat import chat_api_call
+from App_Function_Libraries.Chat.Chat_Functions import chat_api_call
 
 #
 #######################################################################################################################
App_Function_Libraries/Books/Book_Ingestion_Lib.py CHANGED
@@ -18,6 +18,9 @@ import tempfile
18
  import zipfile
19
  from datetime import datetime
20
  import logging
 
 
 
21
  #
22
  # External Imports
23
  import ebooklib
@@ -241,109 +244,244 @@ def process_zip_file(zip_file,
241
  return "\n".join(results)
242
 
243
 
244
- def import_file_handler(file,
245
- title,
246
- author,
247
- keywords,
248
- system_prompt,
249
- custom_prompt,
250
- auto_summarize,
251
- api_name,
252
- api_key,
253
- max_chunk_size,
254
- chunk_overlap,
255
- custom_chapter_pattern
256
- ):
257
  try:
258
- log_counter("file_import_attempt", labels={"file_name": file.name})
259
-
260
- # Handle max_chunk_size
261
- if isinstance(max_chunk_size, str):
262
- max_chunk_size = int(max_chunk_size) if max_chunk_size.strip() else 4000
263
- elif not isinstance(max_chunk_size, int):
264
- max_chunk_size = 4000 # Default value if not a string or int
265
-
266
- # Handle chunk_overlap
267
- if isinstance(chunk_overlap, str):
268
- chunk_overlap = int(chunk_overlap) if chunk_overlap.strip() else 0
269
- elif not isinstance(chunk_overlap, int):
270
- chunk_overlap = 0 # Default value if not a string or int
271
-
272
- chunk_options = {
273
- 'method': 'chapter',
274
- 'max_size': max_chunk_size,
275
- 'overlap': chunk_overlap,
276
- 'custom_chapter_pattern': custom_chapter_pattern if custom_chapter_pattern else None
277
- }
278
 
279
- if file is None:
280
- log_counter("file_import_error", labels={"error": "No file uploaded"})
281
- return "No file uploaded."
282
 
283
- file_path = file.name
284
- if not os.path.exists(file_path):
285
- log_counter("file_import_error", labels={"error": "File not found", "file_name": file.name})
286
- return "Uploaded file not found."
287
 
288
- start_time = datetime.now()
 
 
 
 
289
 
290
- if file_path.lower().endswith('.epub'):
291
- status = import_epub(
292
- file_path,
293
- title,
294
- author,
295
- keywords,
296
- custom_prompt=custom_prompt,
297
- system_prompt=system_prompt,
298
- summary=None,
299
- auto_summarize=auto_summarize,
300
- api_name=api_name,
301
- api_key=api_key,
302
- chunk_options=chunk_options,
303
- custom_chapter_pattern=custom_chapter_pattern
304
- )
305
- log_counter("epub_import_success", labels={"file_name": file.name})
306
- result = f"📚 EPUB Imported Successfully:\n{status}"
307
- elif file.name.lower().endswith('.zip'):
308
- status = process_zip_file(
309
- zip_file=file,
310
- title=title,
311
- author=author,
312
- keywords=keywords,
313
- custom_prompt=custom_prompt,
314
- system_prompt=system_prompt,
315
- summary=None,
316
- auto_summarize=auto_summarize,
317
- api_name=api_name,
318
- api_key=api_key,
319
- chunk_options=chunk_options
320
- )
321
- log_counter("zip_import_success", labels={"file_name": file.name})
322
- result = f"📦 ZIP Processed Successfully:\n{status}"
323
- elif file.name.lower().endswith(('.chm', '.html', '.pdf', '.xml', '.opml')):
324
- file_type = file.name.split('.')[-1].upper()
325
- log_counter("unsupported_file_type", labels={"file_type": file_type})
326
- result = f"{file_type} file import is not yet supported."
327
- else:
328
- log_counter("unsupported_file_type", labels={"file_type": file.name.split('.')[-1]})
329
- result = "❌ Unsupported file type. Please upload an `.epub` file or a `.zip` file containing `.epub` files."
330
 
331
- end_time = datetime.now()
332
- processing_time = (end_time - start_time).total_seconds()
333
- log_histogram("file_import_duration", processing_time, labels={"file_name": file.name})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  return result
336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  except ValueError as ve:
338
  logging.exception(f"Error parsing input values: {str(ve)}")
339
- log_counter("file_import_error", labels={"error": "Invalid input", "file_name": file.name})
340
  return f"❌ Error: Invalid input for chunk size or overlap. Please enter valid numbers."
341
  except Exception as e:
342
  logging.exception(f"Error during file import: {str(e)}")
343
- log_counter("file_import_error", labels={"error": str(e), "file_name": file.name})
344
  return f"❌ Error during import: {str(e)}"
345
 
346
 
 
347
  def read_epub(file_path):
348
  """
349
  Reads and extracts text from an EPUB file.
@@ -424,9 +562,9 @@ def ingest_text_file(file_path, title=None, author=None, keywords=None):
424
 
425
  # Add the text file to the database
426
  add_media_with_keywords(
427
- url=file_path,
428
  title=title,
429
- media_type='document',
430
  content=content,
431
  keywords=keywords,
432
  prompt='No prompt for text files',
 
18
  import zipfile
19
  from datetime import datetime
20
  import logging
21
+ import xml.etree.ElementTree as ET
22
+ import html2text
23
+ import csv
24
  #
25
  # External Imports
26
  import ebooklib
 
244
  return "\n".join(results)
245
 
246
 
247
+ def import_html(file_path, title=None, author=None, keywords=None, **kwargs):
248
+ """
249
+ Imports an HTML file and converts it to markdown format.
250
+ """
251
  try:
252
+ logging.info(f"Importing HTML file from {file_path}")
253
+ h = html2text.HTML2Text()
254
+ h.ignore_links = False
255
 
256
+ with open(file_path, 'r', encoding='utf-8') as file:
257
+ html_content = file.read()
 
258
 
259
+ markdown_content = h.handle(html_content)
 
 
 
260
 
261
+ # Extract title from HTML if not provided
262
+ if not title:
263
+ soup = BeautifulSoup(html_content, 'html.parser')
264
+ title_tag = soup.find('title')
265
+ title = title_tag.string if title_tag else os.path.basename(file_path)
266
 
267
+ return process_markdown_content(markdown_content, file_path, title, author, keywords, **kwargs)
268
 
269
+ except Exception as e:
270
+ logging.exception(f"Error importing HTML file: {str(e)}")
271
+ raise
272
+
273
+
274
+ def import_xml(file_path, title=None, author=None, keywords=None, **kwargs):
275
+ """
276
+ Imports an XML file and converts it to markdown format.
277
+ """
278
+ try:
279
+ logging.info(f"Importing XML file from {file_path}")
280
+ tree = ET.parse(file_path)
281
+ root = tree.getroot()
282
+
283
+ # Convert XML to markdown
284
+ markdown_content = xml_to_markdown(root)
285
+
286
+ return process_markdown_content(markdown_content, file_path, title, author, keywords, **kwargs)
287
+
288
+ except Exception as e:
289
+ logging.exception(f"Error importing XML file: {str(e)}")
290
+ raise
291
+
292
+
293
+ def import_opml(file_path, title=None, author=None, keywords=None, **kwargs):
294
+ """
295
+ Imports an OPML file and converts it to markdown format.
296
+ """
297
+ try:
298
+ logging.info(f"Importing OPML file from {file_path}")
299
+ tree = ET.parse(file_path)
300
+ root = tree.getroot()
301
+
302
+ # Extract title from OPML if not provided
303
+ if not title:
304
+ title_elem = root.find(".//title")
305
+ title = title_elem.text if title_elem is not None else os.path.basename(file_path)
306
+
307
+ # Convert OPML to markdown
308
+ markdown_content = opml_to_markdown(root)
309
+
310
+ return process_markdown_content(markdown_content, file_path, title, author, keywords, **kwargs)
311
+
312
+ except Exception as e:
313
+ logging.exception(f"Error importing OPML file: {str(e)}")
314
+ raise
315
+
316
+
317
+ def xml_to_markdown(element, level=0):
318
+ """
319
+ Recursively converts XML elements to markdown format.
320
+ """
321
+ markdown = ""
322
+
323
+ # Add element name as heading
324
+ if level > 0:
325
+ markdown += f"{'#' * min(level, 6)} {element.tag}\n\n"
326
+
327
+ # Add element text if it exists
328
+ if element.text and element.text.strip():
329
+ markdown += f"{element.text.strip()}\n\n"
330
+
331
+ # Process child elements
332
+ for child in element:
333
+ markdown += xml_to_markdown(child, level + 1)
334
+
335
+ return markdown
336
 
337
+
338
+ def opml_to_markdown(root):
339
+ """
340
+ Converts OPML structure to markdown format.
341
+ """
342
+ markdown = "# Table of Contents\n\n"
343
+
344
+ def process_outline(outline, level=0):
345
+ result = ""
346
+ for item in outline.findall("outline"):
347
+ text = item.get("text", "")
348
+ result += f"{' ' * level}- {text}\n"
349
+ result += process_outline(item, level + 1)
350
  return result
351
 
352
+ body = root.find(".//body")
353
+ if body is not None:
354
+ markdown += process_outline(body)
355
+
356
+ return markdown
357
+
358
+
359
+ def process_markdown_content(markdown_content, file_path, title, author, keywords, **kwargs):
360
+ """
361
+ Processes markdown content and adds it to the database.
362
+ """
363
+ info_dict = {
364
+ 'title': title or os.path.basename(file_path),
365
+ 'uploader': author or "Unknown",
366
+ 'ingestion_date': datetime.now().strftime('%Y-%m-%d')
367
+ }
368
+
369
+ # Create segments (you may want to adjust the chunking method)
370
+ segments = [{'Text': markdown_content}]
371
+
372
+ # Add to database
373
+ result = add_media_to_database(
374
+ url=file_path,
375
+ info_dict=info_dict,
376
+ segments=segments,
377
+ summary=kwargs.get('summary', "No summary provided"),
378
+ keywords=keywords.split(',') if keywords else [],
379
+ custom_prompt_input=kwargs.get('custom_prompt'),
380
+ whisper_model="Imported",
381
+ media_type="document",
382
+ overwrite=False
383
+ )
384
+
385
+ return f"Document '{title}' imported successfully. Database result: {result}"
386
+
387
+
388
+ def import_file_handler(files,
389
+ author,
390
+ keywords,
391
+ system_prompt,
392
+ custom_prompt,
393
+ auto_summarize,
394
+ api_name,
395
+ api_key,
396
+ max_chunk_size,
397
+ chunk_overlap,
398
+ custom_chapter_pattern):
399
+ try:
400
+ if not files:
401
+ return "No files uploaded."
402
+
403
+ # Convert single file to list for consistent processing
404
+ if not isinstance(files, list):
405
+ files = [files]
406
+
407
+ results = []
408
+ for file in files:
409
+ log_counter("file_import_attempt", labels={"file_name": file.name})
410
+
411
+ # Handle max_chunk_size and chunk_overlap
412
+ chunk_size = int(max_chunk_size) if isinstance(max_chunk_size, (str, int)) else 4000
413
+ overlap = int(chunk_overlap) if isinstance(chunk_overlap, (str, int)) else 0
414
+
415
+ chunk_options = {
416
+ 'method': 'chapter',
417
+ 'max_size': chunk_size,
418
+ 'overlap': overlap,
419
+ 'custom_chapter_pattern': custom_chapter_pattern if custom_chapter_pattern else None
420
+ }
421
+
422
+ file_path = file.name
423
+ if not os.path.exists(file_path):
424
+ results.append(f"❌ File not found: {file.name}")
425
+ continue
426
+
427
+ start_time = datetime.now()
428
+
429
+ # Extract title from filename
430
+ title = os.path.splitext(os.path.basename(file_path))[0]
431
+
432
+ if file_path.lower().endswith('.epub'):
433
+ status = import_epub(
434
+ file_path,
435
+ title=title, # Use filename as title
436
+ author=author,
437
+ keywords=keywords,
438
+ custom_prompt=custom_prompt,
439
+ system_prompt=system_prompt,
440
+ summary=None,
441
+ auto_summarize=auto_summarize,
442
+ api_name=api_name,
443
+ api_key=api_key,
444
+ chunk_options=chunk_options,
445
+ custom_chapter_pattern=custom_chapter_pattern
446
+ )
447
+ log_counter("epub_import_success", labels={"file_name": file.name})
448
+ results.append(f"📚 {file.name}: {status}")
449
+
450
+ elif file_path.lower().endswith('.zip'):
451
+ status = process_zip_file(
452
+ zip_file=file,
453
+ title=None, # Let each file use its own name
454
+ author=author,
455
+ keywords=keywords,
456
+ custom_prompt=custom_prompt,
457
+ system_prompt=system_prompt,
458
+ summary=None,
459
+ auto_summarize=auto_summarize,
460
+ api_name=api_name,
461
+ api_key=api_key,
462
+ chunk_options=chunk_options
463
+ )
464
+ log_counter("zip_import_success", labels={"file_name": file.name})
465
+ results.append(f"📦 {file.name}: {status}")
466
+ else:
467
+ results.append(f"❌ Unsupported file type: {file.name}")
468
+ continue
469
+
470
+ end_time = datetime.now()
471
+ processing_time = (end_time - start_time).total_seconds()
472
+ log_histogram("file_import_duration", processing_time, labels={"file_name": file.name})
473
+
474
+ return "\n\n".join(results)
475
+
476
  except ValueError as ve:
477
  logging.exception(f"Error parsing input values: {str(ve)}")
 
478
  return f"❌ Error: Invalid input for chunk size or overlap. Please enter valid numbers."
479
  except Exception as e:
480
  logging.exception(f"Error during file import: {str(e)}")
 
481
  return f"❌ Error during import: {str(e)}"
482
 
483
 
484
+
485
  def read_epub(file_path):
486
  """
487
  Reads and extracts text from an EPUB file.
 
562
 
563
  # Add the text file to the database
564
  add_media_with_keywords(
565
+ url="its_a_book",
566
  title=title,
567
+ media_type='book',
568
  content=content,
569
  keywords=keywords,
570
  prompt='No prompt for text files',
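The new xml_to_markdown and opml_to_markdown helpers added above are pure functions over an ElementTree node, so they can be exercised in isolation. A small sketch, assuming the helpers are importable from App_Function_Libraries.Books.Book_Ingestion_Lib (the sample XML and OPML strings are invented):

import xml.etree.ElementTree as ET

from App_Function_Libraries.Books.Book_Ingestion_Lib import opml_to_markdown, xml_to_markdown

# Child elements become '#'-style headings followed by their text content.
book_root = ET.fromstring("<book><chapter>Intro text</chapter><chapter>Methods</chapter></book>")
print(xml_to_markdown(book_root))

# OPML outlines become a "# Table of Contents" heading with an indented bullet list.
opml_root = ET.fromstring(
    "<opml version='2.0'><body>"
    "<outline text='Part 1'><outline text='Section 1.1'/></outline>"
    "</body></opml>"
)
print(opml_to_markdown(opml_root))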
App_Function_Libraries/Chat/Chat_Functions.py ADDED
@@ -0,0 +1,453 @@
1
+ # Chat_Functions.py
2
+ # Chat functions for interacting with the LLMs as chatbots
3
+ import base64
4
+ # Imports
5
+ import json
6
+ import logging
7
+ import os
8
+ import re
9
+ import sqlite3
10
+ import tempfile
11
+ import time
12
+ from datetime import datetime
13
+ from pathlib import Path
14
+ #
15
+ # External Imports
16
+ #
17
+ # Local Imports
18
+ from App_Function_Libraries.DB.DB_Manager import start_new_conversation, delete_messages_in_conversation, save_message
19
+ from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_db_connection, get_conversation_name
20
+ from App_Function_Libraries.LLM_API_Calls import chat_with_openai, chat_with_anthropic, chat_with_cohere, \
21
+ chat_with_groq, chat_with_openrouter, chat_with_deepseek, chat_with_mistral, chat_with_huggingface
22
+ from App_Function_Libraries.LLM_API_Calls_Local import chat_with_aphrodite, chat_with_local_llm, chat_with_ollama, \
23
+ chat_with_kobold, chat_with_llama, chat_with_oobabooga, chat_with_tabbyapi, chat_with_vllm, chat_with_custom_openai
24
+ from App_Function_Libraries.DB.SQLite_DB import load_media_content
25
+ from App_Function_Libraries.Utils.Utils import generate_unique_filename, load_and_log_configs
26
+ from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
27
+ #
28
+ ####################################################################################################
29
+ #
30
+ # Functions:
31
+
32
+ def approximate_token_count(history):
33
+ total_text = ''
34
+ for user_msg, bot_msg in history:
35
+ if user_msg:
36
+ total_text += user_msg + ' '
37
+ if bot_msg:
38
+ total_text += bot_msg + ' '
39
+ total_tokens = len(total_text.split())
40
+ return total_tokens
41
+
42
+ def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message=None):
43
+ log_counter("chat_api_call_attempt", labels={"api_endpoint": api_endpoint})
44
+ start_time = time.time()
45
+ if not api_key:
46
+ api_key = None
47
+ model = None
48
+ try:
49
+ logging.info(f"Debug - Chat API Call - API Endpoint: {api_endpoint}")
50
+ logging.info(f"Debug - Chat API Call - API Key: {api_key}")
51
+ logging.info(f"Debug - Chat chat_api_call - API Endpoint: {api_endpoint}")
52
+ if api_endpoint.lower() == 'openai':
53
+ response = chat_with_openai(api_key, input_data, prompt, temp, system_message)
54
+
55
+ elif api_endpoint.lower() == 'anthropic':
56
+ # Retrieve the model from config
57
+ loaded_config_data = load_and_log_configs()
58
+ model = loaded_config_data['models']['anthropic'] if loaded_config_data else None
59
+ response = chat_with_anthropic(
60
+ api_key=api_key,
61
+ input_data=input_data,
62
+ model=model,
63
+ custom_prompt_arg=prompt,
64
+ system_prompt=system_message
65
+ )
66
+
67
+ elif api_endpoint.lower() == "cohere":
68
+ response = chat_with_cohere(
69
+ api_key,
70
+ input_data,
71
+ model=model,
72
+ custom_prompt_arg=prompt,
73
+ system_prompt=system_message,
74
+ temp=temp
75
+ )
76
+
77
+ elif api_endpoint.lower() == "groq":
78
+ response = chat_with_groq(api_key, input_data, prompt, temp, system_message)
79
+
80
+ elif api_endpoint.lower() == "openrouter":
81
+ response = chat_with_openrouter(api_key, input_data, prompt, temp, system_message)
82
+
83
+ elif api_endpoint.lower() == "deepseek":
84
+ response = chat_with_deepseek(api_key, input_data, prompt, temp, system_message)
85
+
86
+ elif api_endpoint.lower() == "mistral":
87
+ response = chat_with_mistral(api_key, input_data, prompt, temp, system_message)
88
+
89
+ elif api_endpoint.lower() == "llama.cpp":
90
+ response = chat_with_llama(input_data, prompt, temp, None, api_key, system_message)
91
+ elif api_endpoint.lower() == "kobold":
92
+ response = chat_with_kobold(input_data, api_key, prompt, temp, system_message)
93
+
94
+ elif api_endpoint.lower() == "ooba":
95
+ response = chat_with_oobabooga(input_data, api_key, prompt, temp, system_message)
96
+
97
+ elif api_endpoint.lower() == "tabbyapi":
98
+ response = chat_with_tabbyapi(input_data, prompt, temp, system_message)
99
+
100
+ elif api_endpoint.lower() == "vllm":
101
+ response = chat_with_vllm(input_data, prompt, system_message)
102
+
103
+ elif api_endpoint.lower() == "local-llm":
104
+ response = chat_with_local_llm(input_data, prompt, temp, system_message)
105
+
106
+ elif api_endpoint.lower() == "huggingface":
107
+ response = chat_with_huggingface(api_key, input_data, prompt, temp) # , system_message)
108
+
109
+ elif api_endpoint.lower() == "ollama":
110
+ response = chat_with_ollama(input_data, prompt, None, api_key, temp, system_message)
111
+
112
+ elif api_endpoint.lower() == "aphrodite":
113
+ response = chat_with_aphrodite(input_data, prompt, temp, system_message)
114
+
115
+ elif api_endpoint.lower() == "custom-openai-api":
116
+ response = chat_with_custom_openai(api_key, input_data, prompt, temp, system_message)
117
+
118
+ else:
119
+ raise ValueError(f"Unsupported API endpoint: {api_endpoint}")
120
+
121
+ call_duration = time.time() - start_time
122
+ log_histogram("chat_api_call_duration", call_duration, labels={"api_endpoint": api_endpoint})
123
+ log_counter("chat_api_call_success", labels={"api_endpoint": api_endpoint})
124
+ return response
125
+
126
+ except Exception as e:
127
+ log_counter("chat_api_call_error", labels={"api_endpoint": api_endpoint, "error": str(e)})
128
+ logging.error(f"Error in chat function: {str(e)}")
129
+ return f"An error occurred: {str(e)}"
130
+
131
+
132
+ def chat(message, history, media_content, selected_parts, api_endpoint, api_key, prompt, temperature,
133
+ system_message=None):
134
+ log_counter("chat_attempt", labels={"api_endpoint": api_endpoint})
135
+ start_time = time.time()
136
+ try:
137
+ logging.info(f"Debug - Chat Function - Message: {message}")
138
+ logging.info(f"Debug - Chat Function - Media Content: {media_content}")
139
+ logging.info(f"Debug - Chat Function - Selected Parts: {selected_parts}")
140
+ logging.info(f"Debug - Chat Function - API Endpoint: {api_endpoint}")
141
+ # logging.info(f"Debug - Chat Function - Prompt: {prompt}")
142
+
143
+ # Ensure selected_parts is a list
144
+ if not isinstance(selected_parts, (list, tuple)):
145
+ selected_parts = [selected_parts] if selected_parts else []
146
+
147
+ # logging.debug(f"Debug - Chat Function - Selected Parts (after check): {selected_parts}")
148
+
149
+ # Combine the selected parts of the media content
150
+ combined_content = "\n\n".join(
151
+ [f"{part.capitalize()}: {media_content.get(part, '')}" for part in selected_parts if part in media_content])
152
+ # Print first 500 chars
153
+ # logging.debug(f"Debug - Chat Function - Combined Content: {combined_content[:500]}...")
154
+
155
+ # Prepare the input for the API
156
+ input_data = f"{combined_content}\n\n" if combined_content else ""
157
+ for old_message, old_response in history:
158
+ input_data += f"{old_message}\nAssistant: {old_response}\n\n"
159
+ input_data += f"{message}\n"
160
+
161
+ if system_message:
162
+ print(f"System message: {system_message}")
163
+ logging.debug(f"Debug - Chat Function - System Message: {system_message}")
164
+ temperature = float(temperature) if temperature else 0.7
165
+ temp = temperature
166
+
167
+ logging.debug(f"Debug - Chat Function - Temperature: {temperature}")
168
+ logging.debug(f"Debug - Chat Function - API Key: {api_key[:10]}")
169
+ logging.debug(f"Debug - Chat Function - Prompt: {prompt}")
170
+
171
+ # Use the existing API request code based on the selected endpoint
172
+ response = chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message)
173
+
174
+ chat_duration = time.time() - start_time
175
+ log_histogram("chat_duration", chat_duration, labels={"api_endpoint": api_endpoint})
176
+ log_counter("chat_success", labels={"api_endpoint": api_endpoint})
177
+ return response
178
+ except Exception as e:
179
+ log_counter("chat_error", labels={"api_endpoint": api_endpoint, "error": str(e)})
180
+ logging.error(f"Error in chat function: {str(e)}")
181
+ return f"An error occurred: {str(e)}"
182
+
183
+
184
+ def save_chat_history_to_db_wrapper(chatbot, conversation_id, media_content, media_name=None):
185
+ log_counter("save_chat_history_to_db_attempt")
186
+ start_time = time.time()
187
+ logging.info(f"Attempting to save chat history. Media content type: {type(media_content)}")
188
+
189
+ try:
190
+ # First check if we can access the database
191
+ try:
192
+ with get_db_connection() as conn:
193
+ cursor = conn.cursor()
194
+ cursor.execute("SELECT 1")
195
+ except sqlite3.DatabaseError as db_error:
196
+ logging.error(f"Database is corrupted or inaccessible: {str(db_error)}")
197
+ return conversation_id, "Database error: The database file appears to be corrupted. Please contact support."
198
+
199
+ # Now attempt the save
200
+ if not conversation_id:
201
+ # Only for new conversations, not updates
202
+ media_id = None
203
+ if isinstance(media_content, dict) and 'content' in media_content:
204
+ try:
205
+ content = media_content['content']
206
+ content_json = content if isinstance(content, dict) else json.loads(content)
207
+ media_id = content_json.get('webpage_url')
208
+ media_name = media_name or content_json.get('title', 'Unnamed Media')
209
+ except (json.JSONDecodeError, AttributeError) as e:
210
+ logging.error(f"Error processing media content: {str(e)}")
211
+ media_id = "unknown_media"
212
+ media_name = media_name or "Unnamed Media"
213
+ else:
214
+ media_id = "unknown_media"
215
+ media_name = media_name or "Unnamed Media"
216
+
217
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
218
+ conversation_title = f"{media_name}_{timestamp}"
219
+ conversation_id = start_new_conversation(title=conversation_title, media_id=media_id)
220
+ logging.info(f"Created new conversation with ID: {conversation_id}")
221
+
222
+ # For both new and existing conversations
223
+ try:
224
+ delete_messages_in_conversation(conversation_id)
225
+ for user_msg, assistant_msg in chatbot:
226
+ if user_msg:
227
+ save_message(conversation_id, "user", user_msg)
228
+ if assistant_msg:
229
+ save_message(conversation_id, "assistant", assistant_msg)
230
+ except sqlite3.DatabaseError as db_error:
231
+ logging.error(f"Database error during message save: {str(db_error)}")
232
+ return conversation_id, "Database error: Unable to save messages. Please try again or contact support."
233
+
234
+ save_duration = time.time() - start_time
235
+ log_histogram("save_chat_history_to_db_duration", save_duration)
236
+ log_counter("save_chat_history_to_db_success")
237
+
238
+ return conversation_id, "Chat history saved successfully!"
239
+
240
+ except Exception as e:
241
+ log_counter("save_chat_history_to_db_error", labels={"error": str(e)})
242
+ error_message = f"Failed to save chat history: {str(e)}"
243
+ logging.error(error_message, exc_info=True)
244
+ return conversation_id, error_message
245
+
246
+
247
+ def save_chat_history(history, conversation_id, media_content):
248
+ log_counter("save_chat_history_attempt")
249
+ start_time = time.time()
250
+ try:
251
+ content, conversation_name = generate_chat_history_content(history, conversation_id, media_content)
252
+
253
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
254
+ safe_conversation_name = re.sub(r'[^a-zA-Z0-9_-]', '_', conversation_name)
255
+ base_filename = f"{safe_conversation_name}_{timestamp}.json"
256
+
257
+ # Create a temporary file
258
+ with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as temp_file:
259
+ temp_file.write(content)
260
+ temp_file_path = temp_file.name
261
+
262
+ # Generate a unique filename
263
+ unique_filename = generate_unique_filename(os.path.dirname(temp_file_path), base_filename)
264
+ final_path = os.path.join(os.path.dirname(temp_file_path), unique_filename)
265
+
266
+ # Rename the temporary file to the unique filename
267
+ os.rename(temp_file_path, final_path)
268
+
269
+ save_duration = time.time() - start_time
270
+ log_histogram("save_chat_history_duration", save_duration)
271
+ log_counter("save_chat_history_success")
272
+ return final_path
273
+ except Exception as e:
274
+ log_counter("save_chat_history_error", labels={"error": str(e)})
275
+ logging.error(f"Error saving chat history: {str(e)}")
276
+ return None
277
+
278
+
279
+ def generate_chat_history_content(history, conversation_id, media_content):
280
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
281
+
282
+ conversation_name = get_conversation_name(conversation_id)
283
+
284
+ if not conversation_name:
285
+ media_name = extract_media_name(media_content)
286
+ if media_name:
287
+ conversation_name = f"{media_name}-chat"
288
+ else:
289
+ conversation_name = f"chat-{timestamp}" # Fallback name
290
+
291
+ chat_data = {
292
+ "conversation_id": conversation_id,
293
+ "conversation_name": conversation_name,
294
+ "timestamp": timestamp,
295
+ "history": [
296
+ {
297
+ "role": "user" if i % 2 == 0 else "bot",
298
+ "content": msg[0] if isinstance(msg, tuple) else msg
299
+ }
300
+ for i, msg in enumerate(history)
301
+ ]
302
+ }
303
+
304
+ return json.dumps(chat_data, indent=2), conversation_name
305
+
306
+
307
+ def extract_media_name(media_content):
308
+ if isinstance(media_content, dict):
309
+ content = media_content.get('content', {})
310
+ if isinstance(content, str):
311
+ try:
312
+ content = json.loads(content)
313
+ except json.JSONDecodeError:
314
+ logging.warning("Failed to parse media_content JSON string")
315
+ return None
316
+
317
+ # Try to extract title from the content
318
+ if isinstance(content, dict):
319
+ return content.get('title') or content.get('name')
320
+
321
+ logging.warning(f"Unexpected media_content format: {type(media_content)}")
322
+ return None
323
+
324
+
325
+ def update_chat_content(selected_item, use_content, use_summary, use_prompt, item_mapping):
326
+ log_counter("update_chat_content_attempt")
327
+ start_time = time.time()
328
+ logging.debug(f"Debug - Update Chat Content - Selected Item: {selected_item}\n")
329
+ logging.debug(f"Debug - Update Chat Content - Use Content: {use_content}\n\n\n\n")
330
+ logging.debug(f"Debug - Update Chat Content - Use Summary: {use_summary}\n\n")
331
+ logging.debug(f"Debug - Update Chat Content - Use Prompt: {use_prompt}\n\n")
332
+ logging.debug(f"Debug - Update Chat Content - Item Mapping: {item_mapping}\n\n")
333
+
334
+ if selected_item and selected_item in item_mapping:
335
+ media_id = item_mapping[selected_item]
336
+ content = load_media_content(media_id)
337
+ selected_parts = []
338
+ if use_content and "content" in content:
339
+ selected_parts.append("content")
340
+ if use_summary and "summary" in content:
341
+ selected_parts.append("summary")
342
+ if use_prompt and "prompt" in content:
343
+ selected_parts.append("prompt")
344
+
345
+ # Modified debug print
346
+ if isinstance(content, dict):
347
+ print(f"Debug - Update Chat Content - Content keys: {list(content.keys())}")
348
+ for key, value in content.items():
349
+ print(f"Debug - Update Chat Content - {key} (first 500 char): {str(value)[:500]}\n\n\n\n")
350
+ else:
351
+ print(f"Debug - Update Chat Content - Content(first 500 char): {str(content)[:500]}\n\n\n\n")
352
+
353
+ print(f"Debug - Update Chat Content - Selected Parts: {selected_parts}")
354
+ update_duration = time.time() - start_time
355
+ log_histogram("update_chat_content_duration", update_duration)
356
+ log_counter("update_chat_content_success")
357
+ return content, selected_parts
358
+ else:
359
+ log_counter("update_chat_content_error", labels={"error": str("No item selected or item not in mapping")})
360
+ print(f"Debug - Update Chat Content - No item selected or item not in mapping")
361
+ return {}, []
362
+
363
+ #
364
+ # End of Chat functions
365
+ #######################################################################################################################
366
+
367
+
368
+ #######################################################################################################################
369
+ #
370
+ # Character Card Functions
371
+
372
+ CHARACTERS_FILE = Path('.', 'Helper_Scripts', 'Character_Cards', 'Characters.json')
373
+
374
+
375
+ def save_character(character_data):
376
+ log_counter("save_character_attempt")
377
+ start_time = time.time()
378
+ characters_file = os.path.join(os.path.dirname(__file__), '..', 'Helper_Scripts', 'Character_Cards', 'Characters.json')
379
+ characters_dir = os.path.dirname(characters_file)
380
+
381
+ try:
382
+ if os.path.exists(characters_file):
383
+ with open(characters_file, 'r') as f:
384
+ characters = json.load(f)
385
+ else:
386
+ characters = {}
387
+
388
+ char_name = character_data['name']
389
+
390
+ # Save the image separately if it exists
391
+ if 'image' in character_data:
392
+ img_data = base64.b64decode(character_data['image'])
393
+ img_filename = f"{char_name.replace(' ', '_')}.png"
394
+ img_path = os.path.join(characters_dir, img_filename)
395
+ with open(img_path, 'wb') as f:
396
+ f.write(img_data)
397
+ character_data['image_path'] = os.path.abspath(img_path)
398
+ del character_data['image'] # Remove the base64 image data from the JSON
399
+
400
+ characters[char_name] = character_data
401
+
402
+ with open(characters_file, 'w') as f:
403
+ json.dump(characters, f, indent=2)
404
+
405
+ save_duration = time.time() - start_time
406
+ log_histogram("save_character_duration", save_duration)
407
+ log_counter("save_character_success")
408
+ logging.info(f"Character '{char_name}' saved successfully.")
409
+ except Exception as e:
410
+ log_counter("save_character_error", labels={"error": str(e)})
411
+ logging.error(f"Error saving character: {str(e)}")
412
+
413
+
414
+ def load_characters():
415
+ log_counter("load_characters_attempt")
416
+ start_time = time.time()
417
+ try:
418
+ characters_file = os.path.join(os.path.dirname(__file__), '..', 'Helper_Scripts', 'Character_Cards', 'Characters.json')
419
+ if os.path.exists(characters_file):
420
+ with open(characters_file, 'r') as f:
421
+ characters = json.load(f)
422
+ logging.debug(f"Loaded {len(characters)} characters from {characters_file}")
423
+ load_duration = time.time() - start_time
424
+ log_histogram("load_characters_duration", load_duration)
425
+ log_counter("load_characters_success", labels={"character_count": len(characters)})
426
+ return characters
427
+ else:
428
+ logging.warning(f"Characters file not found: {characters_file}")
429
+ return {}
430
+ except Exception as e:
431
+ log_counter("load_characters_error", labels={"error": str(e)})
432
+ return {}
433
+
434
+
435
+
436
+ def get_character_names():
437
+ log_counter("get_character_names_attempt")
438
+ start_time = time.time()
439
+ try:
440
+ characters = load_characters()
441
+ names = list(characters.keys())
442
+ get_names_duration = time.time() - start_time
443
+ log_histogram("get_character_names_duration", get_names_duration)
444
+ log_counter("get_character_names_success", labels={"name_count": len(names)})
445
+ return names
446
+ except Exception as e:
447
+ log_counter("get_character_names_error", labels={"error": str(e)})
448
+ logging.error(f"Error getting character names: {str(e)}")
449
+ return []
450
+
451
+ #
452
+ # End of Chat.py
453
+ ##########################################################################################################################
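A minimal usage sketch for the character-card helpers above (the name and values are illustrative; the Helper_Scripts/Character_Cards directory is assumed to exist):

    # Round-trip through the JSON-backed store defined above.
    card = {
        'name': 'Example Character',
        'description': 'A short test card.',
        # Optional: an 'image' key must hold base64-encoded image bytes;
        # save_character() decodes it, writes <Name>.png and records image_path.
    }
    save_character(card)
    print(get_character_names())                     # e.g. ['Example Character']
    print(load_characters()['Example Character']['description'])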
App_Function_Libraries/Chat/__init__.py ADDED
File without changes
App_Function_Libraries/Chunk_Lib.py CHANGED
@@ -11,6 +11,7 @@ import json
11
  import logging
12
  import re
13
  from typing import Any, Dict, List, Optional, Tuple
 
14
  #
15
  # Import 3rd party
16
  from openai import OpenAI
@@ -23,7 +24,6 @@ from sklearn.feature_extraction.text import TfidfVectorizer
23
  from sklearn.metrics.pairwise import cosine_similarity
24
  #
25
  # Import Local
26
- from App_Function_Libraries.Tokenization_Methods_Lib import openai_tokenize
27
  from App_Function_Libraries.Utils.Utils import load_comprehensive_config
28
  #
29
  #######################################################################################################################
@@ -106,6 +106,7 @@ def load_document(file_path: str) -> str:
106
 
107
  def improved_chunking_process(text: str, chunk_options: Dict[str, Any] = None) -> List[Dict[str, Any]]:
108
  logging.debug("Improved chunking process started...")
 
109
 
110
  # Extract JSON metadata if present
111
  json_content = {}
@@ -125,49 +126,70 @@ def improved_chunking_process(text: str, chunk_options: Dict[str, Any] = None) -
125
  text = text[len(header_text):].strip()
126
  logging.debug(f"Extracted header text: {header_text}")
127
 
128
- options = chunk_options.copy() if chunk_options else {}
 
129
  if chunk_options:
130
- options.update(chunk_options)
131
-
132
- chunk_method = options.get('method', 'words')
133
- max_size = options.get('max_size', 2000)
134
- overlap = options.get('overlap', 0)
135
- language = options.get('language', None)
 
 
 
 
 
 
 
 
136
 
137
- if language is None:
138
- language = detect_language(text)
 
 
139
 
140
- if chunk_method == 'json':
141
- chunks = chunk_text_by_json(text, max_size=max_size, overlap=overlap)
142
- else:
143
- chunks = chunk_text(text, chunk_method, max_size, overlap, language)
 
 
 
 
 
144
 
145
  chunks_with_metadata = []
146
  total_chunks = len(chunks)
147
- for i, chunk in enumerate(chunks):
148
- metadata = {
149
- 'chunk_index': i + 1,
150
- 'total_chunks': total_chunks,
151
- 'chunk_method': chunk_method,
152
- 'max_size': max_size,
153
- 'overlap': overlap,
154
- 'language': language,
155
- 'relative_position': (i + 1) / total_chunks
156
- }
157
- metadata.update(json_content) # Add the extracted JSON content to metadata
158
- metadata['header_text'] = header_text # Add the header text to metadata
159
-
160
- if chunk_method == 'json':
161
- chunk_text_content = json.dumps(chunk['json'], ensure_ascii=False)
162
- else:
163
- chunk_text_content = chunk
 
164
 
165
- chunks_with_metadata.append({
166
- 'text': chunk_text_content,
167
- 'metadata': metadata
168
- })
169
 
170
- return chunks_with_metadata
 
 
 
 
171
 
172
 
173
  def multi_level_chunking(text: str, method: str, max_size: int, overlap: int, language: str) -> List[str]:
@@ -220,24 +242,35 @@ def determine_chunk_position(relative_position: float) -> str:
220
 
221
  def chunk_text_by_words(text: str, max_words: int = 300, overlap: int = 0, language: str = None) -> List[str]:
222
  logging.debug("chunk_text_by_words...")
223
- if language is None:
224
- language = detect_language(text)
225
-
226
- if language.startswith('zh'): # Chinese
227
- import jieba
228
- words = list(jieba.cut(text))
229
- elif language == 'ja': # Japanese
230
- import fugashi
231
- tagger = fugashi.Tagger()
232
- words = [word.surface for word in tagger(text)]
233
- else: # Default to simple splitting for other languages
234
- words = text.split()
235
 
236
- chunks = []
237
- for i in range(0, len(words), max_words - overlap):
238
- chunk = ' '.join(words[i:i + max_words])
239
- chunks.append(chunk)
240
- return post_process_chunks(chunks)
241
 
242
 
243
  def chunk_text_by_sentences(text: str, max_sentences: int = 10, overlap: int = 0, language: str = None) -> List[str]:
@@ -338,24 +371,24 @@ def get_chunk_metadata(chunk: str, full_text: str, chunk_type: str = "generic",
338
  """
339
  chunk_length = len(chunk)
340
  start_index = full_text.find(chunk)
341
- end_index = start_index + chunk_length if start_index != -1 else None
342
 
343
  # Calculate a hash for the chunk
344
  chunk_hash = hashlib.md5(chunk.encode()).hexdigest()
345
 
346
  metadata = {
347
- 'start_index': start_index,
348
- 'end_index': end_index,
349
- 'word_count': len(chunk.split()),
350
- 'char_count': chunk_length,
351
  'chunk_type': chunk_type,
352
  'language': language,
353
  'chunk_hash': chunk_hash,
354
- 'relative_position': start_index / len(full_text) if len(full_text) > 0 and start_index != -1 else 0
355
  }
356
 
357
  if chunk_type == "chapter":
358
- metadata['chapter_number'] = chapter_number
359
  metadata['chapter_pattern'] = chapter_pattern
360
 
361
  return metadata
@@ -943,6 +976,151 @@ def chunk_ebook_by_chapters(text: str, chunk_options: Dict[str, Any]) -> List[Di
943
  #
944
  # End of ebook chapter chunking
945
  #######################################################################################################################
946
 
947
  #######################################################################################################################
948
  #
 
11
  import logging
12
  import re
13
  from typing import Any, Dict, List, Optional, Tuple
14
+ import xml.etree.ElementTree as ET
15
  #
16
  # Import 3rd party
17
  from openai import OpenAI
 
24
  from sklearn.metrics.pairwise import cosine_similarity
25
  #
26
  # Import Local
 
27
  from App_Function_Libraries.Utils.Utils import load_comprehensive_config
28
  #
29
  #######################################################################################################################
 
106
 
107
  def improved_chunking_process(text: str, chunk_options: Dict[str, Any] = None) -> List[Dict[str, Any]]:
108
  logging.debug("Improved chunking process started...")
109
+ logging.debug(f"Received chunk_options: {chunk_options}")
110
 
111
  # Extract JSON metadata if present
112
  json_content = {}
 
126
  text = text[len(header_text):].strip()
127
  logging.debug(f"Extracted header text: {header_text}")
128
 
129
+ # Make a copy of chunk_options and ensure values are correct types
130
+ options = {}
131
  if chunk_options:
132
+ try:
133
+ options['method'] = str(chunk_options.get('method', 'words'))
134
+ options['max_size'] = int(chunk_options.get('max_size', 2000))
135
+ options['overlap'] = int(chunk_options.get('overlap', 0))
136
+ # Handle language specially - it can be None
137
+ lang = chunk_options.get('language')
138
+ options['language'] = str(lang) if lang is not None else None
139
+ logging.debug(f"Processed options: {options}")
140
+ except Exception as e:
141
+ logging.error(f"Error processing chunk options: {e}")
142
+ raise
143
+ else:
144
+ options = {'method': 'words', 'max_size': 2000, 'overlap': 0, 'language': None}
145
+ logging.debug("Using default options")
146
 
147
+ if options.get('language') is None:
148
+ detected_lang = detect_language(text)
149
+ options['language'] = str(detected_lang)
150
+ logging.debug(f"Detected language: {options['language']}")
151
 
152
+ try:
153
+ if options['method'] == 'json':
154
+ chunks = chunk_text_by_json(text, max_size=options['max_size'], overlap=options['overlap'])
155
+ else:
156
+ chunks = chunk_text(text, options['method'], options['max_size'], options['overlap'], options['language'])
157
+ logging.debug(f"Created {len(chunks)} chunks using method {options['method']}")
158
+ except Exception as e:
159
+ logging.error(f"Error in chunking process: {e}")
160
+ raise
161
 
162
  chunks_with_metadata = []
163
  total_chunks = len(chunks)
164
+ try:
165
+ for i, chunk in enumerate(chunks):
166
+ metadata = {
167
+ 'chunk_index': i + 1,
168
+ 'total_chunks': total_chunks,
169
+ 'chunk_method': options['method'],
170
+ 'max_size': options['max_size'],
171
+ 'overlap': options['overlap'],
172
+ 'language': options['language'],
173
+ 'relative_position': float((i + 1) / total_chunks)
174
+ }
175
+ metadata.update(json_content)
176
+ metadata['header_text'] = header_text
177
+
178
+ if options['method'] == 'json':
179
+ chunk_text_content = json.dumps(chunk['json'], ensure_ascii=False)
180
+ else:
181
+ chunk_text_content = chunk
182
 
183
+ chunks_with_metadata.append({
184
+ 'text': chunk_text_content,
185
+ 'metadata': metadata
186
+ })
187
 
188
+ logging.debug(f"Successfully created metadata for all chunks")
189
+ return chunks_with_metadata
190
+ except Exception as e:
191
+ logging.error(f"Error creating chunk metadata: {e}")
192
+ raise
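For reference, a minimal sketch of calling the reworked improved_chunking_process (the sample text and option values are purely illustrative):

    sample_text = "First sentence here. Second sentence here. Third sentence here."
    chunk_options = {'method': 'words', 'max_size': 5, 'overlap': 1, 'language': None}
    for item in improved_chunking_process(sample_text, chunk_options):
        meta = item['metadata']
        print(f"{meta['chunk_index']}/{meta['total_chunks']} [{meta['language']}] -> {item['text']}")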
193
 
194
 
195
  def multi_level_chunking(text: str, method: str, max_size: int, overlap: int, language: str) -> List[str]:
 
242
 
243
  def chunk_text_by_words(text: str, max_words: int = 300, overlap: int = 0, language: str = None) -> List[str]:
244
  logging.debug("chunk_text_by_words...")
245
+ logging.debug(f"Parameters: max_words={max_words}, overlap={overlap}, language={language}")
246
 
247
+ try:
248
+ if language is None:
249
+ language = detect_language(text)
250
+ logging.debug(f"Detected language: {language}")
251
+
252
+ if language.startswith('zh'): # Chinese
253
+ import jieba
254
+ words = list(jieba.cut(text))
255
+ elif language == 'ja': # Japanese
256
+ import fugashi
257
+ tagger = fugashi.Tagger()
258
+ words = [word.surface for word in tagger(text)]
259
+ else: # Default to simple splitting for other languages
260
+ words = text.split()
261
+
262
+ logging.debug(f"Total words: {len(words)}")
263
+
264
+ chunks = []
265
+ for i in range(0, len(words), max_words - overlap):
266
+ chunk = ' '.join(words[i:i + max_words])
267
+ chunks.append(chunk)
268
+ logging.debug(f"Created chunk {len(chunks)} with {len(chunk.split())} words")
269
+
270
+ return post_process_chunks(chunks)
271
+ except Exception as e:
272
+ logging.error(f"Error in chunk_text_by_words: {e}")
273
+ raise
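A small illustrative call showing the overlap behavior (this assumes overlap stays below max_words so the range step remains positive, and that post_process_chunks only normalizes the chunks):

    text = "one two three four five six seven"
    for chunk in chunk_text_by_words(text, max_words=4, overlap=1, language='en'):
        print(repr(chunk))
    # Expected shape: 'one two three four', 'four five six seven', 'seven'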
274
 
275
 
276
  def chunk_text_by_sentences(text: str, max_sentences: int = 10, overlap: int = 0, language: str = None) -> List[str]:
 
371
  """
372
  chunk_length = len(chunk)
373
  start_index = full_text.find(chunk)
374
+ end_index = start_index + chunk_length if start_index != -1 else -1
375
 
376
  # Calculate a hash for the chunk
377
  chunk_hash = hashlib.md5(chunk.encode()).hexdigest()
378
 
379
  metadata = {
380
+ 'start_index': int(start_index),
381
+ 'end_index': int(end_index),
382
+ 'word_count': int(len(chunk.split())),
383
+ 'char_count': int(chunk_length),
384
  'chunk_type': chunk_type,
385
  'language': language,
386
  'chunk_hash': chunk_hash,
387
+ 'relative_position': float(start_index / len(full_text) if len(full_text) > 0 and start_index != -1 else 0)
388
  }
389
 
390
  if chunk_type == "chapter":
391
+ metadata['chapter_number'] = int(chapter_number) if chapter_number is not None else None
392
  metadata['chapter_pattern'] = chapter_pattern
393
 
394
  return metadata
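For orientation, the metadata returned here has roughly the following shape (every value below is made up):

    # {
    #     'start_index': 0,
    #     'end_index': 487,
    #     'word_count': 82,
    #     'char_count': 487,
    #     'chunk_type': 'chapter',
    #     'language': 'en',
    #     'chunk_hash': 'e6c2...b91f',          # md5 of the chunk text
    #     'relative_position': 0.0,
    #     'chapter_number': 1,                  # only when chunk_type == "chapter"
    #     'chapter_pattern': r'^Chapter\s+\d+'  # only when chunk_type == "chapter"
    # }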
 
976
  #
977
  # End of ebook chapter chunking
978
  #######################################################################################################################
979
+ #
980
+ # XML Chunking
981
+
982
+ def extract_xml_structure(element, path=""):
983
+ """
984
+ Recursively extract XML structure and content.
985
+ Returns a list of (path, text) tuples.
986
+ """
987
+ results = []
988
+ current_path = f"{path}/{element.tag}" if path else element.tag
989
+
990
+ # Get direct text content
991
+ if element.text and element.text.strip():
992
+ results.append((current_path, element.text.strip()))
993
+
994
+ # Process attributes if any
995
+ if element.attrib:
996
+ for key, value in element.attrib.items():
997
+ results.append((f"{current_path}/@{key}", value))
998
+
999
+ # Process child elements
1000
+ for child in element:
1001
+ results.extend(extract_xml_structure(child, current_path))
1002
+
1003
+ return results
1004
+
1005
+
1006
+ def chunk_xml(xml_text: str, chunk_options: Dict[str, Any]) -> List[Dict[str, Any]]:
1007
+ """
1008
+ Enhanced XML chunking that preserves structure and hierarchy.
1009
+ Processes XML content into chunks while maintaining structural context.
1010
+
1011
+ Args:
1012
+ xml_text (str): The XML content as a string
1013
+ chunk_options (Dict[str, Any]): Configuration options including:
1014
+ - max_size (int): Maximum chunk size (default: 1000)
1015
+ - overlap (int): Number of overlapping elements (default: 0)
1016
+ - method (str): Chunking method (default: 'xml')
1017
+ - language (str): Content language (default: 'english')
1018
+
1019
+ Returns:
1020
+ List[Dict[str, Any]]: List of chunks, each containing:
1021
+ - text: The chunk content
1022
+ - metadata: Chunk metadata including XML paths and chunking info
1023
+ """
1024
+ logging.debug("Starting XML chunking process...")
1025
+
1026
+ try:
1027
+ # Parse XML content
1028
+ root = ET.fromstring(xml_text)
1029
+ chunks = []
1030
+
1031
+ # Get chunking parameters with defaults
1032
+ max_size = chunk_options.get('max_size', 1000)
1033
+ overlap = chunk_options.get('overlap', 0)
1034
+ language = chunk_options.get('language', 'english')
1035
+
1036
+ logging.debug(f"Chunking parameters - max_size: {max_size}, overlap: {overlap}, language: {language}")
1037
+
1038
+ # Extract full structure with hierarchy
1039
+ xml_content = extract_xml_structure(root)
1040
+ logging.debug(f"Extracted {len(xml_content)} XML elements")
1041
+
1042
+ # Initialize chunking variables
1043
+ current_chunk = []
1044
+ current_size = 0
1045
+ chunk_count = 0
1046
+
1047
+ # Process XML content into chunks
1048
+ for path, content in xml_content:
1049
+ # Calculate content size (by words)
1050
+ content_size = len(content.split())
1051
+
1052
+ # Check if adding this content would exceed max_size
1053
+ if current_size + content_size > max_size and current_chunk:
1054
+ # Create chunk from current content
1055
+ chunk_text = '\n'.join(f"{p}: {c}" for p, c in current_chunk)
1056
+ chunk_count += 1
1057
+
1058
+ # Create chunk with metadata
1059
+ chunks.append({
1060
+ 'text': chunk_text,
1061
+ 'metadata': {
1062
+ 'paths': [p for p, _ in current_chunk],
1063
+ 'chunk_method': 'xml',
1064
+ 'chunk_index': chunk_count,
1065
+ 'max_size': max_size,
1066
+ 'overlap': overlap,
1067
+ 'language': language,
1068
+ 'root_tag': root.tag,
1069
+ 'xml_attributes': dict(root.attrib)
1070
+ }
1071
+ })
1072
+
1073
+ # Handle overlap if specified
1074
+ if overlap > 0:
1075
+ # Keep last few items for overlap
1076
+ overlap_items = current_chunk[-overlap:]
1077
+ current_chunk = overlap_items
1078
+ current_size = sum(len(c.split()) for _, c in overlap_items)
1079
+ logging.debug(f"Created overlap chunk with {len(overlap_items)} items")
1080
+ else:
1081
+ current_chunk = []
1082
+ current_size = 0
1083
+
1084
+ # Add current content to chunk
1085
+ current_chunk.append((path, content))
1086
+ current_size += content_size
1087
+
1088
+ # Process final chunk if content remains
1089
+ if current_chunk:
1090
+ chunk_text = '\n'.join(f"{p}: {c}" for p, c in current_chunk)
1091
+ chunk_count += 1
1092
+
1093
+ chunks.append({
1094
+ 'text': chunk_text,
1095
+ 'metadata': {
1096
+ 'paths': [p for p, _ in current_chunk],
1097
+ 'chunk_method': 'xml',
1098
+ 'chunk_index': chunk_count,
1099
+ 'max_size': max_size,
1100
+ 'overlap': overlap,
1101
+ 'language': language,
1102
+ 'root_tag': root.tag,
1103
+ 'xml_attributes': dict(root.attrib)
1104
+ }
1105
+ })
1106
+
1107
+ # Update total chunks count in metadata
1108
+ for chunk in chunks:
1109
+ chunk['metadata']['total_chunks'] = chunk_count
1110
+
1111
+ logging.debug(f"XML chunking complete. Created {len(chunks)} chunks")
1112
+ return chunks
1113
+
1114
+ except ET.ParseError as e:
1115
+ logging.error(f"XML parsing error: {str(e)}")
1116
+ raise
1117
+ except Exception as e:
1118
+ logging.error(f"Unexpected error during XML chunking: {str(e)}")
1119
+ raise
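A self-contained sketch of running a small document through chunk_xml (the XML payload and option values are invented for illustration):

    sample_xml = """<library>
        <book id="1">
            <title>First Title</title>
            <summary>A short summary with a handful of words.</summary>
        </book>
    </library>"""
    for c in chunk_xml(sample_xml, {'max_size': 10, 'overlap': 0, 'language': 'english'}):
        print(c['metadata']['chunk_index'], 'of', c['metadata']['total_chunks'])
        print(c['text'])    # lines look like "library/book/title: First Title"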
1120
+
1121
+ #
1122
+ # End of XML Chunking
1123
+ #######################################################################################################################
1124
 
1125
  #######################################################################################################################
1126
  #
App_Function_Libraries/DB/Character_Chat_DB.py CHANGED
@@ -1,701 +1,1059 @@
1
- # character_chat_db.py
2
- # Database functions for managing character cards and chat histories.
3
- # #
4
- # Imports
5
- import configparser
6
- import sqlite3
7
- import json
8
- import os
9
- import sys
10
- from typing import List, Dict, Optional, Tuple, Any, Union
11
-
12
- from App_Function_Libraries.Utils.Utils import get_database_dir, get_project_relative_path, get_database_path
13
- import logging
14
-
15
- #
16
- #######################################################################################################################
17
- #
18
- #
19
-
20
- def ensure_database_directory():
21
- os.makedirs(get_database_dir(), exist_ok=True)
22
-
23
- ensure_database_directory()
24
-
25
-
26
- # Construct the path to the config file
27
- config_path = get_project_relative_path('Config_Files/config.txt')
28
-
29
- # Read the config file
30
- config = configparser.ConfigParser()
31
- config.read(config_path)
32
-
33
- # Get the chat db path from the config, or use the default if not specified
34
- chat_DB_PATH = config.get('Database', 'chatDB_path', fallback=get_database_path('chatDB.db'))
35
- print(f"Chat Database path: {chat_DB_PATH}")
36
-
37
- ########################################################################################################
38
- #
39
- # Functions
40
-
41
- # FIXME - Setup properly and test/add documentation for its existence...
42
- def initialize_database():
43
- """Initialize the SQLite database with required tables and FTS5 virtual tables."""
44
- conn = None
45
- try:
46
- conn = sqlite3.connect(chat_DB_PATH)
47
- cursor = conn.cursor()
48
-
49
- # Enable foreign key constraints
50
- cursor.execute("PRAGMA foreign_keys = ON;")
51
-
52
- # Create CharacterCards table with V2 fields
53
- cursor.execute("""
54
- CREATE TABLE IF NOT EXISTS CharacterCards (
55
- id INTEGER PRIMARY KEY AUTOINCREMENT,
56
- name TEXT UNIQUE NOT NULL,
57
- description TEXT,
58
- personality TEXT,
59
- scenario TEXT,
60
- image BLOB,
61
- post_history_instructions TEXT,
62
- first_mes TEXT,
63
- mes_example TEXT,
64
- creator_notes TEXT,
65
- system_prompt TEXT,
66
- alternate_greetings TEXT,
67
- tags TEXT,
68
- creator TEXT,
69
- character_version TEXT,
70
- extensions TEXT,
71
- created_at DATETIME DEFAULT CURRENT_TIMESTAMP
72
- );
73
- """)
74
-
75
- # Create CharacterChats table
76
- cursor.execute("""
77
- CREATE TABLE IF NOT EXISTS CharacterChats (
78
- id INTEGER PRIMARY KEY AUTOINCREMENT,
79
- character_id INTEGER NOT NULL,
80
- conversation_name TEXT,
81
- chat_history TEXT,
82
- is_snapshot BOOLEAN DEFAULT FALSE,
83
- created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
84
- FOREIGN KEY (character_id) REFERENCES CharacterCards(id) ON DELETE CASCADE
85
- );
86
- """)
87
-
88
- # Create FTS5 virtual table for CharacterChats
89
- cursor.execute("""
90
- CREATE VIRTUAL TABLE IF NOT EXISTS CharacterChats_fts USING fts5(
91
- conversation_name,
92
- chat_history,
93
- content='CharacterChats',
94
- content_rowid='id'
95
- );
96
- """)
97
-
98
- # Create triggers to keep FTS5 table in sync with CharacterChats
99
- cursor.executescript("""
100
- CREATE TRIGGER IF NOT EXISTS CharacterChats_ai AFTER INSERT ON CharacterChats BEGIN
101
- INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
102
- VALUES (new.id, new.conversation_name, new.chat_history);
103
- END;
104
-
105
- CREATE TRIGGER IF NOT EXISTS CharacterChats_ad AFTER DELETE ON CharacterChats BEGIN
106
- DELETE FROM CharacterChats_fts WHERE rowid = old.id;
107
- END;
108
-
109
- CREATE TRIGGER IF NOT EXISTS CharacterChats_au AFTER UPDATE ON CharacterChats BEGIN
110
- UPDATE CharacterChats_fts SET conversation_name = new.conversation_name, chat_history = new.chat_history
111
- WHERE rowid = new.id;
112
- END;
113
- """)
114
-
115
- # Create ChatKeywords table
116
- cursor.execute("""
117
- CREATE TABLE IF NOT EXISTS ChatKeywords (
118
- chat_id INTEGER NOT NULL,
119
- keyword TEXT NOT NULL,
120
- FOREIGN KEY (chat_id) REFERENCES CharacterChats(id) ON DELETE CASCADE
121
- );
122
- """)
123
-
124
- # Create indexes for faster searches
125
- cursor.execute("""
126
- CREATE INDEX IF NOT EXISTS idx_chatkeywords_keyword ON ChatKeywords(keyword);
127
- """)
128
- cursor.execute("""
129
- CREATE INDEX IF NOT EXISTS idx_chatkeywords_chat_id ON ChatKeywords(chat_id);
130
- """)
131
-
132
- conn.commit()
133
- logging.info("Database initialized successfully.")
134
- except sqlite3.Error as e:
135
- logging.error(f"SQLite error occurred during database initialization: {e}")
136
- if conn:
137
- conn.rollback()
138
- raise
139
- except Exception as e:
140
- logging.error(f"Unexpected error occurred during database initialization: {e}")
141
- if conn:
142
- conn.rollback()
143
- raise
144
- finally:
145
- if conn:
146
- conn.close()
147
-
148
- # Call initialize_database() at the start of your application
149
- def setup_chat_database():
150
- try:
151
- initialize_database()
152
- except Exception as e:
153
- logging.critical(f"Failed to initialize database: {e}")
154
- sys.exit(1)
155
-
156
- setup_chat_database()
157
-
158
- ########################################################################################################
159
- #
160
- # Character Card handling
161
-
162
- def parse_character_card(card_data: Dict[str, Any]) -> Dict[str, Any]:
163
- """Parse and validate a character card according to V2 specification."""
164
- v2_data = {
165
- 'name': card_data.get('name', ''),
166
- 'description': card_data.get('description', ''),
167
- 'personality': card_data.get('personality', ''),
168
- 'scenario': card_data.get('scenario', ''),
169
- 'first_mes': card_data.get('first_mes', ''),
170
- 'mes_example': card_data.get('mes_example', ''),
171
- 'creator_notes': card_data.get('creator_notes', ''),
172
- 'system_prompt': card_data.get('system_prompt', ''),
173
- 'post_history_instructions': card_data.get('post_history_instructions', ''),
174
- 'alternate_greetings': json.dumps(card_data.get('alternate_greetings', [])),
175
- 'tags': json.dumps(card_data.get('tags', [])),
176
- 'creator': card_data.get('creator', ''),
177
- 'character_version': card_data.get('character_version', ''),
178
- 'extensions': json.dumps(card_data.get('extensions', {}))
179
- }
180
-
181
- # Handle 'image' separately as it might be binary data
182
- if 'image' in card_data:
183
- v2_data['image'] = card_data['image']
184
-
185
- return v2_data
186
-
187
-
188
- def add_character_card(card_data: Dict[str, Any]) -> Optional[int]:
189
- """Add or update a character card in the database."""
190
- conn = sqlite3.connect(chat_DB_PATH)
191
- cursor = conn.cursor()
192
- try:
193
- parsed_card = parse_character_card(card_data)
194
-
195
- # Check if character already exists
196
- cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (parsed_card['name'],))
197
- row = cursor.fetchone()
198
-
199
- if row:
200
- # Update existing character
201
- character_id = row[0]
202
- update_query = """
203
- UPDATE CharacterCards
204
- SET description = ?, personality = ?, scenario = ?, image = ?,
205
- post_history_instructions = ?, first_mes = ?, mes_example = ?,
206
- creator_notes = ?, system_prompt = ?, alternate_greetings = ?,
207
- tags = ?, creator = ?, character_version = ?, extensions = ?
208
- WHERE id = ?
209
- """
210
- cursor.execute(update_query, (
211
- parsed_card['description'], parsed_card['personality'], parsed_card['scenario'],
212
- parsed_card['image'], parsed_card['post_history_instructions'], parsed_card['first_mes'],
213
- parsed_card['mes_example'], parsed_card['creator_notes'], parsed_card['system_prompt'],
214
- parsed_card['alternate_greetings'], parsed_card['tags'], parsed_card['creator'],
215
- parsed_card['character_version'], parsed_card['extensions'], character_id
216
- ))
217
- else:
218
- # Insert new character
219
- insert_query = """
220
- INSERT INTO CharacterCards (name, description, personality, scenario, image,
221
- post_history_instructions, first_mes, mes_example, creator_notes, system_prompt,
222
- alternate_greetings, tags, creator, character_version, extensions)
223
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
224
- """
225
- cursor.execute(insert_query, (
226
- parsed_card['name'], parsed_card['description'], parsed_card['personality'],
227
- parsed_card['scenario'], parsed_card['image'], parsed_card['post_history_instructions'],
228
- parsed_card['first_mes'], parsed_card['mes_example'], parsed_card['creator_notes'],
229
- parsed_card['system_prompt'], parsed_card['alternate_greetings'], parsed_card['tags'],
230
- parsed_card['creator'], parsed_card['character_version'], parsed_card['extensions']
231
- ))
232
- character_id = cursor.lastrowid
233
-
234
- conn.commit()
235
- return character_id
236
- except sqlite3.IntegrityError as e:
237
- logging.error(f"Error adding character card: {e}")
238
- return None
239
- except Exception as e:
240
- logging.error(f"Unexpected error adding character card: {e}")
241
- return None
242
- finally:
243
- conn.close()
244
-
245
- # def add_character_card(card_data: Dict) -> Optional[int]:
246
- # """Add or update a character card in the database.
247
- #
248
- # Returns the ID of the inserted character or None if failed.
249
- # """
250
- # conn = sqlite3.connect(chat_DB_PATH)
251
- # cursor = conn.cursor()
252
- # try:
253
- # # Ensure all required fields are present
254
- # required_fields = ['name', 'description', 'personality', 'scenario', 'image', 'post_history_instructions', 'first_message']
255
- # for field in required_fields:
256
- # if field not in card_data:
257
- # card_data[field] = '' # Assign empty string if field is missing
258
- #
259
- # # Check if character already exists
260
- # cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (card_data['name'],))
261
- # row = cursor.fetchone()
262
- #
263
- # if row:
264
- # # Update existing character
265
- # character_id = row[0]
266
- # cursor.execute("""
267
- # UPDATE CharacterCards
268
- # SET description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
269
- # WHERE id = ?
270
- # """, (
271
- # card_data['description'],
272
- # card_data['personality'],
273
- # card_data['scenario'],
274
- # card_data['image'],
275
- # card_data['post_history_instructions'],
276
- # card_data['first_message'],
277
- # character_id
278
- # ))
279
- # else:
280
- # # Insert new character
281
- # cursor.execute("""
282
- # INSERT INTO CharacterCards (name, description, personality, scenario, image, post_history_instructions, first_message)
283
- # VALUES (?, ?, ?, ?, ?, ?, ?)
284
- # """, (
285
- # card_data['name'],
286
- # card_data['description'],
287
- # card_data['personality'],
288
- # card_data['scenario'],
289
- # card_data['image'],
290
- # card_data['post_history_instructions'],
291
- # card_data['first_message']
292
- # ))
293
- # character_id = cursor.lastrowid
294
- #
295
- # conn.commit()
296
- # return cursor.lastrowid
297
- # except sqlite3.IntegrityError as e:
298
- # logging.error(f"Error adding character card: {e}")
299
- # return None
300
- # except Exception as e:
301
- # logging.error(f"Unexpected error adding character card: {e}")
302
- # return None
303
- # finally:
304
- # conn.close()
305
-
306
-
307
- def get_character_cards() -> List[Dict]:
308
- """Retrieve all character cards from the database."""
309
- logging.debug(f"Fetching characters from DB: {chat_DB_PATH}")
310
- conn = sqlite3.connect(chat_DB_PATH)
311
- cursor = conn.cursor()
312
- cursor.execute("SELECT * FROM CharacterCards")
313
- rows = cursor.fetchall()
314
- columns = [description[0] for description in cursor.description]
315
- conn.close()
316
- characters = [dict(zip(columns, row)) for row in rows]
317
- #logging.debug(f"Characters fetched from DB: {characters}")
318
- return characters
319
-
320
-
321
- def get_character_card_by_id(character_id: Union[int, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
322
- """
323
- Retrieve a single character card by its ID.
324
-
325
- Args:
326
- character_id: Can be either an integer ID or a dictionary containing character data.
327
-
328
- Returns:
329
- A dictionary containing the character card data, or None if not found.
330
- """
331
- conn = sqlite3.connect(chat_DB_PATH)
332
- cursor = conn.cursor()
333
- try:
334
- if isinstance(character_id, dict):
335
- # If a dictionary is passed, assume it's already a character card
336
- return character_id
337
- elif isinstance(character_id, int):
338
- # If an integer is passed, fetch the character from the database
339
- cursor.execute("SELECT * FROM CharacterCards WHERE id = ?", (character_id,))
340
- row = cursor.fetchone()
341
- if row:
342
- columns = [description[0] for description in cursor.description]
343
- return dict(zip(columns, row))
344
- else:
345
- logging.warning(f"Invalid type for character_id: {type(character_id)}")
346
- return None
347
- except Exception as e:
348
- logging.error(f"Error in get_character_card_by_id: {e}")
349
- return None
350
- finally:
351
- conn.close()
352
-
353
-
354
- def update_character_card(character_id: int, card_data: Dict) -> bool:
355
- """Update an existing character card."""
356
- conn = sqlite3.connect(chat_DB_PATH)
357
- cursor = conn.cursor()
358
- try:
359
- cursor.execute("""
360
- UPDATE CharacterCards
361
- SET name = ?, description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
362
- WHERE id = ?
363
- """, (
364
- card_data.get('name'),
365
- card_data.get('description'),
366
- card_data.get('personality'),
367
- card_data.get('scenario'),
368
- card_data.get('image'),
369
- card_data.get('post_history_instructions', ''),
370
- card_data.get('first_message', "Hello! I'm ready to chat."),
371
- character_id
372
- ))
373
- conn.commit()
374
- return cursor.rowcount > 0
375
- except sqlite3.IntegrityError as e:
376
- logging.error(f"Error updating character card: {e}")
377
- return False
378
- finally:
379
- conn.close()
380
-
381
-
382
- def delete_character_card(character_id: int) -> bool:
383
- """Delete a character card and its associated chats."""
384
- conn = sqlite3.connect(chat_DB_PATH)
385
- cursor = conn.cursor()
386
- try:
387
- # Delete associated chats first due to foreign key constraint
388
- cursor.execute("DELETE FROM CharacterChats WHERE character_id = ?", (character_id,))
389
- cursor.execute("DELETE FROM CharacterCards WHERE id = ?", (character_id,))
390
- conn.commit()
391
- return cursor.rowcount > 0
392
- except sqlite3.Error as e:
393
- logging.error(f"Error deleting character card: {e}")
394
- return False
395
- finally:
396
- conn.close()
397
-
398
-
399
- def add_character_chat(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]], keywords: Optional[List[str]] = None, is_snapshot: bool = False) -> Optional[int]:
400
- """
401
- Add a new chat history for a character, optionally associating keywords.
402
-
403
- Args:
404
- character_id (int): The ID of the character.
405
- conversation_name (str): Name of the conversation.
406
- chat_history (List[Tuple[str, str]]): List of (user, bot) message tuples.
407
- keywords (Optional[List[str]]): List of keywords to associate with this chat.
408
- is_snapshot (bool, optional): Whether this chat is a snapshot.
409
-
410
- Returns:
411
- Optional[int]: The ID of the inserted chat or None if failed.
412
- """
413
- conn = sqlite3.connect(chat_DB_PATH)
414
- cursor = conn.cursor()
415
- try:
416
- chat_history_json = json.dumps(chat_history)
417
- cursor.execute("""
418
- INSERT INTO CharacterChats (character_id, conversation_name, chat_history, is_snapshot)
419
- VALUES (?, ?, ?, ?)
420
- """, (
421
- character_id,
422
- conversation_name,
423
- chat_history_json,
424
- is_snapshot
425
- ))
426
- chat_id = cursor.lastrowid
427
-
428
- if keywords:
429
- # Insert keywords into ChatKeywords table
430
- keyword_records = [(chat_id, keyword.strip().lower()) for keyword in keywords]
431
- cursor.executemany("""
432
- INSERT INTO ChatKeywords (chat_id, keyword)
433
- VALUES (?, ?)
434
- """, keyword_records)
435
-
436
- conn.commit()
437
- return chat_id
438
- except sqlite3.Error as e:
439
- logging.error(f"Error adding character chat: {e}")
440
- return None
441
- finally:
442
- conn.close()
443
-
444
-
445
- def get_character_chats(character_id: Optional[int] = None) -> List[Dict]:
446
- """Retrieve all chats, or chats for a specific character if character_id is provided."""
447
- conn = sqlite3.connect(chat_DB_PATH)
448
- cursor = conn.cursor()
449
- if character_id is not None:
450
- cursor.execute("SELECT * FROM CharacterChats WHERE character_id = ?", (character_id,))
451
- else:
452
- cursor.execute("SELECT * FROM CharacterChats")
453
- rows = cursor.fetchall()
454
- columns = [description[0] for description in cursor.description]
455
- conn.close()
456
- return [dict(zip(columns, row)) for row in rows]
457
-
458
-
459
- def get_character_chat_by_id(chat_id: int) -> Optional[Dict]:
460
- """Retrieve a single chat by its ID."""
461
- conn = sqlite3.connect(chat_DB_PATH)
462
- cursor = conn.cursor()
463
- cursor.execute("SELECT * FROM CharacterChats WHERE id = ?", (chat_id,))
464
- row = cursor.fetchone()
465
- conn.close()
466
- if row:
467
- columns = [description[0] for description in cursor.description]
468
- chat = dict(zip(columns, row))
469
- chat['chat_history'] = json.loads(chat['chat_history'])
470
- return chat
471
- return None
472
-
473
-
474
- def search_character_chats(query: str, character_id: Optional[int] = None) -> Tuple[List[Dict], str]:
475
- """
476
- Search for character chats using FTS5, optionally filtered by character_id.
477
-
478
- Args:
479
- query (str): The search query.
480
- character_id (Optional[int]): The ID of the character to filter chats by.
481
-
482
- Returns:
483
- Tuple[List[Dict], str]: A list of matching chats and a status message.
484
- """
485
- if not query.strip():
486
- return [], "Please enter a search query."
487
-
488
- conn = sqlite3.connect(chat_DB_PATH)
489
- cursor = conn.cursor()
490
- try:
491
- if character_id is not None:
492
- # Search with character_id filter
493
- cursor.execute("""
494
- SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
495
- FROM CharacterChats_fts
496
- JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
497
- WHERE CharacterChats_fts MATCH ? AND CharacterChats.character_id = ?
498
- ORDER BY rank
499
- """, (query, character_id))
500
- else:
501
- # Search without character_id filter
502
- cursor.execute("""
503
- SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
504
- FROM CharacterChats_fts
505
- JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
506
- WHERE CharacterChats_fts MATCH ?
507
- ORDER BY rank
508
- """, (query,))
509
-
510
- rows = cursor.fetchall()
511
- columns = [description[0] for description in cursor.description]
512
- results = [dict(zip(columns, row)) for row in rows]
513
-
514
- if character_id is not None:
515
- status_message = f"Found {len(results)} chat(s) matching '{query}' for the selected character."
516
- else:
517
- status_message = f"Found {len(results)} chat(s) matching '{query}' across all characters."
518
-
519
- return results, status_message
520
- except Exception as e:
521
- logging.error(f"Error searching chats with FTS5: {e}")
522
- return [], f"Error occurred during search: {e}"
523
- finally:
524
- conn.close()
525
-
526
- def update_character_chat(chat_id: int, chat_history: List[Tuple[str, str]]) -> bool:
527
- """Update an existing chat history."""
528
- conn = sqlite3.connect(chat_DB_PATH)
529
- cursor = conn.cursor()
530
- try:
531
- chat_history_json = json.dumps(chat_history)
532
- cursor.execute("""
533
- UPDATE CharacterChats
534
- SET chat_history = ?
535
- WHERE id = ?
536
- """, (
537
- chat_history_json,
538
- chat_id
539
- ))
540
- conn.commit()
541
- return cursor.rowcount > 0
542
- except sqlite3.Error as e:
543
- logging.error(f"Error updating character chat: {e}")
544
- return False
545
- finally:
546
- conn.close()
547
-
548
-
549
- def delete_character_chat(chat_id: int) -> bool:
550
- """Delete a specific chat."""
551
- conn = sqlite3.connect(chat_DB_PATH)
552
- cursor = conn.cursor()
553
- try:
554
- cursor.execute("DELETE FROM CharacterChats WHERE id = ?", (chat_id,))
555
- conn.commit()
556
- return cursor.rowcount > 0
557
- except sqlite3.Error as e:
558
- logging.error(f"Error deleting character chat: {e}")
559
- return False
560
- finally:
561
- conn.close()
562
-
563
- def fetch_keywords_for_chats(keywords: List[str]) -> List[int]:
564
- """
565
- Fetch chat IDs associated with any of the specified keywords.
566
-
567
- Args:
568
- keywords (List[str]): List of keywords to search for.
569
-
570
- Returns:
571
- List[int]: List of chat IDs associated with the keywords.
572
- """
573
- if not keywords:
574
- return []
575
-
576
- conn = sqlite3.connect(chat_DB_PATH)
577
- cursor = conn.cursor()
578
- try:
579
- # Construct the WHERE clause to search for each keyword
580
- keyword_clauses = " OR ".join(["keyword = ?"] * len(keywords))
581
- sql_query = f"SELECT DISTINCT chat_id FROM ChatKeywords WHERE {keyword_clauses}"
582
- cursor.execute(sql_query, keywords)
583
- rows = cursor.fetchall()
584
- chat_ids = [row[0] for row in rows]
585
- return chat_ids
586
- except Exception as e:
587
- logging.error(f"Error in fetch_keywords_for_chats: {e}")
588
- return []
589
- finally:
590
- conn.close()
591
-
592
- def save_chat_history_to_character_db(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]]) -> Optional[int]:
593
- """Save chat history to the CharacterChats table.
594
-
595
- Returns the ID of the inserted chat or None if failed.
596
- """
597
- return add_character_chat(character_id, conversation_name, chat_history)
598
-
599
- def migrate_chat_to_media_db():
600
- pass
601
-
602
-
603
- def search_db(query: str, fields: List[str], where_clause: str = "", page: int = 1, results_per_page: int = 5) -> List[Dict[str, Any]]:
604
- """
605
- Perform a full-text search on specified fields with optional filtering and pagination.
606
-
607
- Args:
608
- query (str): The search query.
609
- fields (List[str]): List of fields to search in.
610
- where_clause (str, optional): Additional SQL WHERE clause to filter results.
611
- page (int, optional): Page number for pagination.
612
- results_per_page (int, optional): Number of results per page.
613
-
614
- Returns:
615
- List[Dict[str, Any]]: List of matching chat records with content and metadata.
616
- """
617
- if not query.strip():
618
- return []
619
-
620
- conn = sqlite3.connect(chat_DB_PATH)
621
- cursor = conn.cursor()
622
- try:
623
- # Construct the MATCH query for FTS5
624
- match_query = " AND ".join(fields) + f" MATCH ?"
625
- # Adjust the query with the fields
626
- fts_query = f"""
627
- SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
628
- FROM CharacterChats_fts
629
- JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
630
- WHERE {match_query}
631
- """
632
- if where_clause:
633
- fts_query += f" AND ({where_clause})"
634
- fts_query += " ORDER BY rank LIMIT ? OFFSET ?"
635
- offset = (page - 1) * results_per_page
636
- cursor.execute(fts_query, (query, results_per_page, offset))
637
- rows = cursor.fetchall()
638
- columns = [description[0] for description in cursor.description]
639
- results = [dict(zip(columns, row)) for row in rows]
640
- return results
641
- except Exception as e:
642
- logging.error(f"Error in search_db: {e}")
643
- return []
644
- finally:
645
- conn.close()
646
-
647
-
648
- def perform_full_text_search_chat(query: str, relevant_chat_ids: List[int], page: int = 1, results_per_page: int = 5) -> \
649
- List[Dict[str, Any]]:
650
- """
651
- Perform a full-text search within the specified chat IDs using FTS5.
652
-
653
- Args:
654
- query (str): The user's query.
655
- relevant_chat_ids (List[int]): List of chat IDs to search within.
656
- page (int): Pagination page number.
657
- results_per_page (int): Number of results per page.
658
-
659
- Returns:
660
- List[Dict[str, Any]]: List of search results with content and metadata.
661
- """
662
- try:
663
- # Construct a WHERE clause to limit the search to relevant chat IDs
664
- where_clause = " OR ".join([f"media_id = {chat_id}" for chat_id in relevant_chat_ids])
665
- if not where_clause:
666
- where_clause = "1" # No restriction if no chat IDs
667
-
668
- # Perform full-text search using FTS5
669
- fts_results = search_db(query, ["content"], where_clause, page=page, results_per_page=results_per_page)
670
-
671
- filtered_fts_results = [
672
- {
673
- "content": result['content'],
674
- "metadata": {"media_id": result['id']}
675
- }
676
- for result in fts_results
677
- if result['id'] in relevant_chat_ids
678
- ]
679
- return filtered_fts_results
680
- except Exception as e:
681
- logging.error(f"Error in perform_full_text_search_chat: {str(e)}")
682
- return []
683
-
684
-
685
- def fetch_all_chats() -> List[Dict[str, Any]]:
686
- """
687
- Fetch all chat messages from the database.
688
-
689
- Returns:
690
- List[Dict[str, Any]]: List of chat messages with relevant metadata.
691
- """
692
- try:
693
- chats = get_character_chats() # Modify this function to retrieve all chats
694
- return chats
695
- except Exception as e:
696
- logging.error(f"Error fetching all chats: {str(e)}")
697
- return []
698
-
699
- #
700
- # End of Character_Chat_DB.py
701
- #######################################################################################################################
1
+ # character_chat_db.py
2
+ # Database functions for managing character cards and chat histories.
3
+ # #
4
+ # Imports
5
+ import configparser
6
+ import sqlite3
7
+ import json
8
+ import os
9
+ import sys
10
+ from typing import List, Dict, Optional, Tuple, Any, Union
11
+
12
+ from App_Function_Libraries.Utils.Utils import get_database_dir, get_project_relative_path, get_database_path
13
+ import logging
14
+
15
+ #
16
+ #######################################################################################################################
17
+ #
18
+ #
19
+
20
+ def ensure_database_directory():
21
+ os.makedirs(get_database_dir(), exist_ok=True)
22
+
23
+ ensure_database_directory()
24
+
25
+
26
+ # Construct the path to the config file
27
+ config_path = get_project_relative_path('Config_Files/config.txt')
28
+
29
+ # Read the config file
30
+ config = configparser.ConfigParser()
31
+ config.read(config_path)
32
+
33
+ # Get the chat db path from the config, or use the default if not specified
34
+ chat_DB_PATH = config.get('Database', 'chatDB_path', fallback=get_database_path('chatDB.db'))
35
+ print(f"Chat Database path: {chat_DB_PATH}")
36
+
37
+ ########################################################################################################
38
+ #
39
+ # Functions
40
+
41
+ # FIXME - Setup properly and test/add documentation for its existence...
42
+ def initialize_database():
43
+ """Initialize the SQLite database with required tables and FTS5 virtual tables."""
44
+ conn = None
45
+ try:
46
+ conn = sqlite3.connect(chat_DB_PATH)
47
+ cursor = conn.cursor()
48
+
49
+ # Enable foreign key constraints
50
+ cursor.execute("PRAGMA foreign_keys = ON;")
51
+
52
+ # Create CharacterCards table with V2 fields
53
+ cursor.execute("""
54
+ CREATE TABLE IF NOT EXISTS CharacterCards (
55
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
56
+ name TEXT UNIQUE NOT NULL,
57
+ description TEXT,
58
+ personality TEXT,
59
+ scenario TEXT,
60
+ image BLOB,
61
+ post_history_instructions TEXT,
62
+ first_mes TEXT,
63
+ mes_example TEXT,
64
+ creator_notes TEXT,
65
+ system_prompt TEXT,
66
+ alternate_greetings TEXT,
67
+ tags TEXT,
68
+ creator TEXT,
69
+ character_version TEXT,
70
+ extensions TEXT,
71
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP
72
+ );
73
+ """)
74
+
75
+ # Create FTS5 virtual table for CharacterCards
76
+ cursor.execute("""
77
+ CREATE VIRTUAL TABLE IF NOT EXISTS CharacterCards_fts USING fts5(
78
+ name,
79
+ description,
80
+ personality,
81
+ scenario,
82
+ system_prompt,
83
+ content='CharacterCards',
84
+ content_rowid='id'
85
+ );
86
+ """)
87
+
88
+ # Create triggers to keep FTS5 table in sync with CharacterCards
89
+ cursor.executescript("""
90
+ CREATE TRIGGER IF NOT EXISTS CharacterCards_ai AFTER INSERT ON CharacterCards BEGIN
91
+ INSERT INTO CharacterCards_fts(
92
+ rowid,
93
+ name,
94
+ description,
95
+ personality,
96
+ scenario,
97
+ system_prompt
98
+ ) VALUES (
99
+ new.id,
100
+ new.name,
101
+ new.description,
102
+ new.personality,
103
+ new.scenario,
104
+ new.system_prompt
105
+ );
106
+ END;
107
+
108
+ CREATE TRIGGER IF NOT EXISTS CharacterCards_ad AFTER DELETE ON CharacterCards BEGIN
109
+ DELETE FROM CharacterCards_fts WHERE rowid = old.id;
110
+ END;
111
+
112
+ CREATE TRIGGER IF NOT EXISTS CharacterCards_au AFTER UPDATE ON CharacterCards BEGIN
113
+ UPDATE CharacterCards_fts SET
114
+ name = new.name,
115
+ description = new.description,
116
+ personality = new.personality,
117
+ scenario = new.scenario,
118
+ system_prompt = new.system_prompt
119
+ WHERE rowid = new.id;
120
+ END;
121
+ """)
122
+
123
+ # Create CharacterChats table
124
+ cursor.execute("""
125
+ CREATE TABLE IF NOT EXISTS CharacterChats (
126
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
127
+ character_id INTEGER NOT NULL,
128
+ conversation_name TEXT,
129
+ chat_history TEXT,
130
+ is_snapshot BOOLEAN DEFAULT FALSE,
131
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
132
+ FOREIGN KEY (character_id) REFERENCES CharacterCards(id) ON DELETE CASCADE
133
+ );
134
+ """)
135
+
136
+ # Create FTS5 virtual table for CharacterChats
137
+ cursor.execute("""
138
+ CREATE VIRTUAL TABLE IF NOT EXISTS CharacterChats_fts USING fts5(
139
+ conversation_name,
140
+ chat_history,
141
+ content='CharacterChats',
142
+ content_rowid='id'
143
+ );
144
+ """)
145
+
146
+ # Create triggers to keep FTS5 table in sync with CharacterChats
147
+ cursor.executescript("""
148
+ CREATE TRIGGER IF NOT EXISTS CharacterChats_ai AFTER INSERT ON CharacterChats BEGIN
149
+ INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
150
+ VALUES (new.id, new.conversation_name, new.chat_history);
151
+ END;
152
+
153
+ CREATE TRIGGER IF NOT EXISTS CharacterChats_ad AFTER DELETE ON CharacterChats BEGIN
154
+ DELETE FROM CharacterChats_fts WHERE rowid = old.id;
155
+ END;
156
+
157
+ CREATE TRIGGER IF NOT EXISTS CharacterChats_au AFTER UPDATE ON CharacterChats BEGIN
158
+ UPDATE CharacterChats_fts SET conversation_name = new.conversation_name, chat_history = new.chat_history
159
+ WHERE rowid = new.id;
160
+ END;
161
+ """)
162
+
163
+ # Create ChatKeywords table
164
+ cursor.execute("""
165
+ CREATE TABLE IF NOT EXISTS ChatKeywords (
166
+ chat_id INTEGER NOT NULL,
167
+ keyword TEXT NOT NULL,
168
+ FOREIGN KEY (chat_id) REFERENCES CharacterChats(id) ON DELETE CASCADE
169
+ );
170
+ """)
171
+
172
+ # Create indexes for faster searches
173
+ cursor.execute("""
174
+ CREATE INDEX IF NOT EXISTS idx_chatkeywords_keyword ON ChatKeywords(keyword);
175
+ """)
176
+ cursor.execute("""
177
+ CREATE INDEX IF NOT EXISTS idx_chatkeywords_chat_id ON ChatKeywords(chat_id);
178
+ """)
179
+
180
+ conn.commit()
181
+ logging.info("Database initialized successfully.")
182
+ except sqlite3.Error as e:
183
+ logging.error(f"SQLite error occurred during database initialization: {e}")
184
+ if conn:
185
+ conn.rollback()
186
+ raise
187
+ except Exception as e:
188
+ logging.error(f"Unexpected error occurred during database initialization: {e}")
189
+ if conn:
190
+ conn.rollback()
191
+ raise
192
+ finally:
193
+ if conn:
194
+ conn.close()
195
+
196
+ # Call initialize_database() at the start of your application
197
+ def setup_chat_database():
198
+ try:
199
+ initialize_database()
200
+ except Exception as e:
201
+ logging.critical(f"Failed to initialize database: {e}")
202
+ sys.exit(1)
203
+
204
+ setup_chat_database()
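A minimal sketch (the function name is hypothetical) of querying the CharacterCards_fts index created above, using the same FTS-to-content-table join pattern this module uses for chat search:

    def _example_search_character_cards(query: str):
        conn = sqlite3.connect(chat_DB_PATH)
        try:
            cursor = conn.cursor()
            cursor.execute("""
                SELECT CharacterCards.id, CharacterCards.name
                FROM CharacterCards_fts
                JOIN CharacterCards ON CharacterCards_fts.rowid = CharacterCards.id
                WHERE CharacterCards_fts MATCH ?
                ORDER BY rank
            """, (query,))
            return cursor.fetchall()  # e.g. [(3, 'Example Wizard'), ...]
        finally:
            conn.close()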
205
+
206
+
207
+ ########################################################################################################
208
+ #
209
+ # Character Card handling
210
+
211
+ def parse_character_card(card_data: Dict[str, Any]) -> Dict[str, Any]:
212
+ """Parse and validate a character card according to V2 specification."""
213
+ v2_data = {
214
+ 'name': card_data.get('name', ''),
215
+ 'description': card_data.get('description', ''),
216
+ 'personality': card_data.get('personality', ''),
217
+ 'scenario': card_data.get('scenario', ''),
218
+ 'first_mes': card_data.get('first_mes', ''),
219
+ 'mes_example': card_data.get('mes_example', ''),
220
+ 'creator_notes': card_data.get('creator_notes', ''),
221
+ 'system_prompt': card_data.get('system_prompt', ''),
222
+ 'post_history_instructions': card_data.get('post_history_instructions', ''),
223
+ 'alternate_greetings': json.dumps(card_data.get('alternate_greetings', [])),
224
+ 'tags': json.dumps(card_data.get('tags', [])),
225
+ 'creator': card_data.get('creator', ''),
226
+ 'character_version': card_data.get('character_version', ''),
227
+ 'extensions': json.dumps(card_data.get('extensions', {}))
228
+ }
229
+
230
+ # Handle 'image' separately as it might be binary data
231
+ if 'image' in card_data:
232
+ v2_data['image'] = card_data['image']
233
+
234
+ return v2_data
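A quick illustrative call showing how parse_character_card normalizes a sparse card dict (field values invented):

    sparse = {'name': 'Example', 'tags': ['demo'], 'extensions': {'notes': 'anything'}}
    parsed = parse_character_card(sparse)
    # List/dict fields are serialized for storage:
    #   parsed['tags'] == '["demo"]'
    #   parsed['extensions'] == '{"notes": "anything"}'
    # Missing text fields default to empty strings, e.g. parsed['description'] == ''.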
235
+
236
+
237
+ def add_character_card(card_data: Dict[str, Any]) -> Optional[int]:
238
+ """Add or update a character card in the database."""
239
+ conn = sqlite3.connect(chat_DB_PATH)
240
+ cursor = conn.cursor()
241
+ try:
242
+ parsed_card = parse_character_card(card_data)
243
+
244
+ # Check if character already exists
245
+ cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (parsed_card['name'],))
246
+ row = cursor.fetchone()
247
+
248
+ if row:
249
+ # Update existing character
250
+ character_id = row[0]
251
+ update_query = """
252
+ UPDATE CharacterCards
253
+ SET description = ?, personality = ?, scenario = ?, image = ?,
254
+ post_history_instructions = ?, first_mes = ?, mes_example = ?,
255
+ creator_notes = ?, system_prompt = ?, alternate_greetings = ?,
256
+ tags = ?, creator = ?, character_version = ?, extensions = ?
257
+ WHERE id = ?
258
+ """
259
+ cursor.execute(update_query, (
260
+ parsed_card['description'], parsed_card['personality'], parsed_card['scenario'],
261
+ parsed_card['image'], parsed_card['post_history_instructions'], parsed_card['first_mes'],
262
+ parsed_card['mes_example'], parsed_card['creator_notes'], parsed_card['system_prompt'],
263
+ parsed_card['alternate_greetings'], parsed_card['tags'], parsed_card['creator'],
264
+ parsed_card['character_version'], parsed_card['extensions'], character_id
265
+ ))
266
+ else:
267
+ # Insert new character
268
+ insert_query = """
269
+ INSERT INTO CharacterCards (name, description, personality, scenario, image,
270
+ post_history_instructions, first_mes, mes_example, creator_notes, system_prompt,
271
+ alternate_greetings, tags, creator, character_version, extensions)
272
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
273
+ """
274
+ cursor.execute(insert_query, (
275
+ parsed_card['name'], parsed_card['description'], parsed_card['personality'],
276
+ parsed_card['scenario'], parsed_card['image'], parsed_card['post_history_instructions'],
277
+ parsed_card['first_mes'], parsed_card['mes_example'], parsed_card['creator_notes'],
278
+ parsed_card['system_prompt'], parsed_card['alternate_greetings'], parsed_card['tags'],
279
+ parsed_card['creator'], parsed_card['character_version'], parsed_card['extensions']
280
+ ))
281
+ character_id = cursor.lastrowid
282
+
283
+ conn.commit()
284
+ return character_id
285
+ except sqlite3.IntegrityError as e:
286
+ logging.error(f"Error adding character card: {e}")
287
+ return None
288
+ except Exception as e:
289
+ logging.error(f"Unexpected error adding character card: {e}")
290
+ return None
291
+ finally:
292
+ conn.close()
293
+
294
+ # def add_character_card(card_data: Dict) -> Optional[int]:
295
+ # """Add or update a character card in the database.
296
+ #
297
+ # Returns the ID of the inserted character or None if failed.
298
+ # """
299
+ # conn = sqlite3.connect(chat_DB_PATH)
300
+ # cursor = conn.cursor()
301
+ # try:
302
+ # # Ensure all required fields are present
303
+ # required_fields = ['name', 'description', 'personality', 'scenario', 'image', 'post_history_instructions', 'first_message']
304
+ # for field in required_fields:
305
+ # if field not in card_data:
306
+ # card_data[field] = '' # Assign empty string if field is missing
307
+ #
308
+ # # Check if character already exists
309
+ # cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (card_data['name'],))
310
+ # row = cursor.fetchone()
311
+ #
312
+ # if row:
313
+ # # Update existing character
314
+ # character_id = row[0]
315
+ # cursor.execute("""
316
+ # UPDATE CharacterCards
317
+ # SET description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
318
+ # WHERE id = ?
319
+ # """, (
320
+ # card_data['description'],
321
+ # card_data['personality'],
322
+ # card_data['scenario'],
323
+ # card_data['image'],
324
+ # card_data['post_history_instructions'],
325
+ # card_data['first_message'],
326
+ # character_id
327
+ # ))
328
+ # else:
329
+ # # Insert new character
330
+ # cursor.execute("""
331
+ # INSERT INTO CharacterCards (name, description, personality, scenario, image, post_history_instructions, first_message)
332
+ # VALUES (?, ?, ?, ?, ?, ?, ?)
333
+ # """, (
334
+ # card_data['name'],
335
+ # card_data['description'],
336
+ # card_data['personality'],
337
+ # card_data['scenario'],
338
+ # card_data['image'],
339
+ # card_data['post_history_instructions'],
340
+ # card_data['first_message']
341
+ # ))
342
+ # character_id = cursor.lastrowid
343
+ #
344
+ # conn.commit()
345
+ # return cursor.lastrowid
346
+ # except sqlite3.IntegrityError as e:
347
+ # logging.error(f"Error adding character card: {e}")
348
+ # return None
349
+ # except Exception as e:
350
+ # logging.error(f"Unexpected error adding character card: {e}")
351
+ # return None
352
+ # finally:
353
+ # conn.close()
354
+
355
+
356
+ def get_character_cards() -> List[Dict]:
357
+ """Retrieve all character cards from the database."""
358
+ logging.debug(f"Fetching characters from DB: {chat_DB_PATH}")
359
+ conn = sqlite3.connect(chat_DB_PATH)
360
+ cursor = conn.cursor()
361
+ cursor.execute("SELECT * FROM CharacterCards")
362
+ rows = cursor.fetchall()
363
+ columns = [description[0] for description in cursor.description]
364
+ conn.close()
365
+ characters = [dict(zip(columns, row)) for row in rows]
366
+ #logging.debug(f"Characters fetched from DB: {characters}")
367
+ return characters
368
+
369
+
370
+ def get_character_card_by_id(character_id: Union[int, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
371
+ """
372
+ Retrieve a single character card by its ID.
373
+
374
+ Args:
375
+ character_id: Can be either an integer ID or a dictionary containing character data.
376
+
377
+ Returns:
378
+ A dictionary containing the character card data, or None if not found.
379
+ """
380
+ conn = sqlite3.connect(chat_DB_PATH)
381
+ cursor = conn.cursor()
382
+ try:
383
+ if isinstance(character_id, dict):
384
+ # If a dictionary is passed, assume it's already a character card
385
+ return character_id
386
+ elif isinstance(character_id, int):
387
+ # If an integer is passed, fetch the character from the database
388
+ cursor.execute("SELECT * FROM CharacterCards WHERE id = ?", (character_id,))
389
+ row = cursor.fetchone()
390
+ if row:
391
+ columns = [description[0] for description in cursor.description]
392
+ return dict(zip(columns, row))
393
+ else:
394
+ logging.warning(f"Invalid type for character_id: {type(character_id)}")
395
+ return None
396
+ except Exception as e:
397
+ logging.error(f"Error in get_character_card_by_id: {e}")
398
+ return None
399
+ finally:
400
+ conn.close()
401
+
402
+
403
+ def update_character_card(character_id: int, card_data: Dict) -> bool:
404
+ """Update an existing character card."""
405
+ conn = sqlite3.connect(chat_DB_PATH)
406
+ cursor = conn.cursor()
407
+ try:
408
+ cursor.execute("""
409
+ UPDATE CharacterCards
410
+ SET name = ?, description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
411
+ WHERE id = ?
412
+ """, (
413
+ card_data.get('name'),
414
+ card_data.get('description'),
415
+ card_data.get('personality'),
416
+ card_data.get('scenario'),
417
+ card_data.get('image'),
418
+ card_data.get('post_history_instructions', ''),
419
+ card_data.get('first_message', "Hello! I'm ready to chat."),
420
+ character_id
421
+ ))
422
+ conn.commit()
423
+ return cursor.rowcount > 0
424
+ except sqlite3.IntegrityError as e:
425
+ logging.error(f"Error updating character card: {e}")
426
+ return False
427
+ finally:
428
+ conn.close()
429
+
430
+
431
+ def delete_character_card(character_id: int) -> bool:
432
+ """Delete a character card and its associated chats."""
433
+ conn = sqlite3.connect(chat_DB_PATH)
434
+ cursor = conn.cursor()
435
+ try:
436
+ # Delete associated chats first due to foreign key constraint
437
+ cursor.execute("DELETE FROM CharacterChats WHERE character_id = ?", (character_id,))
438
+ cursor.execute("DELETE FROM CharacterCards WHERE id = ?", (character_id,))
439
+ conn.commit()
440
+ return cursor.rowcount > 0
441
+ except sqlite3.Error as e:
442
+ logging.error(f"Error deleting character card: {e}")
443
+ return False
444
+ finally:
445
+ conn.close()
446
+
447
+
448
+ def add_character_chat(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]], keywords: Optional[List[str]] = None, is_snapshot: bool = False) -> Optional[int]:
449
+ """
450
+ Add a new chat history for a character, optionally associating keywords.
451
+
452
+ Args:
453
+ character_id (int): The ID of the character.
454
+ conversation_name (str): Name of the conversation.
455
+ chat_history (List[Tuple[str, str]]): List of (user, bot) message tuples.
456
+ keywords (Optional[List[str]]): List of keywords to associate with this chat.
457
+ is_snapshot (bool, optional): Whether this chat is a snapshot.
458
+
459
+ Returns:
460
+ Optional[int]: The ID of the inserted chat or None if failed.
461
+ """
462
+ conn = sqlite3.connect(chat_DB_PATH)
463
+ cursor = conn.cursor()
464
+ try:
465
+ chat_history_json = json.dumps(chat_history)
466
+ cursor.execute("""
467
+ INSERT INTO CharacterChats (character_id, conversation_name, chat_history, is_snapshot)
468
+ VALUES (?, ?, ?, ?)
469
+ """, (
470
+ character_id,
471
+ conversation_name,
472
+ chat_history_json,
473
+ is_snapshot
474
+ ))
475
+ chat_id = cursor.lastrowid
476
+
477
+ if keywords:
478
+ # Insert keywords into ChatKeywords table
479
+ keyword_records = [(chat_id, keyword.strip().lower()) for keyword in keywords]
480
+ cursor.executemany("""
481
+ INSERT INTO ChatKeywords (chat_id, keyword)
482
+ VALUES (?, ?)
483
+ """, keyword_records)
484
+
485
+ conn.commit()
486
+ return chat_id
487
+ except sqlite3.Error as e:
488
+ logging.error(f"Error adding character chat: {e}")
489
+ return None
490
+ finally:
491
+ conn.close()
492
+
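# Example usage (illustrative sketch; the ID, messages and keywords are hypothetical):
# Chat turns are stored as a JSON list and keywords are lower-cased into ChatKeywords.
chat_id = add_character_chat(
    character_id=1,
    conversation_name="First conversation",
    chat_history=[("Hi there", "Hello! How can I help?")],
    keywords=["demo", "Greeting"],  # saved as "demo" and "greeting"
)
if chat_id is None:
    logging.warning("Chat could not be saved.")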
493
+
494
+ def get_character_chats(character_id: Optional[int] = None) -> List[Dict]:
495
+ """Retrieve all chats, or chats for a specific character if character_id is provided."""
496
+ conn = sqlite3.connect(chat_DB_PATH)
497
+ cursor = conn.cursor()
498
+ if character_id is not None:
499
+ cursor.execute("SELECT * FROM CharacterChats WHERE character_id = ?", (character_id,))
500
+ else:
501
+ cursor.execute("SELECT * FROM CharacterChats")
502
+ rows = cursor.fetchall()
503
+ columns = [description[0] for description in cursor.description]
504
+ conn.close()
505
+ return [dict(zip(columns, row)) for row in rows]
506
+
507
+
508
+ def get_character_chat_by_id(chat_id: int) -> Optional[Dict]:
509
+ """Retrieve a single chat by its ID."""
510
+ conn = sqlite3.connect(chat_DB_PATH)
511
+ cursor = conn.cursor()
512
+ cursor.execute("SELECT * FROM CharacterChats WHERE id = ?", (chat_id,))
513
+ row = cursor.fetchone()
514
+ conn.close()
515
+ if row:
516
+ columns = [description[0] for description in cursor.description]
517
+ chat = dict(zip(columns, row))
518
+ chat['chat_history'] = json.loads(chat['chat_history'])
519
+ return chat
520
+ return None
521
+
522
+
523
+ def search_character_chats(query: str, character_id: Optional[int] = None) -> Tuple[List[Dict], str]:
524
+ """
525
+ Search for character chats using FTS5, optionally filtered by character_id.
526
+
527
+ Args:
528
+ query (str): The search query.
529
+ character_id (Optional[int]): The ID of the character to filter chats by.
530
+
531
+ Returns:
532
+ Tuple[List[Dict], str]: A list of matching chats and a status message.
533
+ """
534
+ if not query.strip():
535
+ return [], "Please enter a search query."
536
+
537
+ conn = sqlite3.connect(chat_DB_PATH)
538
+ cursor = conn.cursor()
539
+ try:
540
+ if character_id is not None:
541
+ # Search with character_id filter
542
+ cursor.execute("""
543
+ SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
544
+ FROM CharacterChats_fts
545
+ JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
546
+ WHERE CharacterChats_fts MATCH ? AND CharacterChats.character_id = ?
547
+ ORDER BY rank
548
+ """, (query, character_id))
549
+ else:
550
+ # Search without character_id filter
551
+ cursor.execute("""
552
+ SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
553
+ FROM CharacterChats_fts
554
+ JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
555
+ WHERE CharacterChats_fts MATCH ?
556
+ ORDER BY rank
557
+ """, (query,))
558
+
559
+ rows = cursor.fetchall()
560
+ columns = [description[0] for description in cursor.description]
561
+ results = [dict(zip(columns, row)) for row in rows]
562
+
563
+ if character_id is not None:
564
+ status_message = f"Found {len(results)} chat(s) matching '{query}' for the selected character."
565
+ else:
566
+ status_message = f"Found {len(results)} chat(s) matching '{query}' across all characters."
567
+
568
+ return results, status_message
569
+ except Exception as e:
570
+ logging.error(f"Error searching chats with FTS5: {e}")
571
+ return [], f"Error occurred during search: {e}"
572
+ finally:
573
+ conn.close()
574
+
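# Example usage (illustrative sketch; the query string is hypothetical):
# FTS5 search across saved chats, optionally scoped to a single character.
results, status = search_character_chats("hello", character_id=1)
print(status)
for hit in results:
    print(hit["id"], hit["conversation_name"])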
575
+ def update_character_chat(chat_id: int, chat_history: List[Tuple[str, str]]) -> bool:
576
+ """Update an existing chat history."""
577
+ conn = sqlite3.connect(chat_DB_PATH)
578
+ cursor = conn.cursor()
579
+ try:
580
+ chat_history_json = json.dumps(chat_history)
581
+ cursor.execute("""
582
+ UPDATE CharacterChats
583
+ SET chat_history = ?
584
+ WHERE id = ?
585
+ """, (
586
+ chat_history_json,
587
+ chat_id
588
+ ))
589
+ conn.commit()
590
+ return cursor.rowcount > 0
591
+ except sqlite3.Error as e:
592
+ logging.error(f"Error updating character chat: {e}")
593
+ return False
594
+ finally:
595
+ conn.close()
596
+
597
+
598
+ def delete_character_chat(chat_id: int) -> bool:
599
+ """Delete a specific chat."""
600
+ conn = sqlite3.connect(chat_DB_PATH)
601
+ cursor = conn.cursor()
602
+ try:
603
+ cursor.execute("DELETE FROM CharacterChats WHERE id = ?", (chat_id,))
604
+ conn.commit()
605
+ return cursor.rowcount > 0
606
+ except sqlite3.Error as e:
607
+ logging.error(f"Error deleting character chat: {e}")
608
+ return False
609
+ finally:
610
+ conn.close()
611
+
612
+
613
+ def fetch_keywords_for_chats(keywords: List[str]) -> List[int]:
614
+ """
615
+ Fetch chat IDs associated with any of the specified keywords.
616
+
617
+ Args:
618
+ keywords (List[str]): List of keywords to search for.
619
+
620
+ Returns:
621
+ List[int]: List of chat IDs associated with the keywords.
622
+ """
623
+ if not keywords:
624
+ return []
625
+
626
+ conn = sqlite3.connect(chat_DB_PATH)
627
+ cursor = conn.cursor()
628
+ try:
629
+ # Construct the WHERE clause to search for each keyword
630
+ keyword_clauses = " OR ".join(["keyword = ?"] * len(keywords))
631
+ sql_query = f"SELECT DISTINCT chat_id FROM ChatKeywords WHERE {keyword_clauses}"
632
+ cursor.execute(sql_query, keywords)
633
+ rows = cursor.fetchall()
634
+ chat_ids = [row[0] for row in rows]
635
+ return chat_ids
636
+ except Exception as e:
637
+ logging.error(f"Error in fetch_keywords_for_chats: {e}")
638
+ return []
639
+ finally:
640
+ conn.close()
641
+
642
+
643
+ def save_chat_history_to_character_db(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]]) -> Optional[int]:
644
+ """Save chat history to the CharacterChats table.
645
+
646
+ Returns the ID of the inserted chat or None if failed.
647
+ """
648
+ return add_character_chat(character_id, conversation_name, chat_history)
649
+
650
+
651
+ def search_db(query: str, fields: List[str], where_clause: str = "", page: int = 1, results_per_page: int = 5) -> List[Dict[str, Any]]:
652
+ """
653
+ Perform a full-text search on specified fields with optional filtering and pagination.
654
+
655
+ Args:
656
+ query (str): The search query.
657
+ fields (List[str]): List of fields to search in.
658
+ where_clause (str, optional): Additional SQL WHERE clause to filter results.
659
+ page (int, optional): Page number for pagination.
660
+ results_per_page (int, optional): Number of results per page.
661
+
662
+ Returns:
663
+ List[Dict[str, Any]]: List of matching chat records with content and metadata.
664
+ """
665
+ if not query.strip():
666
+ return []
667
+
668
+ conn = sqlite3.connect(chat_DB_PATH)
669
+ cursor = conn.cursor()
670
+ try:
671
+ # Construct the MATCH query for FTS5
672
+ match_query = " AND ".join(fields) + f" MATCH ?"
673
+ # Adjust the query with the fields
674
+ fts_query = f"""
675
+ SELECT CharacterChats.id, CharacterChats.character_id, CharacterChats.conversation_name, CharacterChats.chat_history
676
+ FROM CharacterChats_fts
677
+ JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
678
+ WHERE {match_query}
679
+ """
680
+ if where_clause:
681
+ fts_query += f" AND ({where_clause})"
682
+ fts_query += " ORDER BY rank LIMIT ? OFFSET ?"
683
+ offset = (page - 1) * results_per_page
684
+ cursor.execute(fts_query, (query, results_per_page, offset))
685
+ rows = cursor.fetchall()
686
+ columns = [description[0] for description in cursor.description]
687
+ results = [dict(zip(columns, row)) for row in rows]
688
+ return results
689
+ except Exception as e:
690
+ logging.error(f"Error in search_db: {e}")
691
+ return []
692
+ finally:
693
+ conn.close()
694
+
695
+
696
+ def perform_full_text_search_chat(query: str, relevant_chat_ids: List[int], page: int = 1, results_per_page: int = 5) -> \
697
+ List[Dict[str, Any]]:
698
+ """
699
+ Perform a full-text search within the specified chat IDs using FTS5.
700
+
701
+ Args:
702
+ query (str): The user's query.
703
+ relevant_chat_ids (List[int]): List of chat IDs to search within.
704
+ page (int): Pagination page number.
705
+ results_per_page (int): Number of results per page.
706
+
707
+ Returns:
708
+ List[Dict[str, Any]]: List of search results with content and metadata.
709
+ """
710
+ try:
711
+ # Construct a WHERE clause to limit the search to relevant chat IDs
712
+ where_clause = " OR ".join([f"CharacterChats.id = {chat_id}" for chat_id in relevant_chat_ids])
713
+ if not where_clause:
714
+ where_clause = "1" # No restriction if no chat IDs
715
+
716
+ # Perform full-text search using FTS5
717
+ fts_results = search_db(query, ["CharacterChats_fts"], where_clause, page=page, results_per_page=results_per_page)
718
+
719
+ filtered_fts_results = [
720
+ {
721
+ "content": result['content'],
722
+ "metadata": {"media_id": result['id']}
723
+ }
724
+ for result in fts_results
725
+ if result['id'] in relevant_chat_ids
726
+ ]
727
+ return filtered_fts_results
728
+ except Exception as e:
729
+ logging.error(f"Error in perform_full_text_search_chat: {str(e)}")
730
+ return []
731
+
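# Example usage (illustrative sketch; the keywords and query are hypothetical):
# Restrict a full-text search to chats tagged with particular keywords.
chat_ids = fetch_keywords_for_chats(["demo", "greeting"])
hits = perform_full_text_search_chat("hello", chat_ids, page=1, results_per_page=5)
for hit in hits:
    print(hit["metadata"]["media_id"], hit["content"][:80])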
732
+
733
+ def fetch_all_chats() -> List[Dict[str, Any]]:
734
+ """
735
+ Fetch all chat messages from the database.
736
+
737
+ Returns:
738
+ List[Dict[str, Any]]: List of chat messages with relevant metadata.
739
+ """
740
+ try:
741
+ chats = get_character_chats() # Modify this function to retrieve all chats
742
+ return chats
743
+ except Exception as e:
744
+ logging.error(f"Error fetching all chats: {str(e)}")
745
+ return []
746
+
747
+
748
+ def search_character_chat(query: str, fts_top_k: int = 10, relevant_media_ids: List[str] = None) -> List[Dict[str, Any]]:
749
+ """
750
+ Perform a full-text search on the Character Chat database.
751
+
752
+ Args:
753
+ query: Search query string.
754
+ fts_top_k: Maximum number of results to return.
755
+ relevant_media_ids: Optional list of character IDs to filter results.
756
+
757
+ Returns:
758
+ List of search results with content and metadata.
759
+ """
760
+ if not query.strip():
761
+ return []
762
+
763
+ try:
764
+ # Construct a WHERE clause to limit the search to relevant character IDs
765
+ where_clause = ""
766
+ if relevant_media_ids:
767
+ placeholders = ','.join(['?'] * len(relevant_media_ids))
768
+ where_clause = f"CharacterChats.character_id IN ({placeholders})"
769
+
770
+ # Perform full-text search using existing search_db function
771
+ results = search_db(query, ["conversation_name", "chat_history"], where_clause, results_per_page=fts_top_k)
772
+
773
+ # Format results
774
+ formatted_results = []
775
+ for r in results:
776
+ formatted_results.append({
777
+ "content": r['chat_history'],
778
+ "metadata": {
779
+ "chat_id": r['id'],
780
+ "conversation_name": r['conversation_name'],
781
+ "character_id": r['character_id']
782
+ }
783
+ })
784
+
785
+ return formatted_results
786
+
787
+ except Exception as e:
788
+ logging.error(f"Error in search_character_chat: {e}")
789
+ return []
790
+
791
+
792
+ def search_character_cards(query: str, fts_top_k: int = 10, relevant_media_ids: List[str] = None) -> List[Dict[str, Any]]:
793
+ """
794
+ Perform a full-text search on the Character Cards database.
795
+
796
+ Args:
797
+ query: Search query string.
798
+ fts_top_k: Maximum number of results to return.
799
+ relevant_media_ids: Optional list of character IDs to filter results.
800
+
801
+ Returns:
802
+ List of search results with content and metadata.
803
+ """
804
+ if not query.strip():
805
+ return []
806
+
807
+ try:
808
+ conn = sqlite3.connect(chat_DB_PATH)
809
+ cursor = conn.cursor()
810
+
811
+ # Construct the query
812
+ sql_query = """
813
+ SELECT CharacterCards.id, CharacterCards.name, CharacterCards.description, CharacterCards.personality, CharacterCards.scenario
814
+ FROM CharacterCards_fts
815
+ JOIN CharacterCards ON CharacterCards_fts.rowid = CharacterCards.id
816
+ WHERE CharacterCards_fts MATCH ?
817
+ """
818
+
819
+ params = [query]
820
+
821
+ # Add filtering by character IDs if provided
822
+ if relevant_media_ids:
823
+ placeholders = ','.join(['?'] * len(relevant_media_ids))
824
+ sql_query += f" AND CharacterCards.id IN ({placeholders})"
825
+ params.extend(relevant_media_ids)
826
+
827
+ sql_query += " LIMIT ?"
828
+ params.append(fts_top_k)
829
+
830
+ cursor.execute(sql_query, params)
831
+ rows = cursor.fetchall()
832
+ columns = [description[0] for description in cursor.description]
833
+
834
+ results = [dict(zip(columns, row)) for row in rows]
835
+
836
+ # Format results
837
+ formatted_results = []
838
+ for r in results:
839
+ content = f"Name: {r['name']}\nDescription: {r['description']}\nPersonality: {r['personality']}\nScenario: {r['scenario']}"
840
+ formatted_results.append({
841
+ "content": content,
842
+ "metadata": {
843
+ "character_id": r['id'],
844
+ "name": r['name']
845
+ }
846
+ })
847
+
848
+ return formatted_results
849
+
850
+ except Exception as e:
851
+ logging.error(f"Error in search_character_cards: {e}")
852
+ return []
853
+ finally:
854
+ conn.close()
855
+
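# Example usage (illustrative sketch; the query is hypothetical):
# Retrieve character cards as retrieval context (e.g. for RAG prompts).
for hit in search_character_cards("wizard", fts_top_k=5):
    print(hit["metadata"]["name"])
    print(hit["content"][:120])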
856
+
857
+ def fetch_character_ids_by_keywords(keywords: List[str]) -> List[int]:
858
+ """
859
+ Fetch character IDs associated with any of the specified keywords.
860
+
861
+ Args:
862
+ keywords (List[str]): List of keywords to search for.
863
+
864
+ Returns:
865
+ List[int]: List of character IDs associated with the keywords.
866
+ """
867
+ if not keywords:
868
+ return []
869
+
870
+ conn = sqlite3.connect(chat_DB_PATH)
871
+ cursor = conn.cursor()
872
+ try:
873
+ # Assuming 'tags' column in CharacterCards table stores tags as JSON array
874
+ placeholders = ','.join(['?'] * len(keywords))
875
+ sql_query = f"""
876
+ SELECT DISTINCT id FROM CharacterCards
877
+ WHERE EXISTS (
878
+ SELECT 1 FROM json_each(tags)
879
+ WHERE json_each.value IN ({placeholders})
880
+ )
881
+ """
882
+ cursor.execute(sql_query, keywords)
883
+ rows = cursor.fetchall()
884
+ character_ids = [row[0] for row in rows]
885
+ return character_ids
886
+ except Exception as e:
887
+ logging.error(f"Error in fetch_character_ids_by_keywords: {e}")
888
+ return []
889
+ finally:
890
+ conn.close()
891
+
892
+
893
+ ###################################################################
894
+ #
895
+ # Character Keywords
896
+
897
+ def view_char_keywords():
898
+ try:
899
+ with sqlite3.connect(chat_DB_PATH) as conn:
900
+ cursor = conn.cursor()
901
+ cursor.execute("""
902
+ SELECT DISTINCT json_each.value AS keyword
903
+ FROM CharacterCards
904
+ CROSS JOIN json_each(tags)
905
+ WHERE json_valid(tags)
906
+ ORDER BY keyword
907
+ """)
908
+ keywords = cursor.fetchall()
909
+ if keywords:
910
+ keyword_list = [k[0] for k in keywords]
911
+ return "### Current Character Keywords:\n" + "\n".join(
912
+ [f"- {k}" for k in keyword_list])
913
+ return "No keywords found."
914
+ except Exception as e:
915
+ return f"Error retrieving keywords: {str(e)}"
916
+
917
+
918
+ def add_char_keywords(name: str, keywords: str):
919
+ try:
920
+ keywords_list = [k.strip() for k in keywords.split(",") if k.strip()]
921
+ with sqlite3.connect(chat_DB_PATH) as conn:
922
+ cursor = conn.cursor()
923
+ cursor.execute(
924
+ "SELECT tags FROM CharacterCards WHERE name = ?",
925
+ (name,)
926
+ )
927
+ result = cursor.fetchone()
928
+ if not result:
929
+ return "Character not found."
930
+
931
+ current_tags = result[0] if result[0] else "[]"
932
+ current_keywords = set(json.loads(current_tags)) if current_tags != "[]" else set()
933
+ updated_keywords = current_keywords.union(set(keywords_list))
934
+
935
+ cursor.execute(
936
+ "UPDATE CharacterCards SET tags = ? WHERE name = ?",
937
+ (json.dumps(sorted(updated_keywords)), name)
938
+ )
939
+ conn.commit()
940
+ return f"Successfully added keywords to character {name}"
941
+ except Exception as e:
942
+ return f"Error adding keywords: {str(e)}"
943
+
944
+
945
+ def delete_char_keyword(char_name: str, keyword: str) -> str:
946
+ """
947
+ Delete a keyword from a character's tags.
948
+
949
+ Args:
950
+ char_name (str): The name of the character
951
+ keyword (str): The keyword to delete
952
+
953
+ Returns:
954
+ str: Success/failure message
955
+ """
956
+ try:
957
+ with sqlite3.connect(chat_DB_PATH) as conn:
958
+ cursor = conn.cursor()
959
+
960
+ # First, check if the character exists
961
+ cursor.execute("SELECT tags FROM CharacterCards WHERE name = ?", (char_name,))
962
+ result = cursor.fetchone()
963
+
964
+ if not result:
965
+ return f"Character '{char_name}' not found."
966
+
967
+ # Parse existing tags
968
+ current_tags = json.loads(result[0]) if result[0] else []
969
+
970
+ if keyword not in current_tags:
971
+ return f"Keyword '{keyword}' not found in character '{char_name}' tags."
972
+
973
+ # Remove the keyword
974
+ updated_tags = [tag for tag in current_tags if tag != keyword]
975
+
976
+ # Update the character's tags
977
+ cursor.execute(
978
+ "UPDATE CharacterCards SET tags = ? WHERE name = ?",
979
+ (json.dumps(updated_tags), char_name)
980
+ )
981
+ conn.commit()
982
+
983
+ logging.info(f"Keyword '{keyword}' deleted from character '{char_name}'")
984
+ return f"Successfully deleted keyword '{keyword}' from character '{char_name}'."
985
+
986
+ except Exception as e:
987
+ error_msg = f"Error deleting keyword: {str(e)}"
988
+ logging.error(error_msg)
989
+ return error_msg
990
+
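# Example usage (illustrative sketch; the character name and keywords are hypothetical):
print(add_char_keywords("Example Bot", "fantasy, demo"))
print(view_char_keywords())
print(delete_char_keyword("Example Bot", "demo"))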
991
+
992
+ def export_char_keywords_to_csv() -> Tuple[str, str]:
993
+ """
994
+ Export all character keywords to a CSV file with associated metadata.
995
+
996
+ Returns:
997
+ Tuple[str, str]: (status_message, file_path)
998
+ """
999
+ import csv
1000
+ from tempfile import NamedTemporaryFile
1001
+ from datetime import datetime
1002
+
1003
+ try:
1004
+ # Create a temporary CSV file
1005
+ temp_file = NamedTemporaryFile(mode='w+', delete=False, suffix='.csv', newline='')
1006
+
1007
+ with sqlite3.connect(chat_DB_PATH) as conn:
1008
+ cursor = conn.cursor()
1009
+
1010
+ # Get all characters and their tags
1011
+ cursor.execute("""
1012
+ SELECT
1013
+ name,
1014
+ tags,
1015
+ (SELECT COUNT(*) FROM CharacterChats WHERE CharacterChats.character_id = CharacterCards.id) as chat_count
1016
+ FROM CharacterCards
1017
+ WHERE json_valid(tags)
1018
+ ORDER BY name
1019
+ """)
1020
+
1021
+ results = cursor.fetchall()
1022
+
1023
+ # Process the results to create rows for the CSV
1024
+ csv_rows = []
1025
+ for name, tags_json, chat_count in results:
1026
+ tags = json.loads(tags_json) if tags_json else []
1027
+ for tag in tags:
1028
+ csv_rows.append([
1029
+ tag, # keyword
1030
+ name, # character name
1031
+ chat_count # number of chats
1032
+ ])
1033
+
1034
+ # Write to CSV
1035
+ writer = csv.writer(temp_file)
1036
+ writer.writerow(['Keyword', 'Character Name', 'Number of Chats'])
1037
+ writer.writerows(csv_rows)
1038
+
1039
+ temp_file.close()
1040
+
1041
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1042
+ status_msg = f"Successfully exported {len(csv_rows)} character keyword entries to CSV."
1043
+ logging.info(status_msg)
1044
+
1045
+ return status_msg, temp_file.name
1046
+
1047
+ except Exception as e:
1048
+ error_msg = f"Error exporting keywords: {str(e)}"
1049
+ logging.error(error_msg)
1050
+ return error_msg, ""
1051
+
1052
+ #
1053
+ # End of Character chat keyword functions
1054
+ ######################################################
1055
+
1056
+
1057
+ #
1058
+ # End of Character_Chat_DB.py
1059
+ #######################################################################################################################
App_Function_Libraries/DB/DB_Backups.py ADDED
@@ -0,0 +1,160 @@
1
+ # Backup_Manager.py
2
+ #
3
+ # Imports:
4
+ import os
5
+ import shutil
6
+ import sqlite3
7
+ from datetime import datetime
8
+ import logging
9
+ #
10
+ # Local Imports:
11
+ from App_Function_Libraries.DB.Character_Chat_DB import chat_DB_PATH
12
+ from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_rag_qa_db_path
13
+ from App_Function_Libraries.Utils.Utils import get_project_relative_path
14
+ #
15
+ # End of Imports
16
+ #######################################################################################################################
17
+ #
18
+ # Functions:
19
+
20
+ def init_backup_directory(backup_base_dir: str, db_name: str) -> str:
21
+ """Initialize backup directory for a specific database."""
22
+ backup_dir = os.path.join(backup_base_dir, db_name)
23
+ os.makedirs(backup_dir, exist_ok=True)
24
+ return backup_dir
25
+
26
+
27
+ def create_backup(db_path: str, backup_dir: str, db_name: str) -> str:
28
+ """Create a full backup of the database."""
29
+ try:
30
+ db_path = os.path.abspath(db_path)
31
+ backup_dir = os.path.abspath(backup_dir)
32
+
33
+ logging.info(f"Creating backup:")
34
+ logging.info(f" DB Path: {db_path}")
35
+ logging.info(f" Backup Dir: {backup_dir}")
36
+ logging.info(f" DB Name: {db_name}")
37
+
38
+ # Create subdirectory based on db_name
39
+ specific_backup_dir = os.path.join(backup_dir, db_name)
40
+ os.makedirs(specific_backup_dir, exist_ok=True)
41
+
42
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
43
+ backup_file = os.path.join(specific_backup_dir, f"{db_name}_backup_{timestamp}.db")
44
+ logging.info(f" Full backup path: {backup_file}")
45
+
46
+ # Create a backup using SQLite's backup API
47
+ with sqlite3.connect(db_path) as source, \
48
+ sqlite3.connect(backup_file) as target:
49
+ source.backup(target)
50
+
51
+ logging.info(f"Backup created successfully: {backup_file}")
52
+ return f"Backup created: {backup_file}"
53
+ except Exception as e:
54
+ error_msg = f"Failed to create backup: {str(e)}"
55
+ logging.error(error_msg)
56
+ return error_msg
57
+
58
+
59
+ def create_incremental_backup(db_path: str, backup_dir: str, db_name: str) -> str:
60
+ """Create an incremental backup using VACUUM INTO."""
61
+ try:
62
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
63
+ backup_file = os.path.join(backup_dir,
64
+ f"{db_name}_incremental_{timestamp}.sqlib")
65
+
66
+ with sqlite3.connect(db_path) as conn:
67
+ conn.execute(f"VACUUM INTO '{backup_file}'")
68
+
69
+ logging.info(f"Incremental backup created: {backup_file}")
70
+ return f"Incremental backup created: {backup_file}"
71
+ except Exception as e:
72
+ error_msg = f"Failed to create incremental backup: {str(e)}"
73
+ logging.error(error_msg)
74
+ return error_msg
75
+
76
+
77
+ def list_backups(backup_dir: str) -> str:
78
+ """List all available backups."""
79
+ try:
80
+ backups = [f for f in os.listdir(backup_dir)
81
+ if f.endswith(('.db', '.sqlib'))]
82
+ backups.sort(reverse=True) # Most recent first
83
+ return "\n".join(backups) if backups else "No backups found"
84
+ except Exception as e:
85
+ error_msg = f"Failed to list backups: {str(e)}"
86
+ logging.error(error_msg)
87
+ return error_msg
88
+
89
+
90
+ def restore_single_db_backup(db_path: str, backup_dir: str, db_name: str, backup_name: str) -> str:
91
+ """Restore database from a backup file."""
92
+ try:
93
+ logging.info(f"Restoring backup: {backup_name}")
94
+ backup_path = os.path.join(backup_dir, backup_name)
95
+ if not os.path.exists(backup_path):
96
+ logging.error(f"Backup file not found: {backup_name}")
97
+ return f"Backup file not found: {backup_name}"
98
+
99
+ # Create a timestamp for the current db
100
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
101
+ current_backup = os.path.join(backup_dir,
102
+ f"{db_name}_pre_restore_{timestamp}.db")
103
+
104
+ # Backup current database before restore
105
+ logging.info(f"Creating backup of current database: {current_backup}")
106
+ shutil.copy2(db_path, current_backup)
107
+
108
+ # Restore the backup
109
+ logging.info(f"Restoring database from {backup_name}")
110
+ shutil.copy2(backup_path, db_path)
111
+
112
+ logging.info(f"Database restored from {backup_name}")
113
+ return f"Database restored from {backup_name}"
114
+ except Exception as e:
115
+ error_msg = f"Failed to restore backup: {str(e)}"
116
+ logging.error(error_msg)
117
+ return error_msg
118
+
119
+
120
+ def setup_backup_config():
121
+ """Setup configuration for database backups."""
122
+ backup_base_dir = get_project_relative_path('tldw_DB_Backups')
123
+ logging.info(f"Base backup directory: {os.path.abspath(backup_base_dir)}")
124
+
125
+ # RAG Chat DB configuration
126
+ rag_db_path = get_rag_qa_db_path()
127
+ rag_backup_dir = os.path.join(backup_base_dir, 'rag_chat')
128
+ os.makedirs(rag_backup_dir, exist_ok=True)
129
+ logging.info(f"RAG backup directory: {os.path.abspath(rag_backup_dir)}")
130
+
131
+ rag_db_config = {
132
+ 'db_path': rag_db_path,
133
+ 'backup_dir': rag_backup_dir, # Make sure we use the full path
134
+ 'db_name': 'rag_qa'
135
+ }
136
+
137
+ # Character Chat DB configuration
138
+ char_backup_dir = os.path.join(backup_base_dir, 'character_chat')
139
+ os.makedirs(char_backup_dir, exist_ok=True)
140
+ logging.info(f"Character backup directory: {os.path.abspath(char_backup_dir)}")
141
+
142
+ char_db_config = {
143
+ 'db_path': chat_DB_PATH,
144
+ 'backup_dir': char_backup_dir, # Make sure we use the full path
145
+ 'db_name': 'chatDB'
146
+ }
147
+
148
+ # Media DB configuration (based on your logs)
149
+ media_backup_dir = os.path.join(backup_base_dir, 'media')
150
+ os.makedirs(media_backup_dir, exist_ok=True)
151
+ logging.info(f"Media backup directory: {os.path.abspath(media_backup_dir)}")
152
+
153
+ media_db_config = {
154
+ 'db_path': os.path.join(os.path.dirname(chat_DB_PATH), 'media_summary.db'),
155
+ 'backup_dir': media_backup_dir,
156
+ 'db_name': 'media'
157
+ }
158
+
159
+ return rag_db_config, char_db_config, media_db_config
160
+
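# Example usage (illustrative sketch): back up every configured database in one pass.
rag_cfg, char_cfg, media_cfg = setup_backup_config()
for cfg in (rag_cfg, char_cfg, media_cfg):
    print(create_backup(cfg['db_path'], cfg['backup_dir'], cfg['db_name']))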
App_Function_Libraries/DB/DB_Manager.py CHANGED
@@ -13,11 +13,14 @@ from elasticsearch import Elasticsearch
13
  #
14
  # Import your existing SQLite functions
15
  from App_Function_Libraries.DB.SQLite_DB import DatabaseError
 
 
 
 
 
16
  from App_Function_Libraries.DB.SQLite_DB import (
17
  update_media_content as sqlite_update_media_content,
18
- list_prompts as sqlite_list_prompts,
19
  search_and_display as sqlite_search_and_display,
20
- fetch_prompt_details as sqlite_fetch_prompt_details,
21
  keywords_browser_interface as sqlite_keywords_browser_interface,
22
  add_keyword as sqlite_add_keyword,
23
  delete_keyword as sqlite_delete_keyword,
@@ -25,31 +28,17 @@ from App_Function_Libraries.DB.SQLite_DB import (
25
  ingest_article_to_db as sqlite_ingest_article_to_db,
26
  add_media_to_database as sqlite_add_media_to_database,
27
  import_obsidian_note_to_db as sqlite_import_obsidian_note_to_db,
28
- add_prompt as sqlite_add_prompt,
29
- delete_chat_message as sqlite_delete_chat_message,
30
- update_chat_message as sqlite_update_chat_message,
31
- add_chat_message as sqlite_add_chat_message,
32
- get_chat_messages as sqlite_get_chat_messages,
33
- search_chat_conversations as sqlite_search_chat_conversations,
34
- create_chat_conversation as sqlite_create_chat_conversation,
35
- save_chat_history_to_database as sqlite_save_chat_history_to_database,
36
  view_database as sqlite_view_database,
37
  get_transcripts as sqlite_get_transcripts,
38
  get_trashed_items as sqlite_get_trashed_items,
39
  user_delete_item as sqlite_user_delete_item,
40
  empty_trash as sqlite_empty_trash,
41
  create_automated_backup as sqlite_create_automated_backup,
42
- add_or_update_prompt as sqlite_add_or_update_prompt,
43
- load_prompt_details as sqlite_load_prompt_details,
44
- load_preset_prompts as sqlite_load_preset_prompts,
45
- insert_prompt_to_db as sqlite_insert_prompt_to_db,
46
- delete_prompt as sqlite_delete_prompt,
47
  search_and_display_items as sqlite_search_and_display_items,
48
- get_conversation_name as sqlite_get_conversation_name,
49
  add_media_with_keywords as sqlite_add_media_with_keywords,
50
  check_media_and_whisper_model as sqlite_check_media_and_whisper_model, \
51
  create_document_version as sqlite_create_document_version,
52
- get_document_version as sqlite_get_document_version, sqlite_search_db, add_media_chunk as sqlite_add_media_chunk,
53
  sqlite_update_fts_for_media, get_unprocessed_media as sqlite_get_unprocessed_media, fetch_item_details as sqlite_fetch_item_details, \
54
  search_media_database as sqlite_search_media_database, mark_as_trash as sqlite_mark_as_trash, \
55
  get_media_transcripts as sqlite_get_media_transcripts, get_specific_transcript as sqlite_get_specific_transcript, \
@@ -60,23 +49,35 @@ from App_Function_Libraries.DB.SQLite_DB import (
60
  delete_specific_prompt as sqlite_delete_specific_prompt,
61
  fetch_keywords_for_media as sqlite_fetch_keywords_for_media, \
62
  update_keywords_for_media as sqlite_update_keywords_for_media, check_media_exists as sqlite_check_media_exists, \
63
- search_prompts as sqlite_search_prompts, get_media_content as sqlite_get_media_content, \
64
- get_paginated_files as sqlite_get_paginated_files, get_media_title as sqlite_get_media_title, \
65
- get_all_content_from_database as sqlite_get_all_content_from_database,
66
- get_next_media_id as sqlite_get_next_media_id, \
67
- batch_insert_chunks as sqlite_batch_insert_chunks, Database, save_workflow_chat_to_db as sqlite_save_workflow_chat_to_db, \
68
- get_workflow_chat as sqlite_get_workflow_chat, update_media_content_with_version as sqlite_update_media_content_with_version, \
69
  check_existing_media as sqlite_check_existing_media, get_all_document_versions as sqlite_get_all_document_versions, \
70
  fetch_paginated_data as sqlite_fetch_paginated_data, get_latest_transcription as sqlite_get_latest_transcription, \
71
  mark_media_as_processed as sqlite_mark_media_as_processed,
72
  )
73
  from App_Function_Libraries.DB.Character_Chat_DB import (
74
  add_character_card as sqlite_add_character_card, get_character_cards as sqlite_get_character_cards, \
75
  get_character_card_by_id as sqlite_get_character_card_by_id, update_character_card as sqlite_update_character_card, \
76
  delete_character_card as sqlite_delete_character_card, add_character_chat as sqlite_add_character_chat, \
77
  get_character_chats as sqlite_get_character_chats, get_character_chat_by_id as sqlite_get_character_chat_by_id, \
78
- update_character_chat as sqlite_update_character_chat, delete_character_chat as sqlite_delete_character_chat, \
79
- migrate_chat_to_media_db as sqlite_migrate_chat_to_media_db,
80
  )
81
  #
82
  # Local Imports
@@ -214,9 +215,9 @@ print(f"Database path: {db.db_path}")
214
  #
215
  # DB Search functions
216
 
217
- def search_db(search_query: str, search_fields: List[str], keywords: str, page: int = 1, results_per_page: int = 10):
218
  if db_type == 'sqlite':
219
- return sqlite_search_db(search_query, search_fields, keywords, page, results_per_page)
220
  elif db_type == 'elasticsearch':
221
  # Implement Elasticsearch version when available
222
  raise NotImplementedError("Elasticsearch version of search_db not yet implemented")
@@ -500,13 +501,6 @@ def load_prompt_details(*args, **kwargs):
500
  # Implement Elasticsearch version
501
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
502
 
503
- def load_preset_prompts(*args, **kwargs):
504
- if db_type == 'sqlite':
505
- return sqlite_load_preset_prompts()
506
- elif db_type == 'elasticsearch':
507
- # Implement Elasticsearch version
508
- raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
509
-
510
  def insert_prompt_to_db(*args, **kwargs):
511
  if db_type == 'sqlite':
512
  return sqlite_insert_prompt_to_db(*args, **kwargs)
@@ -539,7 +533,6 @@ def mark_as_trash(media_id: int) -> None:
539
  else:
540
  raise ValueError(f"Unsupported database type: {db_type}")
541
 
542
-
543
  def get_latest_transcription(*args, **kwargs):
544
  if db_type == 'sqlite':
545
  return sqlite_get_latest_transcription(*args, **kwargs)
@@ -721,62 +714,132 @@ def fetch_keywords_for_media(*args, **kwargs):
721
  #
722
  # Chat-related Functions
723
 
724
- def delete_chat_message(*args, **kwargs):
  if db_type == 'sqlite':
726
- return sqlite_delete_chat_message(*args, **kwargs)
727
  elif db_type == 'elasticsearch':
728
  # Implement Elasticsearch version
729
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
730
 
731
- def update_chat_message(*args, **kwargs):
732
  if db_type == 'sqlite':
733
- return sqlite_update_chat_message(*args, **kwargs)
734
  elif db_type == 'elasticsearch':
735
  # Implement Elasticsearch version
736
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
737
 
738
- def add_chat_message(*args, **kwargs):
739
  if db_type == 'sqlite':
740
- return sqlite_add_chat_message(*args, **kwargs)
741
  elif db_type == 'elasticsearch':
742
  # Implement Elasticsearch version
743
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
744
 
745
- def get_chat_messages(*args, **kwargs):
746
  if db_type == 'sqlite':
747
- return sqlite_get_chat_messages(*args, **kwargs)
748
  elif db_type == 'elasticsearch':
749
  # Implement Elasticsearch version
750
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
751
 
752
- def search_chat_conversations(*args, **kwargs):
753
  if db_type == 'sqlite':
754
- return sqlite_search_chat_conversations(*args, **kwargs)
755
  elif db_type == 'elasticsearch':
756
  # Implement Elasticsearch version
757
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
758
 
759
- def create_chat_conversation(*args, **kwargs):
760
  if db_type == 'sqlite':
761
- return sqlite_create_chat_conversation(*args, **kwargs)
762
  elif db_type == 'elasticsearch':
763
  # Implement Elasticsearch version
764
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
765
 
766
- def save_chat_history_to_database(*args, **kwargs):
767
  if db_type == 'sqlite':
768
- return sqlite_save_chat_history_to_database(*args, **kwargs)
769
  elif db_type == 'elasticsearch':
770
  # Implement Elasticsearch version
771
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
772
 
773
- def get_conversation_name(*args, **kwargs):
774
  if db_type == 'sqlite':
775
- return sqlite_get_conversation_name(*args, **kwargs)
776
  elif db_type == 'elasticsearch':
777
  # Implement Elasticsearch version
778
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
779
 
780
  #
781
  # End of Chat-related Functions
782
  ############################################################################################################
@@ -856,12 +919,54 @@ def delete_character_chat(*args, **kwargs):
856
  # Implement Elasticsearch version
857
  raise NotImplementedError("Elasticsearch version of delete_character_chat not yet implemented")
858
 
859
- def migrate_chat_to_media_db(*args, **kwargs):
 
  if db_type == 'sqlite':
861
- return sqlite_migrate_chat_to_media_db(*args, **kwargs)
862
  elif db_type == 'elasticsearch':
863
  # Implement Elasticsearch version
864
- raise NotImplementedError("Elasticsearch version of migrate_chat_to_media_db not yet implemented")
865
 
866
  #
867
  # End of Character Chat-related Functions
 
13
  #
14
  # Import your existing SQLite functions
15
  from App_Function_Libraries.DB.SQLite_DB import DatabaseError
16
+ from App_Function_Libraries.DB.Prompts_DB import list_prompts as sqlite_list_prompts, \
17
+ fetch_prompt_details as sqlite_fetch_prompt_details, add_prompt as sqlite_add_prompt, \
18
+ search_prompts as sqlite_search_prompts, add_or_update_prompt as sqlite_add_or_update_prompt, \
19
+ load_prompt_details as sqlite_load_prompt_details, insert_prompt_to_db as sqlite_insert_prompt_to_db, \
20
+ delete_prompt as sqlite_delete_prompt
21
  from App_Function_Libraries.DB.SQLite_DB import (
22
  update_media_content as sqlite_update_media_content,
 
23
  search_and_display as sqlite_search_and_display,
 
24
  keywords_browser_interface as sqlite_keywords_browser_interface,
25
  add_keyword as sqlite_add_keyword,
26
  delete_keyword as sqlite_delete_keyword,
 
28
  ingest_article_to_db as sqlite_ingest_article_to_db,
29
  add_media_to_database as sqlite_add_media_to_database,
30
  import_obsidian_note_to_db as sqlite_import_obsidian_note_to_db,
 
 
 
 
 
 
 
 
31
  view_database as sqlite_view_database,
32
  get_transcripts as sqlite_get_transcripts,
33
  get_trashed_items as sqlite_get_trashed_items,
34
  user_delete_item as sqlite_user_delete_item,
35
  empty_trash as sqlite_empty_trash,
36
  create_automated_backup as sqlite_create_automated_backup,
 
 
 
 
 
37
  search_and_display_items as sqlite_search_and_display_items,
 
38
  add_media_with_keywords as sqlite_add_media_with_keywords,
39
  check_media_and_whisper_model as sqlite_check_media_and_whisper_model, \
40
  create_document_version as sqlite_create_document_version,
41
+ get_document_version as sqlite_get_document_version, search_media_db as sqlite_search_media_db, add_media_chunk as sqlite_add_media_chunk,
42
  sqlite_update_fts_for_media, get_unprocessed_media as sqlite_get_unprocessed_media, fetch_item_details as sqlite_fetch_item_details, \
43
  search_media_database as sqlite_search_media_database, mark_as_trash as sqlite_mark_as_trash, \
44
  get_media_transcripts as sqlite_get_media_transcripts, get_specific_transcript as sqlite_get_specific_transcript, \
 
49
  delete_specific_prompt as sqlite_delete_specific_prompt,
50
  fetch_keywords_for_media as sqlite_fetch_keywords_for_media, \
51
  update_keywords_for_media as sqlite_update_keywords_for_media, check_media_exists as sqlite_check_media_exists, \
52
+ get_media_content as sqlite_get_media_content, get_paginated_files as sqlite_get_paginated_files, \
53
+ get_media_title as sqlite_get_media_title, get_all_content_from_database as sqlite_get_all_content_from_database, \
54
+ get_next_media_id as sqlite_get_next_media_id, batch_insert_chunks as sqlite_batch_insert_chunks, Database, \
55
+ save_workflow_chat_to_db as sqlite_save_workflow_chat_to_db, get_workflow_chat as sqlite_get_workflow_chat, \
56
+ update_media_content_with_version as sqlite_update_media_content_with_version, \
 
57
  check_existing_media as sqlite_check_existing_media, get_all_document_versions as sqlite_get_all_document_versions, \
58
  fetch_paginated_data as sqlite_fetch_paginated_data, get_latest_transcription as sqlite_get_latest_transcription, \
59
  mark_media_as_processed as sqlite_mark_media_as_processed,
60
  )
61
+ from App_Function_Libraries.DB.RAG_QA_Chat_DB import start_new_conversation as sqlite_start_new_conversation, \
62
+ save_message as sqlite_save_message, load_chat_history as sqlite_load_chat_history, \
63
+ get_all_conversations as sqlite_get_all_conversations, get_notes_by_keywords as sqlite_get_notes_by_keywords, \
64
+ get_note_by_id as sqlite_get_note_by_id, update_note as sqlite_update_note, save_notes as sqlite_save_notes, \
65
+ clear_keywords_from_note as sqlite_clear_keywords_from_note, add_keywords_to_note as sqlite_add_keywords_to_note, \
66
+ add_keywords_to_conversation as sqlite_add_keywords_to_conversation, \
67
+ get_keywords_for_note as sqlite_get_keywords_for_note, delete_note as sqlite_delete_note, \
68
+ search_conversations_by_keywords as sqlite_search_conversations_by_keywords, \
69
+ delete_conversation as sqlite_delete_conversation, get_conversation_title as sqlite_get_conversation_title, \
70
+ update_conversation_title as sqlite_update_conversation_title, \
71
+ fetch_all_conversations as sqlite_fetch_all_conversations, fetch_all_notes as sqlite_fetch_all_notes, \
72
+ fetch_conversations_by_ids as sqlite_fetch_conversations_by_ids, fetch_notes_by_ids as sqlite_fetch_notes_by_ids, \
73
+ delete_messages_in_conversation as sqlite_delete_messages_in_conversation, \
74
+ get_conversation_text as sqlite_get_conversation_text, search_notes_titles as sqlite_search_notes_titles
75
  from App_Function_Libraries.DB.Character_Chat_DB import (
76
  add_character_card as sqlite_add_character_card, get_character_cards as sqlite_get_character_cards, \
77
  get_character_card_by_id as sqlite_get_character_card_by_id, update_character_card as sqlite_update_character_card, \
78
  delete_character_card as sqlite_delete_character_card, add_character_chat as sqlite_add_character_chat, \
79
  get_character_chats as sqlite_get_character_chats, get_character_chat_by_id as sqlite_get_character_chat_by_id, \
80
+ update_character_chat as sqlite_update_character_chat, delete_character_chat as sqlite_delete_character_chat
 
81
  )
82
  #
83
  # Local Imports
 
215
  #
216
  # DB Search functions
217
 
218
+ def search_media_db(search_query: str, search_fields: List[str], keywords: str, page: int = 1, results_per_page: int = 10):
219
  if db_type == 'sqlite':
220
+ return sqlite_search_media_db(search_query, search_fields, keywords, page, results_per_page)
221
  elif db_type == 'elasticsearch':
222
  # Implement Elasticsearch version when available
223
  raise NotImplementedError("Elasticsearch version of search_db not yet implemented")
 
501
  # Implement Elasticsearch version
502
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
503
 
 
 
 
 
 
 
 
504
  def insert_prompt_to_db(*args, **kwargs):
505
  if db_type == 'sqlite':
506
  return sqlite_insert_prompt_to_db(*args, **kwargs)
 
533
  else:
534
  raise ValueError(f"Unsupported database type: {db_type}")
535
 
 
536
  def get_latest_transcription(*args, **kwargs):
537
  if db_type == 'sqlite':
538
  return sqlite_get_latest_transcription(*args, **kwargs)
 
714
  #
715
  # Chat-related Functions
716
 
717
+ def search_notes_titles(*args, **kwargs):
718
+ if db_type == 'sqlite':
719
+ return sqlite_search_notes_titles(*args, **kwargs)
720
+ elif db_type == 'elasticsearch':
721
+ # Implement Elasticsearch version
722
+ raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
723
+
724
+ def save_message(*args, **kwargs):
725
+ if db_type == 'sqlite':
726
+ return sqlite_save_message(*args, **kwargs)
727
+ elif db_type == 'elasticsearch':
728
+ # Implement Elasticsearch version
729
+ raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
730
+
731
+ def load_chat_history(*args, **kwargs):
732
+ if db_type == 'sqlite':
733
+ return sqlite_load_chat_history(*args, **kwargs)
734
+ elif db_type == 'elasticsearch':
735
+ # Implement Elasticsearch version
736
+ raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
737
+
738
+ def start_new_conversation(*args, **kwargs):
739
+ if db_type == 'sqlite':
740
+ return sqlite_start_new_conversation(*args, **kwargs)
741
+ elif db_type == 'elasticsearch':
742
+ # Implement Elasticsearch version
743
+ raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
744
+
745
+ def get_all_conversations(*args, **kwargs):
746
+ if db_type == 'sqlite':
747
+ return sqlite_get_all_conversations(*args, **kwargs)
748
+ elif db_type == 'elasticsearch':
749
+ # Implement Elasticsearch version
750
+ raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
751
+
752
+ def get_notes_by_keywords(*args, **kwargs):
753
+ if db_type == 'sqlite':
754
+ return sqlite_get_notes_by_keywords(*args, **kwargs)
755
+ elif db_type == 'elasticsearch':
756
+ # Implement Elasticsearch version
757
+ raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
758
+
759
+ def get_note_by_id(*args, **kwargs):
760
+ if db_type == 'sqlite':
761
+ return sqlite_get_note_by_id(*args, **kwargs)
762
+ elif db_type == 'elasticsearch':
763
+ # Implement Elasticsearch version
764
+ raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
765
+
766
+ def add_keywords_to_conversation(*args, **kwargs):
767
+ if db_type == 'sqlite':
768
+ return sqlite_add_keywords_to_conversation(*args, **kwargs)
769
+ elif db_type == 'elasticsearch':
770
+ # Implement Elasticsearch version
771
+ raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
772
+
773
+ def get_keywords_for_note(*args, **kwargs):
774
  if db_type == 'sqlite':
775
+ return sqlite_get_keywords_for_note(*args, **kwargs)
776
  elif db_type == 'elasticsearch':
777
  # Implement Elasticsearch version
778
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
779
 
780
+ def delete_note(*args, **kwargs):
781
  if db_type == 'sqlite':
782
+ return sqlite_delete_note(*args, **kwargs)
783
  elif db_type == 'elasticsearch':
784
  # Implement Elasticsearch version
785
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
786
 
787
+ def search_conversations_by_keywords(*args, **kwargs):
788
  if db_type == 'sqlite':
789
+ return sqlite_search_conversations_by_keywords(*args, **kwargs)
790
  elif db_type == 'elasticsearch':
791
  # Implement Elasticsearch version
792
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
793
 
794
+ def delete_conversation(*args, **kwargs):
795
  if db_type == 'sqlite':
796
+ return sqlite_delete_conversation(*args, **kwargs)
797
  elif db_type == 'elasticsearch':
798
  # Implement Elasticsearch version
799
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
800
 
801
+ def get_conversation_title(*args, **kwargs):
802
  if db_type == 'sqlite':
803
+ return sqlite_get_conversation_title(*args, **kwargs)
804
  elif db_type == 'elasticsearch':
805
  # Implement Elasticsearch version
806
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
807
 
808
+ def update_conversation_title(*args, **kwargs):
809
  if db_type == 'sqlite':
810
+ return sqlite_update_conversation_title(*args, **kwargs)
811
  elif db_type == 'elasticsearch':
812
  # Implement Elasticsearch version
813
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
814
 
815
+ def fetch_all_conversations(*args, **kwargs):
816
  if db_type == 'sqlite':
817
+ return sqlite_fetch_all_conversations()
818
  elif db_type == 'elasticsearch':
819
  # Implement Elasticsearch version
820
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
821
 
822
+ def fetch_all_notes(*args, **kwargs):
823
  if db_type == 'sqlite':
824
+ return sqlite_fetch_all_notes()
825
  elif db_type == 'elasticsearch':
826
  # Implement Elasticsearch version
827
  raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
828
 
829
+ def delete_messages_in_conversation(*args, **kwargs):
830
+ if db_type == 'sqlite':
831
+ return sqlite_delete_messages_in_conversation(*args, **kwargs)
832
+ elif db_type == 'elasticsearch':
833
+ # Implement Elasticsearch version
834
+ raise NotImplementedError("Elasticsearch version of delete_messages_in_conversation not yet implemented")
835
+
836
+ def get_conversation_text(*args, **kwargs):
837
+ if db_type == 'sqlite':
838
+ return sqlite_get_conversation_text(*args, **kwargs)
839
+ elif db_type == 'elasticsearch':
840
+ # Implement Elasticsearch version
841
+ raise NotImplementedError("Elasticsearch version of get_conversation_text not yet implemented")
842
+
843
  #
844
  # End of Chat-related Functions
845
  ############################################################################################################
 
919
  # Implement Elasticsearch version
920
  raise NotImplementedError("Elasticsearch version of delete_character_chat not yet implemented")
921
 
922
+ def update_note(*args, **kwargs):
923
+ if db_type == 'sqlite':
924
+ return sqlite_update_note(*args, **kwargs)
925
+ elif db_type == 'elasticsearch':
926
+ # Implement Elasticsearch version
927
+ raise NotImplementedError("Elasticsearch version of update_note not yet implemented")
928
+
929
+ def save_notes(*args, **kwargs):
930
+ if db_type == 'sqlite':
931
+ return sqlite_save_notes(*args, **kwargs)
932
+ elif db_type == 'elasticsearch':
933
+ # Implement Elasticsearch version
934
+ raise NotImplementedError("Elasticsearch version of save_notes not yet implemented")
935
+
936
+ def clear_keywords(*args, **kwargs):
937
+ if db_type == 'sqlite':
938
+ return sqlite_clear_keywords_from_note(*args, **kwargs)
939
+ elif db_type == 'elasticsearch':
940
+ # Implement Elasticsearch version
941
+ raise NotImplementedError("Elasticsearch version of clear_keywords not yet implemented")
942
+
943
+ def clear_keywords_from_note(*args, **kwargs):
944
+ if db_type == 'sqlite':
945
+ return sqlite_clear_keywords_from_note(*args, **kwargs)
946
+ elif db_type == 'elasticsearch':
947
+ # Implement Elasticsearch version
948
+ raise NotImplementedError("Elasticsearch version of clear_keywords_from_note not yet implemented")
949
+
950
+ def add_keywords_to_note(*args, **kwargs):
951
+ if db_type == 'sqlite':
952
+ return sqlite_add_keywords_to_note(*args, **kwargs)
953
+ elif db_type == 'elasticsearch':
954
+ # Implement Elasticsearch version
955
+ raise NotImplementedError("Elasticsearch version of add_keywords_to_note not yet implemented")
956
+
957
+ def fetch_conversations_by_ids(*args, **kwargs):
958
+ if db_type == 'sqlite':
959
+ return sqlite_fetch_conversations_by_ids(*args, **kwargs)
960
+ elif db_type == 'elasticsearch':
961
+ # Implement Elasticsearch version
962
+ raise NotImplementedError("Elasticsearch version of fetch_conversations_by_ids not yet implemented")
963
+
964
+ def fetch_notes_by_ids(*args, **kwargs):
965
  if db_type == 'sqlite':
966
+ return sqlite_fetch_notes_by_ids(*args, **kwargs)
967
  elif db_type == 'elasticsearch':
968
  # Implement Elasticsearch version
969
+ raise NotImplementedError("Elasticsearch version of fetch_notes_by_ids not yet implemented")
970
 
971
  #
972
  # End of Character Chat-related Functions
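# The wrappers in this module all follow the same dispatch pattern: check the
# configured db_type, forward to the SQLite implementation, and leave an
# Elasticsearch branch as a stub. A minimal sketch of that pattern (the function
# names below are placeholders, not part of this module):
def example_wrapper(*args, **kwargs):
    if db_type == 'sqlite':
        return sqlite_example_function(*args, **kwargs)
    elif db_type == 'elasticsearch':
        raise NotImplementedError("Elasticsearch backend not yet implemented")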
App_Function_Libraries/DB/Prompts_DB.py ADDED
@@ -0,0 +1,626 @@
1
+ # Prompts_DB.py
2
+ # Description: Functions to manage the prompts database.
3
+ #
4
+ # Imports
5
+ import sqlite3
6
+ import logging
7
+ #
8
+ # External Imports
9
+ import re
10
+ from typing import Tuple
11
+ #
12
+ # Local Imports
13
+ from App_Function_Libraries.Utils.Utils import get_database_path
14
+ #
15
+ #######################################################################################################################
16
+ #
17
+ # Functions to manage prompts DB
18
+
19
+ def create_prompts_db():
20
+ logging.debug("create_prompts_db: Creating prompts database.")
21
+ with sqlite3.connect(get_database_path('prompts.db')) as conn:
22
+ cursor = conn.cursor()
23
+ cursor.executescript('''
24
+ CREATE TABLE IF NOT EXISTS Prompts (
25
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
26
+ name TEXT NOT NULL UNIQUE,
27
+ author TEXT,
28
+ details TEXT,
29
+ system TEXT,
30
+ user TEXT
31
+ );
32
+ CREATE TABLE IF NOT EXISTS Keywords (
33
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
34
+ keyword TEXT NOT NULL UNIQUE COLLATE NOCASE
35
+ );
36
+ CREATE TABLE IF NOT EXISTS PromptKeywords (
37
+ prompt_id INTEGER,
38
+ keyword_id INTEGER,
39
+ FOREIGN KEY (prompt_id) REFERENCES Prompts (id),
40
+ FOREIGN KEY (keyword_id) REFERENCES Keywords (id),
41
+ PRIMARY KEY (prompt_id, keyword_id)
42
+ );
43
+ CREATE INDEX IF NOT EXISTS idx_keywords_keyword ON Keywords(keyword);
44
+ CREATE INDEX IF NOT EXISTS idx_promptkeywords_prompt_id ON PromptKeywords(prompt_id);
45
+ CREATE INDEX IF NOT EXISTS idx_promptkeywords_keyword_id ON PromptKeywords(keyword_id);
46
+ ''')
47
+
48
+ # FIXME - dirty hack that should be removed later...
49
+ # Migration function to add the 'author' column to the Prompts table
50
+ def add_author_column_to_prompts():
51
+ with sqlite3.connect(get_database_path('prompts.db')) as conn:
52
+ cursor = conn.cursor()
53
+ # Check if 'author' column already exists
54
+ cursor.execute("PRAGMA table_info(Prompts)")
55
+ columns = [col[1] for col in cursor.fetchall()]
56
+
57
+ if 'author' not in columns:
58
+ # Add the 'author' column
59
+ cursor.execute('ALTER TABLE Prompts ADD COLUMN author TEXT')
60
+ print("Author column added to Prompts table.")
61
+ else:
62
+ print("Author column already exists in Prompts table.")
63
+
64
+ add_author_column_to_prompts()
65
+
66
+ def normalize_keyword(keyword):
67
+ return re.sub(r'\s+', ' ', keyword.strip().lower())
68
+
69
+
70
+ # FIXME - update calls to this function to use the new args
71
+ def add_prompt(name, author, details, system=None, user=None, keywords=None):
72
+ logging.debug(f"add_prompt: Adding prompt with name: {name}, author: {author}, system: {system}, user: {user}, keywords: {keywords}")
73
+ if not name:
74
+ logging.error("add_prompt: A name is required.")
75
+ return "A name is required."
76
+
77
+ try:
78
+ with sqlite3.connect(get_database_path('prompts.db')) as conn:
79
+ cursor = conn.cursor()
80
+ cursor.execute('''
81
+ INSERT INTO Prompts (name, author, details, system, user)
82
+ VALUES (?, ?, ?, ?, ?)
83
+ ''', (name, author, details, system, user))
84
+ prompt_id = cursor.lastrowid
85
+
86
+ if keywords:
87
+ normalized_keywords = [normalize_keyword(k) for k in keywords if k.strip()]
88
+ for keyword in set(normalized_keywords): # Use set to remove duplicates
89
+ cursor.execute('''
90
+ INSERT OR IGNORE INTO Keywords (keyword) VALUES (?)
91
+ ''', (keyword,))
92
+ cursor.execute('SELECT id FROM Keywords WHERE keyword = ?', (keyword,))
93
+ keyword_id = cursor.fetchone()[0]
94
+ cursor.execute('''
95
+ INSERT OR IGNORE INTO PromptKeywords (prompt_id, keyword_id) VALUES (?, ?)
96
+ ''', (prompt_id, keyword_id))
97
+ return "Prompt added successfully."
98
+ except sqlite3.IntegrityError:
99
+ return "Prompt with this name already exists."
100
+ except sqlite3.Error as e:
101
+ return f"Database error: {e}"
102
+
103
+
104
+ def fetch_prompt_details(name):
105
+ logging.debug(f"fetch_prompt_details: Fetching details for prompt: {name}")
106
+ with sqlite3.connect(get_database_path('prompts.db')) as conn:
107
+ cursor = conn.cursor()
108
+ cursor.execute('''
109
+ SELECT p.name, p.author, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
110
+ FROM Prompts p
111
+ LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
112
+ LEFT JOIN Keywords k ON pk.keyword_id = k.id
113
+ WHERE p.name = ?
114
+ GROUP BY p.id
115
+ ''', (name,))
116
+ return cursor.fetchone()
117
+
118
+
119
+ def list_prompts(page=1, per_page=10):
120
+ logging.debug(f"list_prompts: Listing prompts for page {page} with {per_page} prompts per page.")
121
+ offset = (page - 1) * per_page
122
+ with sqlite3.connect(get_database_path('prompts.db')) as conn:
123
+ cursor = conn.cursor()
124
+ cursor.execute('SELECT name FROM Prompts LIMIT ? OFFSET ?', (per_page, offset))
125
+ prompts = [row[0] for row in cursor.fetchall()]
126
+
127
+ # Get total count of prompts
128
+ cursor.execute('SELECT COUNT(*) FROM Prompts')
129
+ total_count = cursor.fetchone()[0]
130
+
131
+ total_pages = (total_count + per_page - 1) // per_page
132
+ return prompts, total_pages, page
133
+
134
+
135
+ def insert_prompt_to_db(title, author, description, system_prompt, user_prompt, keywords=None):
136
+ return add_prompt(title, author, description, system_prompt, user_prompt, keywords)
137
+
138
+
139
+ def get_prompt_db_connection():
140
+ prompt_db_path = get_database_path('prompts.db')
141
+ return sqlite3.connect(prompt_db_path)
142
+
143
+
144
+ def search_prompts(query):
145
+ logging.debug(f"search_prompts: Searching prompts with query: {query}")
146
+ try:
147
+ with get_prompt_db_connection() as conn:
148
+ cursor = conn.cursor()
149
+ cursor.execute("""
150
+ SELECT p.name, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
151
+ FROM Prompts p
152
+ LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
153
+ LEFT JOIN Keywords k ON pk.keyword_id = k.id
154
+ WHERE p.name LIKE ? OR p.details LIKE ? OR p.system LIKE ? OR p.user LIKE ? OR k.keyword LIKE ?
155
+ GROUP BY p.id
156
+ ORDER BY p.name
157
+ """, (f'%{query}%', f'%{query}%', f'%{query}%', f'%{query}%', f'%{query}%'))
158
+ return cursor.fetchall()
159
+ except sqlite3.Error as e:
160
+ logging.error(f"Error searching prompts: {e}")
161
+ return []
162
+
163
+
164
+ def search_prompts_by_keyword(keyword, page=1, per_page=10):
165
+ logging.debug(f"search_prompts_by_keyword: Searching prompts by keyword: {keyword}")
166
+ normalized_keyword = normalize_keyword(keyword)
167
+ offset = (page - 1) * per_page
168
+ with sqlite3.connect(get_database_path('prompts.db')) as conn:
169
+ cursor = conn.cursor()
170
+ cursor.execute('''
171
+ SELECT DISTINCT p.name
172
+ FROM Prompts p
173
+ JOIN PromptKeywords pk ON p.id = pk.prompt_id
174
+ JOIN Keywords k ON pk.keyword_id = k.id
175
+ WHERE k.keyword LIKE ?
176
+ LIMIT ? OFFSET ?
177
+ ''', ('%' + normalized_keyword + '%', per_page, offset))
178
+ prompts = [row[0] for row in cursor.fetchall()]
179
+
180
+ # Get total count of matching prompts
181
+ cursor.execute('''
182
+ SELECT COUNT(DISTINCT p.id)
183
+ FROM Prompts p
184
+ JOIN PromptKeywords pk ON p.id = pk.prompt_id
185
+ JOIN Keywords k ON pk.keyword_id = k.id
186
+ WHERE k.keyword LIKE ?
187
+ ''', ('%' + normalized_keyword + '%',))
188
+ total_count = cursor.fetchone()[0]
189
+
190
+ total_pages = (total_count + per_page - 1) // per_page
191
+ return prompts, total_pages, page
192
+
193
+
194
+ def update_prompt_keywords(prompt_name, new_keywords):
195
+ logging.debug(f"update_prompt_keywords: Updating keywords for prompt: {prompt_name}")
196
+ try:
197
+ with sqlite3.connect(get_database_path('prompts.db')) as conn:
198
+ cursor = conn.cursor()
199
+
200
+ cursor.execute('SELECT id FROM Prompts WHERE name = ?', (prompt_name,))
201
+ prompt_id = cursor.fetchone()
202
+ if not prompt_id:
203
+ return "Prompt not found."
204
+ prompt_id = prompt_id[0]
205
+
206
+ cursor.execute('DELETE FROM PromptKeywords WHERE prompt_id = ?', (prompt_id,))
207
+
208
+ normalized_keywords = [normalize_keyword(k) for k in new_keywords if k.strip()]
209
+ for keyword in set(normalized_keywords): # Use set to remove duplicates
210
+ cursor.execute('INSERT OR IGNORE INTO Keywords (keyword) VALUES (?)', (keyword,))
211
+ cursor.execute('SELECT id FROM Keywords WHERE keyword = ?', (keyword,))
212
+ keyword_id = cursor.fetchone()[0]
213
+ cursor.execute('INSERT INTO PromptKeywords (prompt_id, keyword_id) VALUES (?, ?)',
214
+ (prompt_id, keyword_id))
215
+
216
+ # Remove unused keywords
217
+ cursor.execute('''
218
+ DELETE FROM Keywords
219
+ WHERE id NOT IN (SELECT DISTINCT keyword_id FROM PromptKeywords)
220
+ ''')
221
+ return "Keywords updated successfully."
222
+ except sqlite3.Error as e:
223
+ return f"Database error: {e}"
224
+
225
+
226
+ def add_or_update_prompt(title, author, description, system_prompt, user_prompt, keywords=None):
227
+ logging.debug(f"add_or_update_prompt: Adding or updating prompt: {title}")
228
+ if not title:
229
+ return "Error: Title is required."
230
+
231
+ existing_prompt = fetch_prompt_details(title)
232
+ if existing_prompt:
233
+ # Update existing prompt
234
+ result = update_prompt_in_db(title, author, description, system_prompt, user_prompt)
235
+ if "successfully" in result:
236
+ # Update keywords if the prompt update was successful
237
+ keyword_result = update_prompt_keywords(title, keywords or [])
238
+ result += f" {keyword_result}"
239
+ else:
240
+ # Insert new prompt
241
+ result = insert_prompt_to_db(title, author, description, system_prompt, user_prompt, keywords)
242
+
243
+ return result
244
+
245
+
246
+ def load_prompt_details(selected_prompt):
247
+ logging.debug(f"load_prompt_details: Loading prompt details for {selected_prompt}")
248
+ if selected_prompt:
249
+ details = fetch_prompt_details(selected_prompt)
250
+ if details:
251
+ return details[0], details[1], details[2], details[3], details[4], details[5]
252
+ return "", "", "", "", "", ""
253
+
254
+
255
+ def update_prompt_in_db(title, author, description, system_prompt, user_prompt):
256
+ logging.debug(f"update_prompt_in_db: Updating prompt: {title}")
257
+ try:
258
+ with sqlite3.connect(get_database_path('prompts.db')) as conn:
259
+ cursor = conn.cursor()
260
+ cursor.execute(
261
+ "UPDATE Prompts SET author = ?, details = ?, system = ?, user = ? WHERE name = ?",
262
+ (author, description, system_prompt, user_prompt, title)
263
+ )
264
+ if cursor.rowcount == 0:
265
+ return "No prompt found with the given title."
266
+ return "Prompt updated successfully!"
267
+ except sqlite3.Error as e:
268
+ return f"Error updating prompt: {e}"
269
+
270
+
271
+ def delete_prompt(prompt_id):
272
+ logging.debug(f"delete_prompt: Deleting prompt with ID: {prompt_id}")
273
+ try:
274
+ with sqlite3.connect(get_database_path('prompts.db')) as conn:
275
+ cursor = conn.cursor()
276
+
277
+ # Delete associated keywords
278
+ cursor.execute("DELETE FROM PromptKeywords WHERE prompt_id = ?", (prompt_id,))
279
+
280
+ # Delete the prompt
281
+ cursor.execute("DELETE FROM Prompts WHERE id = ?", (prompt_id,))
282
+
283
+ if cursor.rowcount == 0:
284
+ return f"No prompt found with ID {prompt_id}"
285
+ else:
286
+ conn.commit()
287
+ return f"Prompt with ID {prompt_id} has been successfully deleted"
288
+ except sqlite3.Error as e:
289
+ return f"An error occurred: {e}"
290
+
291
+
292
+ def delete_prompt_keyword(keyword: str) -> str:
293
+ """
294
+ Delete a keyword and its associations from the prompts database.
295
+
296
+ Args:
297
+ keyword (str): The keyword to delete
298
+
299
+ Returns:
300
+ str: Success/failure message
301
+ """
302
+ logging.debug(f"delete_prompt_keyword: Deleting keyword: {keyword}")
303
+ try:
304
+ with sqlite3.connect(get_database_path('prompts.db')) as conn:
305
+ cursor = conn.cursor()
306
+
307
+ # First normalize the keyword
308
+ normalized_keyword = normalize_keyword(keyword)
309
+
310
+ # Get the keyword ID
311
+ cursor.execute("SELECT id FROM Keywords WHERE keyword = ?", (normalized_keyword,))
312
+ result = cursor.fetchone()
313
+
314
+ if not result:
315
+ return f"Keyword '{keyword}' not found."
316
+
317
+ keyword_id = result[0]
318
+
319
+ # Delete keyword associations from PromptKeywords
320
+ cursor.execute("DELETE FROM PromptKeywords WHERE keyword_id = ?", (keyword_id,))
321
+
322
+ # Delete the keyword itself
323
+ cursor.execute("DELETE FROM Keywords WHERE id = ?", (keyword_id,))
324
+
325
+ # Get the number of affected prompts
326
+ affected_prompts = cursor.rowcount
327
+
328
+ conn.commit()
329
+
330
+ logging.info(f"Keyword '{keyword}' deleted successfully")
331
+ return f"Successfully deleted keyword '{keyword}' and removed it from {affected_prompts} prompts."
332
+
333
+ except sqlite3.Error as e:
334
+ error_msg = f"Database error deleting keyword: {str(e)}"
335
+ logging.error(error_msg)
336
+ return error_msg
337
+ except Exception as e:
338
+ error_msg = f"Error deleting keyword: {str(e)}"
339
+ logging.error(error_msg)
340
+ return error_msg
341
+
342
+
343
+ def export_prompt_keywords_to_csv() -> Tuple[str, str]:
344
+ """
345
+ Export all prompt keywords to a CSV file with associated metadata.
346
+
347
+ Returns:
348
+ Tuple[str, str]: (status_message, file_path)
349
+ """
350
+ import csv
351
+ import tempfile
352
+ import os
353
+ from datetime import datetime
354
+
355
+ logging.debug("export_prompt_keywords_to_csv: Starting export")
356
+ try:
357
+ # Create a temporary file with a specific name in the system's temp directory
358
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
359
+ temp_dir = tempfile.gettempdir()
360
+ file_path = os.path.join(temp_dir, f'prompt_keywords_export_{timestamp}.csv')
361
+
362
+ with sqlite3.connect(get_database_path('prompts.db')) as conn:
363
+ cursor = conn.cursor()
364
+
365
+ # Get keywords with related prompt information
366
+ query = '''
367
+ SELECT
368
+ k.keyword,
369
+ GROUP_CONCAT(p.name, ' | ') as prompt_names,
370
+ COUNT(DISTINCT p.id) as num_prompts,
371
+ GROUP_CONCAT(DISTINCT p.author, ' | ') as authors
372
+ FROM Keywords k
373
+ LEFT JOIN PromptKeywords pk ON k.id = pk.keyword_id
374
+ LEFT JOIN Prompts p ON pk.prompt_id = p.id
375
+ GROUP BY k.id, k.keyword
376
+ ORDER BY k.keyword
377
+ '''
378
+
379
+ cursor.execute(query)
380
+ results = cursor.fetchall()
381
+
382
+ # Write to CSV
383
+ with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
384
+ writer = csv.writer(csvfile)
385
+ writer.writerow([
386
+ 'Keyword',
387
+ 'Associated Prompts',
388
+ 'Number of Prompts',
389
+ 'Authors'
390
+ ])
391
+
392
+ for row in results:
393
+ writer.writerow([
394
+ row[0], # keyword
395
+ row[1] if row[1] else '', # prompt_names (may be None)
396
+ row[2], # num_prompts
397
+ row[3] if row[3] else '' # authors (may be None)
398
+ ])
399
+
400
+ status_msg = f"Successfully exported {len(results)} prompt keywords to CSV."
401
+ logging.info(status_msg)
402
+
403
+ return status_msg, file_path
404
+
405
+ except sqlite3.Error as e:
406
+ error_msg = f"Database error exporting keywords: {str(e)}"
407
+ logging.error(error_msg)
408
+ return error_msg, "None"
409
+ except Exception as e:
410
+ error_msg = f"Error exporting keywords: {str(e)}"
411
+ logging.error(error_msg)
412
+ return error_msg, "None"
413
+
414
+
415
+ def view_prompt_keywords() -> str:
416
+ """
417
+ View all keywords currently in the prompts database.
418
+
419
+ Returns:
420
+ str: Markdown formatted string of all keywords
421
+ """
422
+ logging.debug("view_prompt_keywords: Retrieving all keywords")
423
+ try:
424
+ with sqlite3.connect(get_database_path('prompts.db')) as conn:
425
+ cursor = conn.cursor()
426
+ cursor.execute("""
427
+ SELECT k.keyword, COUNT(DISTINCT pk.prompt_id) as prompt_count
428
+ FROM Keywords k
429
+ LEFT JOIN PromptKeywords pk ON k.id = pk.keyword_id
430
+ GROUP BY k.id, k.keyword
431
+ ORDER BY k.keyword
432
+ """)
433
+
434
+ keywords = cursor.fetchall()
435
+ if keywords:
436
+ keyword_list = [f"- {k[0]} ({k[1]} prompts)" for k in keywords]
437
+ return "### Current Prompt Keywords:\n" + "\n".join(keyword_list)
438
+ return "No keywords found."
439
+
440
+ except Exception as e:
441
+ error_msg = f"Error retrieving keywords: {str(e)}"
442
+ logging.error(error_msg)
443
+ return error_msg
444
+
445
+
446
+ def export_prompts(
447
+ export_format='csv',
448
+ filter_keywords=None,
449
+ include_system=True,
450
+ include_user=True,
451
+ include_details=True,
452
+ include_author=True,
453
+ include_keywords=True,
454
+ markdown_template=None
455
+ ) -> Tuple[str, str]:
456
+ """
457
+ Export prompts to CSV or Markdown with configurable options.
458
+
459
+ Args:
460
+ export_format (str): 'csv' or 'markdown'
461
+ filter_keywords (List[str], optional): Keywords to filter prompts by
462
+ include_system (bool): Include system prompts in export
463
+ include_user (bool): Include user prompts in export
464
+ include_details (bool): Include prompt details/descriptions
465
+ include_author (bool): Include author information
466
+ include_keywords (bool): Include associated keywords
467
+ markdown_template (str, optional): Template for markdown export
468
+
469
+ Returns:
470
+ Tuple[str, str]: (status_message, file_path)
471
+ """
472
+ import csv
473
+ import tempfile
474
+ import os
475
+ import zipfile
476
+ from datetime import datetime
477
+
478
+ try:
479
+ # Get prompts data
480
+ with get_prompt_db_connection() as conn:
481
+ cursor = conn.cursor()
482
+
483
+ # Build query based on included fields
484
+ select_fields = ['p.name']
485
+ if include_author:
486
+ select_fields.append('p.author')
487
+ if include_details:
488
+ select_fields.append('p.details')
489
+ if include_system:
490
+ select_fields.append('p.system')
491
+ if include_user:
492
+ select_fields.append('p.user')
493
+
494
+ query = f"""
495
+ SELECT DISTINCT {', '.join(select_fields)}
496
+ FROM Prompts p
497
+ """
498
+
499
+ # Add keyword filtering if specified
500
+ if filter_keywords:
501
+ placeholders = ','.join(['?' for _ in filter_keywords])
502
+ query += f"""
503
+ JOIN PromptKeywords pk ON p.id = pk.prompt_id
504
+ JOIN Keywords k ON pk.keyword_id = k.id
505
+ WHERE k.keyword IN ({placeholders})
506
+ """
507
+
508
+ cursor.execute(query, filter_keywords if filter_keywords else ())
509
+ prompts = cursor.fetchall()
510
+
511
+ # Get keywords for each prompt if needed
512
+ if include_keywords:
513
+ prompt_keywords = {}
514
+ for prompt in prompts:
515
+ cursor.execute("""
516
+ SELECT k.keyword
517
+ FROM Keywords k
518
+ JOIN PromptKeywords pk ON k.id = pk.keyword_id
519
+ JOIN Prompts p ON pk.prompt_id = p.id
520
+ WHERE p.name = ?
521
+ """, (prompt[0],))
522
+ prompt_keywords[prompt[0]] = [row[0] for row in cursor.fetchall()]
523
+
524
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
525
+
526
+ if export_format == 'csv':
527
+ # Export as CSV
528
+ temp_file = os.path.join(tempfile.gettempdir(), f'prompts_export_{timestamp}.csv')
529
+ with open(temp_file, 'w', newline='', encoding='utf-8') as csvfile:
530
+ writer = csv.writer(csvfile)
531
+
532
+ # Write header
533
+ header = ['Name']
534
+ if include_author:
535
+ header.append('Author')
536
+ if include_details:
537
+ header.append('Details')
538
+ if include_system:
539
+ header.append('System Prompt')
540
+ if include_user:
541
+ header.append('User Prompt')
542
+ if include_keywords:
543
+ header.append('Keywords')
544
+ writer.writerow(header)
545
+
546
+ # Write data
547
+ for prompt in prompts:
548
+ row = list(prompt)
549
+ if include_keywords:
550
+ row.append(', '.join(prompt_keywords.get(prompt[0], [])))
551
+ writer.writerow(row)
552
+
553
+ return f"Successfully exported {len(prompts)} prompts to CSV.", temp_file
554
+
555
+ else:
556
+ # Export as Markdown files in ZIP
557
+ temp_dir = tempfile.mkdtemp()
558
+ zip_path = os.path.join(tempfile.gettempdir(), f'prompts_export_{timestamp}.zip')
559
+
560
+ # Define markdown templates
561
+ templates = {
562
+ "Basic Template": """# {title}
563
+ {author_section}
564
+ {details_section}
565
+ {system_section}
566
+ {user_section}
567
+ {keywords_section}
568
+ """,
569
+ "Detailed Template": """# {title}
570
+
571
+ ## Author
572
+ {author_section}
573
+
574
+ ## Description
575
+ {details_section}
576
+
577
+ ## System Prompt
578
+ {system_section}
579
+
580
+ ## User Prompt
581
+ {user_section}
582
+
583
+ ## Keywords
584
+ {keywords_section}
585
+ """
586
+ }
587
+
588
+ template = templates.get(markdown_template, markdown_template or templates["Basic Template"])
589
+
590
+ with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
591
+ for prompt in prompts:
592
+ # Create markdown content
593
+ md_content = template.format(
594
+ title=prompt[0],
595
+ author_section=f"Author: {prompt[1]}" if include_author else "",
596
+ details_section=prompt[2] if include_details else "",
597
+ system_section=prompt[3] if include_system else "",
598
+ user_section=prompt[4] if include_user else "",
599
+ keywords_section=', '.join(prompt_keywords.get(prompt[0], [])) if include_keywords else ""
600
+ )
601
+
602
+ # Create safe filename
603
+ safe_filename = re.sub(r'[^\w\-_\. ]', '_', prompt[0])
604
+ md_path = os.path.join(temp_dir, f"{safe_filename}.md")
605
+
606
+ # Write markdown file
607
+ with open(md_path, 'w', encoding='utf-8') as f:
608
+ f.write(md_content)
609
+
610
+ # Add to ZIP
611
+ zipf.write(md_path, os.path.basename(md_path))
612
+
613
+ return f"Successfully exported {len(prompts)} prompts to Markdown files.", zip_path
614
+
615
+ except Exception as e:
616
+ error_msg = f"Error exporting prompts: {str(e)}"
617
+ logging.error(error_msg)
618
+ return error_msg, "None"
619
+
620
+
621
+ create_prompts_db()
622
+
623
+ #
624
+ # End of Prompts_DB.py
625
+ #######################################################################################################################
626
+
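Note: a rough usage sketch of the Prompts_DB API added above; the import path is inferred from the file location and the argument values are illustrative:

    from App_Function_Libraries.DB.Prompts_DB import add_prompt, search_prompts, export_prompts

    # Add a prompt with keywords; a duplicate name is rejected with a message.
    print(add_prompt(
        name="Summarize Transcript",
        author="example",
        details="Condense a transcript into bullet points.",
        system="You are a concise summarizer.",
        user="Summarize the following transcript:",
        keywords=["summarization", "transcripts"],
    ))

    # LIKE-based search across name, details, system, user, and keywords.
    for name, details, system, user, keywords in search_prompts("summar"):
        print(name, keywords)

    # Export everything to CSV; returns (status_message, file_path).
    status, path = export_prompts(export_format='csv')
    print(status, path)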
App_Function_Libraries/DB/RAG_QA_Chat_DB.py CHANGED
@@ -4,39 +4,37 @@
4
  # Imports
5
  import configparser
6
  import logging
 
7
  import re
8
  import sqlite3
9
  import uuid
10
  from contextlib import contextmanager
11
  from datetime import datetime
12
-
13
- from App_Function_Libraries.Utils.Utils import get_project_relative_path, get_database_path
14
-
15
  #
16
  # External Imports
17
  # (No external imports)
18
  #
19
  # Local Imports
20
- # (No additional local imports)
 
21
  #
22
  ########################################################################################################################
23
  #
24
  # Functions:
25
 
26
- # Construct the path to the config file
27
- config_path = get_project_relative_path('Config_Files/config.txt')
28
-
29
- # Read the config file
30
- config = configparser.ConfigParser()
31
- config.read(config_path)
32
-
33
- # Get the SQLite path from the config, or use the default if not specified
34
- if config.has_section('Database') and config.has_option('Database', 'rag_qa_db_path'):
35
- rag_qa_db_path = config.get('Database', 'rag_qa_db_path')
36
- else:
37
- rag_qa_db_path = get_database_path('RAG_QA_Chat.db')
38
-
39
- print(f"RAG QA Chat Database path: {rag_qa_db_path}")
40
 
41
  # Set up logging
42
  logging.basicConfig(level=logging.INFO)
@@ -58,7 +56,9 @@ CREATE TABLE IF NOT EXISTS conversation_metadata (
58
  conversation_id TEXT PRIMARY KEY,
59
  created_at DATETIME NOT NULL,
60
  last_updated DATETIME NOT NULL,
61
- title TEXT NOT NULL
 
 
62
  );
63
 
64
  -- Table for storing keywords
@@ -122,19 +122,137 @@ CREATE INDEX IF NOT EXISTS idx_rag_qa_keyword_collections_parent_id ON rag_qa_ke
122
  CREATE INDEX IF NOT EXISTS idx_rag_qa_collection_keywords_collection_id ON rag_qa_collection_keywords(collection_id);
123
  CREATE INDEX IF NOT EXISTS idx_rag_qa_collection_keywords_keyword_id ON rag_qa_collection_keywords(keyword_id);
124
 
125
- -- Full-text search virtual table for chat content
126
- CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_chats_fts USING fts5(conversation_id, timestamp, role, content);
 
 
 
127
 
128
- -- Trigger to keep the FTS table up to date
 
129
  CREATE TRIGGER IF NOT EXISTS rag_qa_chats_ai AFTER INSERT ON rag_qa_chats BEGIN
130
- INSERT INTO rag_qa_chats_fts(conversation_id, timestamp, role, content) VALUES (new.conversation_id, new.timestamp, new.role, new.content);
 
 
 
 
 
131
  END;
132
  '''
133
 
134
  # Database connection management
135
  @contextmanager
136
  def get_db_connection():
137
- conn = sqlite3.connect(rag_qa_db_path)
 
138
  try:
139
  yield conn
140
  finally:
@@ -168,10 +286,43 @@ def execute_query(query, params=None, conn=None):
168
  conn.commit()
169
  return cursor.fetchall()
170
 
 
171
  def create_tables():
 
172
  with get_db_connection() as conn:
173
- conn.executescript(SCHEMA_SQL)
174
- logger.info("All RAG QA Chat tables created successfully")
 
 
 
 
175
 
176
  # Initialize the database
177
  create_tables()
@@ -197,6 +348,7 @@ def validate_keyword(keyword):
197
  raise ValueError("Keyword contains invalid characters")
198
  return keyword.strip()
199
 
 
200
  def validate_collection_name(name):
201
  if not isinstance(name, str):
202
  raise ValueError("Collection name must be a string")
@@ -208,6 +360,7 @@ def validate_collection_name(name):
208
  raise ValueError("Collection name contains invalid characters")
209
  return name.strip()
210
 
 
211
  # Core functions
212
  def add_keyword(keyword, conn=None):
213
  try:
@@ -222,6 +375,7 @@ def add_keyword(keyword, conn=None):
222
  logger.error(f"Error adding keyword '{keyword}': {e}")
223
  raise
224
 
 
225
  def create_keyword_collection(name, parent_id=None):
226
  try:
227
  validated_name = validate_collection_name(name)
@@ -235,6 +389,7 @@ def create_keyword_collection(name, parent_id=None):
235
  logger.error(f"Error creating keyword collection '{name}': {e}")
236
  raise
237
 
 
238
  def add_keyword_to_collection(collection_name, keyword):
239
  try:
240
  validated_collection_name = validate_collection_name(collection_name)
@@ -259,6 +414,7 @@ def add_keyword_to_collection(collection_name, keyword):
259
  logger.error(f"Error adding keyword '{keyword}' to collection '{collection_name}': {e}")
260
  raise
261
 
 
262
  def add_keywords_to_conversation(conversation_id, keywords):
263
  if not isinstance(keywords, (list, tuple)):
264
  raise ValueError("Keywords must be a list or tuple")
@@ -282,6 +438,23 @@ def add_keywords_to_conversation(conversation_id, keywords):
282
  logger.error(f"Error adding keywords to conversation '{conversation_id}': {e}")
283
  raise
284
 
 
 
 
285
  def get_keywords_for_conversation(conversation_id):
286
  try:
287
  query = '''
@@ -298,6 +471,7 @@ def get_keywords_for_conversation(conversation_id):
298
  logger.error(f"Error getting keywords for conversation '{conversation_id}': {e}")
299
  raise
300
 
 
301
  def get_keywords_for_collection(collection_name):
302
  try:
303
  query = '''
@@ -315,6 +489,116 @@ def get_keywords_for_collection(collection_name):
315
  logger.error(f"Error getting keywords for collection '{collection_name}': {e}")
316
  raise
317
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  #
319
  # End of Keyword-related functions
320
  ###################################################
@@ -339,6 +623,7 @@ def save_notes(conversation_id, title, content):
339
  logger.error(f"Error saving notes for conversation '{conversation_id}': {e}")
340
  raise
341
 
 
342
  def update_note(note_id, title, content):
343
  try:
344
  query = "UPDATE rag_qa_notes SET title = ?, content = ?, timestamp = ? WHERE id = ?"
@@ -349,6 +634,121 @@ def update_note(note_id, title, content):
349
  logger.error(f"Error updating note ID '{note_id}': {e}")
350
  raise
351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
  def get_notes(conversation_id):
353
  """Retrieve notes for a given conversation."""
354
  try:
@@ -361,6 +761,7 @@ def get_notes(conversation_id):
361
  logger.error(f"Error getting notes for conversation '{conversation_id}': {e}")
362
  raise
363
 
 
364
  def get_note_by_id(note_id):
365
  try:
366
  query = "SELECT id, title, content FROM rag_qa_notes WHERE id = ?"
@@ -370,9 +771,21 @@ def get_note_by_id(note_id):
370
  logger.error(f"Error getting note by ID '{note_id}': {e}")
371
  raise
372
 
 
373
  def get_notes_by_keywords(keywords, page=1, page_size=20):
374
  try:
375
- placeholders = ','.join(['?'] * len(keywords))
 
 
 
376
  query = f'''
377
  SELECT n.id, n.title, n.content, n.timestamp
378
  FROM rag_qa_notes n
@@ -381,14 +794,15 @@ def get_notes_by_keywords(keywords, page=1, page_size=20):
381
  WHERE k.keyword IN ({placeholders})
382
  ORDER BY n.timestamp DESC
383
  '''
384
- results, total_pages, total_count = get_paginated_results(query, tuple(keywords), page, page_size)
385
- logger.info(f"Retrieved {len(results)} notes matching keywords: {', '.join(keywords)} (page {page} of {total_pages})")
386
  notes = [(row[0], row[1], row[2], row[3]) for row in results]
387
  return notes, total_pages, total_count
388
  except Exception as e:
389
  logger.error(f"Error getting notes by keywords: {e}")
390
  raise
391
 
 
392
  def get_notes_by_keyword_collection(collection_name, page=1, page_size=20):
393
  try:
394
  query = '''
@@ -501,9 +915,10 @@ def delete_note(note_id):
501
  #
502
  # Chat-related functions
503
 
504
- def save_message(conversation_id, role, content):
505
  try:
506
- timestamp = datetime.now().isoformat()
 
507
  query = "INSERT INTO rag_qa_chats (conversation_id, timestamp, role, content) VALUES (?, ?, ?, ?)"
508
  execute_query(query, (conversation_id, timestamp, role, content))
509
 
@@ -516,29 +931,103 @@ def save_message(conversation_id, role, content):
516
  logger.error(f"Error saving message for conversation '{conversation_id}': {e}")
517
  raise
518
 
519
- def start_new_conversation(title="Untitled Conversation"):
 
520
  try:
521
  conversation_id = str(uuid.uuid4())
522
- query = "INSERT INTO conversation_metadata (conversation_id, created_at, last_updated, title) VALUES (?, ?, ?, ?)"
 
 
 
 
523
  now = datetime.now().isoformat()
524
- execute_query(query, (conversation_id, now, now, title))
525
- logger.info(f"New conversation '{conversation_id}' started with title '{title}'")
 
526
  return conversation_id
527
  except Exception as e:
528
  logger.error(f"Error starting new conversation: {e}")
529
  raise
530
 
 
531
  def get_all_conversations(page=1, page_size=20):
532
  try:
533
- query = "SELECT conversation_id, title FROM conversation_metadata ORDER BY last_updated DESC"
534
- results, total_pages, total_count = get_paginated_results(query, page=page, page_size=page_size)
535
- conversations = [(row[0], row[1]) for row in results]
536
- logger.info(f"Retrieved {len(conversations)} conversations (page {page} of {total_pages})")
537
- return conversations, total_pages, total_count
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
538
  except Exception as e:
539
- logger.error(f"Error getting conversations: {e}")
540
  raise
541
 
 
542
  # Pagination helper function
543
  def get_paginated_results(query, params=None, page=1, page_size=20):
544
  try:
@@ -564,6 +1053,7 @@ def get_paginated_results(query, params=None, page=1, page_size=20):
564
  logger.error(f"Error retrieving paginated results: {e}")
565
  raise
566
 
 
567
  def get_all_collections(page=1, page_size=20):
568
  try:
569
  query = "SELECT name FROM rag_qa_keyword_collections"
@@ -575,24 +1065,79 @@ def get_all_collections(page=1, page_size=20):
575
  logger.error(f"Error getting collections: {e}")
576
  raise
577
 
578
- def search_conversations_by_keywords(keywords, page=1, page_size=20):
 
579
  try:
580
- placeholders = ','.join(['?' for _ in keywords])
581
- query = f'''
582
- SELECT DISTINCT cm.conversation_id, cm.title
583
  FROM conversation_metadata cm
584
- JOIN rag_qa_conversation_keywords ck ON cm.conversation_id = ck.conversation_id
585
- JOIN rag_qa_keywords k ON ck.keyword_id = k.id
586
- WHERE k.keyword IN ({placeholders})
587
- '''
588
- results, total_pages, total_count = get_paginated_results(query, tuple(keywords), page, page_size)
589
- logger.info(
590
- f"Found {total_count} conversations matching keywords: {', '.join(keywords)} (page {page} of {total_pages})")
591
- return results, total_pages, total_count
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592
  except Exception as e:
593
- logger.error(f"Error searching conversations by keywords {keywords}: {e}")
594
  raise
595
 
 
596
  def load_chat_history(conversation_id, page=1, page_size=50):
597
  try:
598
  query = "SELECT role, content FROM rag_qa_chats WHERE conversation_id = ? ORDER BY timestamp"
@@ -604,6 +1149,7 @@ def load_chat_history(conversation_id, page=1, page_size=50):
604
  logger.error(f"Error loading chat history for conversation '{conversation_id}': {e}")
605
  raise
606
 
 
607
  def update_conversation_title(conversation_id, new_title):
608
  """Update the title of a conversation."""
609
  try:
@@ -614,6 +1160,59 @@ def update_conversation_title(conversation_id, new_title):
614
  logger.error(f"Error updating conversation title: {e}")
615
  raise
616
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
617
  def delete_conversation(conversation_id):
618
  """Delete a conversation and its associated messages and notes."""
619
  try:
@@ -633,11 +1232,203 @@ def delete_conversation(conversation_id):
633
  logger.error(f"Error deleting conversation '{conversation_id}': {e}")
634
  raise
635
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
636
  #
637
  # End of Chat-related functions
638
  ###################################################
639
 
640
 
 
 
 
 
 
 
 
 
 
 
641
  ###################################################
642
  #
643
  # Functions to export DB data
 
4
  # Imports
5
  import configparser
6
  import logging
7
+ import os
8
  import re
9
  import sqlite3
10
  import uuid
11
  from contextlib import contextmanager
12
  from datetime import datetime
13
+ from pathlib import Path
14
+ from typing import List, Dict, Any, Tuple, Optional
 
15
  #
16
  # External Imports
17
  # (No external imports)
18
  #
19
  # Local Imports
20
+ from App_Function_Libraries.Utils.Utils import get_project_relative_path, get_project_root
21
+
22
  #
23
  ########################################################################################################################
24
  #
25
  # Functions:
26
 
27
+ def get_rag_qa_db_path():
28
+ config_path = os.path.join(get_project_root(), 'Config_Files', 'config.txt')
29
+ config = configparser.ConfigParser()
30
+ config.read(config_path)
31
+ if config.has_section('Database') and config.has_option('Database', 'rag_qa_db_path'):
32
+ rag_qa_db_path = config.get('Database', 'rag_qa_db_path')
33
+ if not os.path.isabs(rag_qa_db_path):
34
+ rag_qa_db_path = get_project_relative_path(rag_qa_db_path)
35
+ return rag_qa_db_path
36
+ else:
37
+ raise ValueError("Database path not found in config file")
 
 
 
38
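Note: get_rag_qa_db_path() above expects a [Database] section in Config_Files/config.txt; a hypothetical entry (the path value is only an example, and relative paths are resolved with get_project_relative_path):

    [Database]
    rag_qa_db_path = Databases/RAG_QA_Chat.db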
 
39
  # Set up logging
40
  logging.basicConfig(level=logging.INFO)
 
56
  conversation_id TEXT PRIMARY KEY,
57
  created_at DATETIME NOT NULL,
58
  last_updated DATETIME NOT NULL,
59
+ title TEXT NOT NULL,
60
+ media_id INTEGER,
61
+ rating INTEGER CHECK(rating BETWEEN 1 AND 3)
62
  );
63
 
64
  -- Table for storing keywords
 
122
  CREATE INDEX IF NOT EXISTS idx_rag_qa_collection_keywords_collection_id ON rag_qa_collection_keywords(collection_id);
123
  CREATE INDEX IF NOT EXISTS idx_rag_qa_collection_keywords_keyword_id ON rag_qa_collection_keywords(keyword_id);
124
 
125
+ -- Full-text search virtual tables
126
+ CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_chats_fts USING fts5(
127
+ content,
128
+ content='rag_qa_chats',
129
+ content_rowid='id'
130
+ );
131
+
132
+ -- FTS table for conversation metadata
133
+ CREATE VIRTUAL TABLE IF NOT EXISTS conversation_metadata_fts USING fts5(
134
+ title,
135
+ content='conversation_metadata',
136
+ content_rowid='rowid'
137
+ );
138
+
139
+ -- FTS table for keywords
140
+ CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_keywords_fts USING fts5(
141
+ keyword,
142
+ content='rag_qa_keywords',
143
+ content_rowid='id'
144
+ );
145
+
146
+ -- FTS table for keyword collections
147
+ CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_keyword_collections_fts USING fts5(
148
+ name,
149
+ content='rag_qa_keyword_collections',
150
+ content_rowid='id'
151
+ );
152
+
153
+ -- FTS table for notes
154
+ CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_notes_fts USING fts5(
155
+ title,
156
+ content,
157
+ content='rag_qa_notes',
158
+ content_rowid='id'
159
+ );
160
+ -- FTS table for notes (modified to include both title and content)
161
+ CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_notes_fts USING fts5(
162
+ title,
163
+ content,
164
+ content='rag_qa_notes',
165
+ content_rowid='id'
166
+ );
167
 
168
+ -- Triggers for maintaining FTS indexes
169
+ -- Triggers for rag_qa_chats
170
  CREATE TRIGGER IF NOT EXISTS rag_qa_chats_ai AFTER INSERT ON rag_qa_chats BEGIN
171
+ INSERT INTO rag_qa_chats_fts(rowid, content)
172
+ VALUES (new.id, new.content);
173
+ END;
174
+
175
+ CREATE TRIGGER IF NOT EXISTS rag_qa_chats_au AFTER UPDATE ON rag_qa_chats BEGIN
176
+ UPDATE rag_qa_chats_fts
177
+ SET content = new.content
178
+ WHERE rowid = old.id;
179
+ END;
180
+
181
+ CREATE TRIGGER IF NOT EXISTS rag_qa_chats_ad AFTER DELETE ON rag_qa_chats BEGIN
182
+ DELETE FROM rag_qa_chats_fts WHERE rowid = old.id;
183
+ END;
184
+
185
+ -- Triggers for conversation_metadata
186
+ CREATE TRIGGER IF NOT EXISTS conversation_metadata_ai AFTER INSERT ON conversation_metadata BEGIN
187
+ INSERT INTO conversation_metadata_fts(rowid, title)
188
+ VALUES (new.rowid, new.title);
189
+ END;
190
+
191
+ CREATE TRIGGER IF NOT EXISTS conversation_metadata_au AFTER UPDATE ON conversation_metadata BEGIN
192
+ UPDATE conversation_metadata_fts
193
+ SET title = new.title
194
+ WHERE rowid = old.rowid;
195
+ END;
196
+
197
+ CREATE TRIGGER IF NOT EXISTS conversation_metadata_ad AFTER DELETE ON conversation_metadata BEGIN
198
+ DELETE FROM conversation_metadata_fts WHERE rowid = old.rowid;
199
+ END;
200
+
201
+ -- Triggers for rag_qa_keywords
202
+ CREATE TRIGGER IF NOT EXISTS rag_qa_keywords_ai AFTER INSERT ON rag_qa_keywords BEGIN
203
+ INSERT INTO rag_qa_keywords_fts(rowid, keyword)
204
+ VALUES (new.id, new.keyword);
205
+ END;
206
+
207
+ CREATE TRIGGER IF NOT EXISTS rag_qa_keywords_au AFTER UPDATE ON rag_qa_keywords BEGIN
208
+ UPDATE rag_qa_keywords_fts
209
+ SET keyword = new.keyword
210
+ WHERE rowid = old.id;
211
+ END;
212
+
213
+ CREATE TRIGGER IF NOT EXISTS rag_qa_keywords_ad AFTER DELETE ON rag_qa_keywords BEGIN
214
+ DELETE FROM rag_qa_keywords_fts WHERE rowid = old.id;
215
+ END;
216
+
217
+ -- Triggers for rag_qa_keyword_collections
218
+ CREATE TRIGGER IF NOT EXISTS rag_qa_keyword_collections_ai AFTER INSERT ON rag_qa_keyword_collections BEGIN
219
+ INSERT INTO rag_qa_keyword_collections_fts(rowid, name)
220
+ VALUES (new.id, new.name);
221
+ END;
222
+
223
+ CREATE TRIGGER IF NOT EXISTS rag_qa_keyword_collections_au AFTER UPDATE ON rag_qa_keyword_collections BEGIN
224
+ UPDATE rag_qa_keyword_collections_fts
225
+ SET name = new.name
226
+ WHERE rowid = old.id;
227
+ END;
228
+
229
+ CREATE TRIGGER IF NOT EXISTS rag_qa_keyword_collections_ad AFTER DELETE ON rag_qa_keyword_collections BEGIN
230
+ DELETE FROM rag_qa_keyword_collections_fts WHERE rowid = old.id;
231
+ END;
232
+
233
+ -- Triggers for rag_qa_notes
234
+ CREATE TRIGGER IF NOT EXISTS rag_qa_notes_ai AFTER INSERT ON rag_qa_notes BEGIN
235
+ INSERT INTO rag_qa_notes_fts(rowid, title, content)
236
+ VALUES (new.id, new.title, new.content);
237
+ END;
238
+
239
+ CREATE TRIGGER IF NOT EXISTS rag_qa_notes_au AFTER UPDATE ON rag_qa_notes BEGIN
240
+ UPDATE rag_qa_notes_fts
241
+ SET title = new.title,
242
+ content = new.content
243
+ WHERE rowid = old.id;
244
+ END;
245
+
246
+ CREATE TRIGGER IF NOT EXISTS rag_qa_notes_ad AFTER DELETE ON rag_qa_notes BEGIN
247
+ DELETE FROM rag_qa_notes_fts WHERE rowid = old.id;
248
  END;
249
  '''
250
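Note: the *_ai/_au/_ad triggers above keep each FTS5 index in step with its base table, so full-text queries join back to the source row by rowid. A small sketch (database path and search term are illustrative):

    import sqlite3

    conn = sqlite3.connect("RAG_QA_Chat.db")  # illustrative path
    cursor = conn.cursor()
    cursor.execute("""
        SELECT n.id, n.title
        FROM rag_qa_notes n
        JOIN rag_qa_notes_fts fts ON n.id = fts.rowid
        WHERE fts.title MATCH ?
        ORDER BY rank
    """, ("transcript",))
    print(cursor.fetchall())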
 
251
  # Database connection management
252
  @contextmanager
253
  def get_db_connection():
254
+ db_path = get_rag_qa_db_path()
255
+ conn = sqlite3.connect(db_path)
256
  try:
257
  yield conn
258
  finally:
 
286
  conn.commit()
287
  return cursor.fetchall()
288
 
289
+
290
  def create_tables():
291
+ """Create database tables and initialize FTS indexes."""
292
  with get_db_connection() as conn:
293
+ cursor = conn.cursor()
294
+ # Execute the SCHEMA_SQL to create tables and triggers
295
+ cursor.executescript(SCHEMA_SQL)
296
+
297
+ # Check and populate all FTS tables
298
+ fts_tables = [
299
+ ('rag_qa_notes_fts', 'rag_qa_notes', ['title', 'content']),
300
+ ('rag_qa_chats_fts', 'rag_qa_chats', ['content']),
301
+ ('conversation_metadata_fts', 'conversation_metadata', ['title']),
302
+ ('rag_qa_keywords_fts', 'rag_qa_keywords', ['keyword']),
303
+ ('rag_qa_keyword_collections_fts', 'rag_qa_keyword_collections', ['name'])
304
+ ]
305
+
306
+ for fts_table, source_table, columns in fts_tables:
307
+ # Check if FTS table needs population
308
+ cursor.execute(f"SELECT COUNT(*) FROM {fts_table}")
309
+ fts_count = cursor.fetchone()[0]
310
+ cursor.execute(f"SELECT COUNT(*) FROM {source_table}")
311
+ source_count = cursor.fetchone()[0]
312
+
313
+ if fts_count != source_count:
314
+ # Repopulate FTS table
315
+ logger.info(f"Repopulating {fts_table}")
316
+ cursor.execute(f"DELETE FROM {fts_table}")
317
+ columns_str = ', '.join(columns)
318
+ source_columns = ', '.join([f"id" if source_table != 'conversation_metadata' else "rowid"] + columns)
319
+ cursor.execute(f"""
320
+ INSERT INTO {fts_table}(rowid, {columns_str})
321
+ SELECT {source_columns} FROM {source_table}
322
+ """)
323
+
324
+ logger.info("All RAG QA Chat tables and triggers created successfully")
325
+
326
 
327
  # Initialize the database
328
  create_tables()
 
348
  raise ValueError("Keyword contains invalid characters")
349
  return keyword.strip()
350
 
351
+
352
  def validate_collection_name(name):
353
  if not isinstance(name, str):
354
  raise ValueError("Collection name must be a string")
 
360
  raise ValueError("Collection name contains invalid characters")
361
  return name.strip()
362
 
363
+
364
  # Core functions
365
  def add_keyword(keyword, conn=None):
366
  try:
 
375
  logger.error(f"Error adding keyword '{keyword}': {e}")
376
  raise
377
 
378
+
379
  def create_keyword_collection(name, parent_id=None):
380
  try:
381
  validated_name = validate_collection_name(name)
 
389
  logger.error(f"Error creating keyword collection '{name}': {e}")
390
  raise
391
 
392
+
393
  def add_keyword_to_collection(collection_name, keyword):
394
  try:
395
  validated_collection_name = validate_collection_name(collection_name)
 
414
  logger.error(f"Error adding keyword '{keyword}' to collection '{collection_name}': {e}")
415
  raise
416
 
417
+
418
  def add_keywords_to_conversation(conversation_id, keywords):
419
  if not isinstance(keywords, (list, tuple)):
420
  raise ValueError("Keywords must be a list or tuple")
 
438
  logger.error(f"Error adding keywords to conversation '{conversation_id}': {e}")
439
  raise
440
 
441
+
442
+ def view_rag_keywords():
443
+ try:
444
+ rag_db_path = get_rag_qa_db_path()
445
+ with sqlite3.connect(rag_db_path) as conn:
446
+ cursor = conn.cursor()
447
+ cursor.execute("SELECT keyword FROM rag_qa_keywords ORDER BY keyword")
448
+ keywords = cursor.fetchall()
449
+ if keywords:
450
+ keyword_list = [k[0] for k in keywords]
451
+ return "### Current RAG QA Keywords:\n" + "\n".join(
452
+ [f"- {k}" for k in keyword_list])
453
+ return "No keywords found."
454
+ except Exception as e:
455
+ return f"Error retrieving keywords: {str(e)}"
456
+
457
+
458
  def get_keywords_for_conversation(conversation_id):
459
  try:
460
  query = '''
 
471
  logger.error(f"Error getting keywords for conversation '{conversation_id}': {e}")
472
  raise
473
 
474
+
475
  def get_keywords_for_collection(collection_name):
476
  try:
477
  query = '''
 
489
  logger.error(f"Error getting keywords for collection '{collection_name}': {e}")
490
  raise
491
 
492
+
493
+ def delete_rag_keyword(keyword: str) -> str:
494
+ """
495
+ Delete a keyword from the RAG QA database and all its associations.
496
+
497
+ Args:
498
+ keyword (str): The keyword to delete
499
+
500
+ Returns:
501
+ str: Success/failure message
502
+ """
503
+ try:
504
+ # Validate the keyword
505
+ validated_keyword = validate_keyword(keyword)
506
+
507
+ with transaction() as conn:
508
+ # First, get the keyword ID
509
+ cursor = conn.cursor()
510
+ cursor.execute("SELECT id FROM rag_qa_keywords WHERE keyword = ?", (validated_keyword,))
511
+ result = cursor.fetchone()
512
+
513
+ if not result:
514
+ return f"Keyword '{validated_keyword}' not found."
515
+
516
+ keyword_id = result[0]
517
+
518
+ # Delete from all associated tables
519
+ cursor.execute("DELETE FROM rag_qa_conversation_keywords WHERE keyword_id = ?", (keyword_id,))
520
+ cursor.execute("DELETE FROM rag_qa_collection_keywords WHERE keyword_id = ?", (keyword_id,))
521
+ cursor.execute("DELETE FROM rag_qa_note_keywords WHERE keyword_id = ?", (keyword_id,))
522
+
523
+ # Finally, delete the keyword itself
524
+ cursor.execute("DELETE FROM rag_qa_keywords WHERE id = ?", (keyword_id,))
525
+
526
+ logger.info(f"Keyword '{validated_keyword}' deleted successfully")
527
+ return f"Successfully deleted keyword '{validated_keyword}' and all its associations."
528
+
529
+ except ValueError as e:
530
+ error_msg = f"Invalid keyword: {str(e)}"
531
+ logger.error(error_msg)
532
+ return error_msg
533
+ except Exception as e:
534
+ error_msg = f"Error deleting keyword: {str(e)}"
535
+ logger.error(error_msg)
536
+ return error_msg
537
+
538
+
539
+ def export_rag_keywords_to_csv() -> Tuple[str, str]:
540
+ """
541
+ Export all RAG QA keywords to a CSV file.
542
+
543
+ Returns:
544
+ Tuple[str, str]: (status_message, file_path)
545
+ """
546
+ import csv
547
+ from tempfile import NamedTemporaryFile
548
+ from datetime import datetime
549
+
550
+ try:
551
+ # Create a temporary CSV file
552
+ temp_file = NamedTemporaryFile(mode='w+', delete=False, suffix='.csv', newline='')
553
+
554
+ with transaction() as conn:
555
+ cursor = conn.cursor()
556
+
557
+ # Get all keywords and their associations
558
+ query = """
559
+ SELECT
560
+ k.keyword,
561
+ GROUP_CONCAT(DISTINCT c.name) as collections,
562
+ COUNT(DISTINCT ck.conversation_id) as num_conversations,
563
+ COUNT(DISTINCT nk.note_id) as num_notes
564
+ FROM rag_qa_keywords k
565
+ LEFT JOIN rag_qa_collection_keywords col_k ON k.id = col_k.keyword_id
566
+ LEFT JOIN rag_qa_keyword_collections c ON col_k.collection_id = c.id
567
+ LEFT JOIN rag_qa_conversation_keywords ck ON k.id = ck.keyword_id
568
+ LEFT JOIN rag_qa_note_keywords nk ON k.id = nk.keyword_id
569
+ GROUP BY k.id, k.keyword
570
+ ORDER BY k.keyword
571
+ """
572
+
573
+ cursor.execute(query)
574
+ results = cursor.fetchall()
575
+
576
+ # Write to CSV
577
+ writer = csv.writer(temp_file)
578
+ writer.writerow(['Keyword', 'Collections', 'Number of Conversations', 'Number of Notes'])
579
+
580
+ for row in results:
581
+ writer.writerow([
582
+ row[0], # keyword
583
+ row[1] if row[1] else '', # collections (may be None)
584
+ row[2], # num_conversations
585
+ row[3] # num_notes
586
+ ])
587
+
588
+ temp_file.close()
589
+
590
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
591
+ status_msg = f"Successfully exported {len(results)} keywords to CSV."
592
+ logger.info(status_msg)
593
+
594
+ return status_msg, temp_file.name
595
+
596
+ except Exception as e:
597
+ error_msg = f"Error exporting keywords: {str(e)}"
598
+ logger.error(error_msg)
599
+ return error_msg, ""
600
+
601
+
602
  #
603
  # End of Keyword-related functions
604
  ###################################################
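Note: a quick sketch of how the keyword helpers above might be called (import path inferred from the file location; the keyword value is illustrative):

    from App_Function_Libraries.DB.RAG_QA_Chat_DB import (
        view_rag_keywords, delete_rag_keyword, export_rag_keywords_to_csv
    )

    print(view_rag_keywords())               # Markdown-formatted keyword list
    print(delete_rag_keyword("old-topic"))   # Removes the keyword and all its associations
    status, csv_path = export_rag_keywords_to_csv()
    print(status, csv_path)                  # CSV with collections and usage counts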
 
623
  logger.error(f"Error saving notes for conversation '{conversation_id}': {e}")
624
  raise
625
 
626
+
627
  def update_note(note_id, title, content):
628
  try:
629
  query = "UPDATE rag_qa_notes SET title = ?, content = ?, timestamp = ? WHERE id = ?"
 
634
  logger.error(f"Error updating note ID '{note_id}': {e}")
635
  raise
636
 
637
+
638
+ def search_notes_titles(search_term: str, page: int = 1, results_per_page: int = 20, connection=None) -> Tuple[
639
+ List[Tuple], int, int]:
640
+ """
641
+ Search note titles using full-text search. Returns all notes if search_term is empty.
642
+
643
+ Args:
644
+ search_term (str): The search term for note titles. If empty, returns all notes.
645
+ page (int, optional): Page number for pagination. Defaults to 1.
646
+ results_per_page (int, optional): Number of results per page. Defaults to 20.
647
+ connection (sqlite3.Connection, optional): Database connection. Uses new connection if not provided.
648
+
649
+ Returns:
650
+ Tuple[List[Tuple], int, int]: Tuple containing:
651
+ - List of tuples: (note_id, title, content, timestamp, conversation_id)
652
+ - Total number of pages
653
+ - Total count of matching records
654
+
655
+ Raises:
656
+ ValueError: If page number is less than 1
657
+ sqlite3.Error: If there's a database error
658
+ """
659
+ if page < 1:
660
+ raise ValueError("Page number must be 1 or greater.")
661
+
662
+ offset = (page - 1) * results_per_page
663
+
664
+ def execute_search(conn):
665
+ cursor = conn.cursor()
666
+
667
+ # Debug: Show table contents
668
+ cursor.execute("SELECT title FROM rag_qa_notes")
669
+ main_titles = cursor.fetchall()
670
+ logger.debug(f"Main table titles: {main_titles}")
671
+
672
+ cursor.execute("SELECT title FROM rag_qa_notes_fts")
673
+ fts_titles = cursor.fetchall()
674
+ logger.debug(f"FTS table titles: {fts_titles}")
675
+
676
+ if not search_term.strip():
677
+ # Query for all notes
678
+ cursor.execute(
679
+ """
680
+ SELECT COUNT(*)
681
+ FROM rag_qa_notes
682
+ """
683
+ )
684
+ total_count = cursor.fetchone()[0]
685
+
686
+ cursor.execute(
687
+ """
688
+ SELECT id, title, content, timestamp, conversation_id
689
+ FROM rag_qa_notes
690
+ ORDER BY timestamp DESC
691
+ LIMIT ? OFFSET ?
692
+ """,
693
+ (results_per_page, offset)
694
+ )
695
+ results = cursor.fetchall()
696
+ else:
697
+ # Search query
698
+ search_term_clean = search_term.strip().lower()
699
+
700
+ # Test direct FTS search
701
+ cursor.execute(
702
+ """
703
+ SELECT COUNT(*)
704
+ FROM rag_qa_notes n
705
+ JOIN rag_qa_notes_fts fts ON n.id = fts.rowid
706
+ WHERE fts.title MATCH ?
707
+ """,
708
+ (search_term_clean,)
709
+ )
710
+ total_count = cursor.fetchone()[0]
711
+
712
+ cursor.execute(
713
+ """
714
+ SELECT
715
+ n.id,
716
+ n.title,
717
+ n.content,
718
+ n.timestamp,
719
+ n.conversation_id
720
+ FROM rag_qa_notes n
721
+ JOIN rag_qa_notes_fts fts ON n.id = fts.rowid
722
+ WHERE fts.title MATCH ?
723
+ ORDER BY rank
724
+ LIMIT ? OFFSET ?
725
+ """,
726
+ (search_term_clean, results_per_page, offset)
727
+ )
728
+ results = cursor.fetchall()
729
+
730
+ logger.debug(f"Search term: {search_term_clean}")
731
+ logger.debug(f"Results: {results}")
732
+
733
+ total_pages = max(1, (total_count + results_per_page - 1) // results_per_page)
734
+ logger.info(f"Found {total_count} matching notes for search term '{search_term}'")
735
+
736
+ return results, total_pages, total_count
737
+
738
+ try:
739
+ if connection:
740
+ return execute_search(connection)
741
+ else:
742
+ with get_db_connection() as conn:
743
+ return execute_search(conn)
744
+
745
+ except sqlite3.Error as e:
746
+ logger.error(f"Database error in search_notes_titles: {str(e)}")
747
+ logger.error(f"Search term: {search_term}")
748
+ raise sqlite3.Error(f"Error searching notes: {str(e)}")
749
+
750
+
751
+
752
  def get_notes(conversation_id):
753
  """Retrieve notes for a given conversation."""
754
  try:
 
761
  logger.error(f"Error getting notes for conversation '{conversation_id}': {e}")
762
  raise
763
 
764
+
765
  def get_note_by_id(note_id):
766
  try:
767
  query = "SELECT id, title, content FROM rag_qa_notes WHERE id = ?"
 
771
  logger.error(f"Error getting note by ID '{note_id}': {e}")
772
  raise
773
 
774
+
775
  def get_notes_by_keywords(keywords, page=1, page_size=20):
776
  try:
777
+ # Handle empty or invalid keywords
778
+ if not keywords or not isinstance(keywords, (list, tuple)) or len(keywords) == 0:
779
+ return [], 0, 0
780
+
781
+ # Convert all keywords to strings and strip them
782
+ clean_keywords = [str(k).strip() for k in keywords if k is not None and str(k).strip()]
783
+
784
+ # If no valid keywords after cleaning, return empty result
785
+ if not clean_keywords:
786
+ return [], 0, 0
787
+
788
+ placeholders = ','.join(['?'] * len(clean_keywords))
789
  query = f'''
790
  SELECT n.id, n.title, n.content, n.timestamp
791
  FROM rag_qa_notes n
 
794
  WHERE k.keyword IN ({placeholders})
795
  ORDER BY n.timestamp DESC
796
  '''
797
+ results, total_pages, total_count = get_paginated_results(query, tuple(clean_keywords), page, page_size)
798
+ logger.info(f"Retrieved {len(results)} notes matching keywords: {', '.join(clean_keywords)} (page {page} of {total_pages})")
799
  notes = [(row[0], row[1], row[2], row[3]) for row in results]
800
  return notes, total_pages, total_count
801
  except Exception as e:
802
  logger.error(f"Error getting notes by keywords: {e}")
803
  raise
804
 
805
+
806
  def get_notes_by_keyword_collection(collection_name, page=1, page_size=20):
807
  try:
808
  query = '''
 
915
  #
916
  # Chat-related functions
917
 
918
+ def save_message(conversation_id, role, content, timestamp=None):
919
  try:
920
+ if timestamp is None:
921
+ timestamp = datetime.now().isoformat()
922
  query = "INSERT INTO rag_qa_chats (conversation_id, timestamp, role, content) VALUES (?, ?, ?, ?)"
923
  execute_query(query, (conversation_id, timestamp, role, content))
924
 
 
931
  logger.error(f"Error saving message for conversation '{conversation_id}': {e}")
932
  raise
933
 
934
+
935
+ def start_new_conversation(title="Untitled Conversation", media_id=None):
936
  try:
937
  conversation_id = str(uuid.uuid4())
938
+ query = """
939
+ INSERT INTO conversation_metadata (
940
+ conversation_id, created_at, last_updated, title, media_id, rating
941
+ ) VALUES (?, ?, ?, ?, ?, ?)
942
+ """
943
  now = datetime.now().isoformat()
944
+ # Set initial rating to NULL
945
+ execute_query(query, (conversation_id, now, now, title, media_id, None))
946
+ logger.info(f"New conversation '{conversation_id}' started with title '{title}' and media_id '{media_id}'")
947
  return conversation_id
948
  except Exception as e:
949
  logger.error(f"Error starting new conversation: {e}")
950
  raise
951
 
952
+
953
  def get_all_conversations(page=1, page_size=20):
954
  try:
955
+ query = """
956
+ SELECT conversation_id, title, media_id, rating
957
+ FROM conversation_metadata
958
+ ORDER BY last_updated DESC
959
+ LIMIT ? OFFSET ?
960
+ """
961
+
962
+ count_query = "SELECT COUNT(*) FROM conversation_metadata"
963
+ db_path = get_rag_qa_db_path()
964
+ with sqlite3.connect(db_path) as conn:
965
+ cursor = conn.cursor()
966
+
967
+ # Get total count
968
+ cursor.execute(count_query)
969
+ total_count = cursor.fetchone()[0]
970
+ total_pages = (total_count + page_size - 1) // page_size
971
+
972
+ # Get page of results
973
+ offset = (page - 1) * page_size
974
+ cursor.execute(query, (page_size, offset))
975
+ results = cursor.fetchall()
976
+
977
+ conversations = [{
978
+ 'conversation_id': row[0],
979
+ 'title': row[1],
980
+ 'media_id': row[2],
981
+ 'rating': row[3] # Include rating
982
+ } for row in results]
983
+ return conversations, total_pages, total_count
984
+ except Exception as e:
985
+ logging.error(f"Error getting conversations: {e}")
986
+ raise
987
+
988
+
989
+ def get_all_notes(page=1, page_size=20):
990
+ try:
991
+ query = """
992
+ SELECT n.id, n.conversation_id, n.title, n.content, n.timestamp,
993
+ cm.title as conversation_title, cm.media_id
994
+ FROM rag_qa_notes n
995
+ LEFT JOIN conversation_metadata cm ON n.conversation_id = cm.conversation_id
996
+ ORDER BY n.timestamp DESC
997
+ LIMIT ? OFFSET ?
998
+ """
999
+
1000
+ count_query = "SELECT COUNT(*) FROM rag_qa_notes"
1001
+ db_path = get_rag_qa_db_path()
1002
+ with sqlite3.connect(db_path) as conn:
1003
+ cursor = conn.cursor()
1004
+
1005
+ # Get total count
1006
+ cursor.execute(count_query)
1007
+ total_count = cursor.fetchone()[0]
1008
+ total_pages = (total_count + page_size - 1) // page_size
1009
+
1010
+ # Get page of results
1011
+ offset = (page - 1) * page_size
1012
+ cursor.execute(query, (page_size, offset))
1013
+ results = cursor.fetchall()
1014
+
1015
+ notes = [{
1016
+ 'id': row[0],
1017
+ 'conversation_id': row[1],
1018
+ 'title': row[2],
1019
+ 'content': row[3],
1020
+ 'timestamp': row[4],
1021
+ 'conversation_title': row[5],
1022
+ 'media_id': row[6]
1023
+ } for row in results]
1024
+
1025
+ return notes, total_pages, total_count
1026
  except Exception as e:
1027
+ logging.error(f"Error getting notes: {e}")
1028
  raise
1029
 
1030
+
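A small pagination sketch for the listing helpers above (assumed usage; page numbering is 1-based, matching the functions themselves, and the print statement is only illustrative):

    page = 1
    while True:
        notes, total_pages, total_count = get_all_notes(page=page, page_size=20)
        for note in notes:
            print(note['id'], note['title'], note['conversation_title'])
        if page >= total_pages:
            break
        page += 1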
1031
  # Pagination helper function
1032
  def get_paginated_results(query, params=None, page=1, page_size=20):
1033
  try:
 
1053
  logger.error(f"Error retrieving paginated results: {e}")
1054
  raise
1055
 
1056
+
1057
  def get_all_collections(page=1, page_size=20):
1058
  try:
1059
  query = "SELECT name FROM rag_qa_keyword_collections"
 
1065
  logger.error(f"Error getting collections: {e}")
1066
  raise
1067
 
1068
+
1069
+ def search_conversations_by_keywords(keywords=None, title_query=None, content_query=None, page=1, page_size=20):
1070
  try:
1071
+ # Base query starts with conversation metadata
1072
+ query = """
1073
+ SELECT DISTINCT cm.conversation_id, cm.title, cm.last_updated
1074
  FROM conversation_metadata cm
1075
+ WHERE 1=1
1076
+ """
1077
+ params = []
1078
+
1079
+ # Add content search if provided
1080
+ if content_query and isinstance(content_query, str) and content_query.strip():
1081
+ query += """
1082
+ AND EXISTS (
1083
+ SELECT 1 FROM rag_qa_chats_fts
1084
+ WHERE rag_qa_chats_fts.content MATCH ?
1085
+ AND rag_qa_chats_fts.rowid IN (
1086
+ SELECT id FROM rag_qa_chats
1087
+ WHERE conversation_id = cm.conversation_id
1088
+ )
1089
+ )
1090
+ """
1091
+ params.append(content_query.strip())
1092
+
1093
+ # Add title search if provided
1094
+ if title_query and isinstance(title_query, str) and title_query.strip():
1095
+ query += """
1096
+ AND EXISTS (
1097
+ SELECT 1 FROM conversation_metadata_fts
1098
+ WHERE conversation_metadata_fts.title MATCH ?
1099
+ AND conversation_metadata_fts.rowid = cm.rowid
1100
+ )
1101
+ """
1102
+ params.append(title_query.strip())
1103
+
1104
+ # Add keyword search if provided
1105
+ if keywords and isinstance(keywords, (list, tuple)) and len(keywords) > 0:
1106
+ # Convert all keywords to strings and strip them
1107
+ clean_keywords = [str(k).strip() for k in keywords if k is not None and str(k).strip()]
1108
+ if clean_keywords: # Only add to query if we have valid keywords
1109
+ placeholders = ','.join(['?' for _ in clean_keywords])
1110
+ query += f"""
1111
+ AND EXISTS (
1112
+ SELECT 1 FROM rag_qa_conversation_keywords ck
1113
+ JOIN rag_qa_keywords k ON ck.keyword_id = k.id
1114
+ WHERE ck.conversation_id = cm.conversation_id
1115
+ AND k.keyword IN ({placeholders})
1116
+ )
1117
+ """
1118
+ params.extend(clean_keywords)
1119
+
1120
+ # Add ordering
1121
+ query += " ORDER BY cm.last_updated DESC"
1122
+
1123
+ results, total_pages, total_count = get_paginated_results(query, tuple(params), page, page_size)
1124
+
1125
+ conversations = [
1126
+ {
1127
+ 'conversation_id': row[0],
1128
+ 'title': row[1],
1129
+ 'last_updated': row[2]
1130
+ }
1131
+ for row in results
1132
+ ]
1133
+
1134
+ return conversations, total_pages, total_count
1135
+
1136
  except Exception as e:
1137
+ logger.error(f"Error searching conversations: {e}")
1138
  raise
1139
 
1140
+
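An illustrative call to the search helper above (the keyword and query strings are placeholders; every filter is optional and the filters are combined with AND):

    conversations, total_pages, total_count = search_conversations_by_keywords(
        keywords=["transcription"],        # matched against rag_qa_keywords
        title_query="meeting",             # matched via conversation_metadata_fts
        content_query="action items",      # matched via rag_qa_chats_fts
        page=1,
        page_size=20,
    )
    for conv in conversations:
        print(conv['conversation_id'], conv['title'], conv['last_updated'])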
1141
  def load_chat_history(conversation_id, page=1, page_size=50):
1142
  try:
1143
  query = "SELECT role, content FROM rag_qa_chats WHERE conversation_id = ? ORDER BY timestamp"
 
1149
  logger.error(f"Error loading chat history for conversation '{conversation_id}': {e}")
1150
  raise
1151
 
1152
+
1153
  def update_conversation_title(conversation_id, new_title):
1154
  """Update the title of a conversation."""
1155
  try:
 
1160
  logger.error(f"Error updating conversation title: {e}")
1161
  raise
1162
 
1163
+
1164
+ def delete_messages_in_conversation(conversation_id):
1165
+ """Helper function to delete all messages in a conversation."""
1166
+ try:
1167
+ execute_query("DELETE FROM rag_qa_chats WHERE conversation_id = ?", (conversation_id,))
1168
+ logging.info(f"Messages in conversation '{conversation_id}' deleted successfully.")
1169
+ except Exception as e:
1170
+ logging.error(f"Error deleting messages in conversation '{conversation_id}': {e}")
1171
+ raise
1172
+
1173
+
1174
+ def get_conversation_title(conversation_id):
1175
+ """Helper function to get the conversation title."""
1176
+ query = "SELECT title FROM conversation_metadata WHERE conversation_id = ?"
1177
+ result = execute_query(query, (conversation_id,))
1178
+ if result:
1179
+ return result[0][0]
1180
+ else:
1181
+ return "Untitled Conversation"
1182
+
1183
+
1184
+ def get_conversation_text(conversation_id):
1185
+ try:
1186
+ query = """
1187
+ SELECT role, content
1188
+ FROM rag_qa_chats
1189
+ WHERE conversation_id = ?
1190
+ ORDER BY timestamp ASC
1191
+ """
1192
+
1193
+ messages = []
1194
+ # Use the connection as a context manager
1195
+ db_path = get_rag_qa_db_path()
1196
+ with sqlite3.connect(db_path) as conn:
1197
+ cursor = conn.cursor()
1198
+ cursor.execute(query, (conversation_id,))
1199
+ messages = cursor.fetchall()
1200
+
1201
+ return "\n\n".join([f"{msg[0]}: {msg[1]}" for msg in messages])
1202
+ except Exception as e:
1203
+ logger.error(f"Error getting conversation text: {e}")
1204
+ raise
1205
+
1206
+
1207
+ def get_conversation_details(conversation_id):
1208
+ query = "SELECT title, media_id, rating FROM conversation_metadata WHERE conversation_id = ?"
1209
+ result = execute_query(query, (conversation_id,))
1210
+ if result:
1211
+ return {'title': result[0][0], 'media_id': result[0][1], 'rating': result[0][2]}
1212
+ else:
1213
+ return {'title': "Untitled Conversation", 'media_id': None, 'rating': None}
1214
+
1215
+
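A sketch of how the two helpers above can be combined to dump a conversation to plain text (the function name and output path are hypothetical; error handling is omitted):

    def export_conversation_to_txt(conversation_id, out_path="conversation.txt"):
        # Combine the stored title with the role-prefixed transcript.
        title = get_conversation_title(conversation_id)
        body = get_conversation_text(conversation_id)
        with open(out_path, "w", encoding="utf-8") as f:
            f.write(f"{title}\n\n{body}")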
1216
  def delete_conversation(conversation_id):
1217
  """Delete a conversation and its associated messages and notes."""
1218
  try:
 
1232
  logger.error(f"Error deleting conversation '{conversation_id}': {e}")
1233
  raise
1234
 
1235
+ def set_conversation_rating(conversation_id, rating):
1236
+ """Set the rating for a conversation."""
1237
+ # Validate rating
1238
+ if rating not in [1, 2, 3]:
1239
+ raise ValueError('Rating must be an integer between 1 and 3.')
1240
+ try:
1241
+ query = "UPDATE conversation_metadata SET rating = ? WHERE conversation_id = ?"
1242
+ execute_query(query, (rating, conversation_id))
1243
+ logger.info(f"Rating for conversation '{conversation_id}' set to {rating}")
1244
+ except Exception as e:
1245
+ logger.error(f"Error setting rating for conversation '{conversation_id}': {e}")
1246
+ raise
1247
+
1248
+ def get_conversation_rating(conversation_id):
1249
+ """Get the rating of a conversation."""
1250
+ try:
1251
+ query = "SELECT rating FROM conversation_metadata WHERE conversation_id = ?"
1252
+ result = execute_query(query, (conversation_id,))
1253
+ if result:
1254
+ rating = result[0][0]
1255
+ logger.info(f"Rating for conversation '{conversation_id}' is {rating}")
1256
+ return rating
1257
+ else:
1258
+ logger.warning(f"Conversation '{conversation_id}' not found.")
1259
+ return None
1260
+ except Exception as e:
1261
+ logger.error(f"Error getting rating for conversation '{conversation_id}': {e}")
1262
+ raise
1263
+
1264
+
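Illustrative use of the rating helpers above (ratings are constrained to 1-3 by set_conversation_rating, so the try/except mirrors the ValueError it raises on invalid input):

    # conversation_id from start_new_conversation() or get_all_conversations()
    conversation_id = start_new_conversation(title="Rating demo")
    try:
        set_conversation_rating(conversation_id, 3)   # highest rating
    except ValueError as e:
        logging.warning(f"Invalid rating: {e}")
    current_rating = get_conversation_rating(conversation_id)  # None if not found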
1265
+ def get_conversation_name(conversation_id: str) -> str:
1266
+ """
1267
+ Retrieves the title/name of a conversation from the conversation_metadata table.
1268
+
1269
+ Args:
1270
+ conversation_id (str): The unique identifier of the conversation
1271
+
1272
+ Returns:
1273
+ str: The title of the conversation if found, "Untitled Conversation" if not found
1274
+
1275
+ Raises:
1276
+ sqlite3.Error: If there's a database error
1277
+ """
1278
+ try:
1279
+ with get_db_connection() as conn:
1280
+ cursor = conn.cursor()
1281
+ cursor.execute(
1282
+ "SELECT title FROM conversation_metadata WHERE conversation_id = ?",
1283
+ (conversation_id,)
1284
+ )
1285
+ result = cursor.fetchone()
1286
+
1287
+ if result:
1288
+ return result[0]
1289
+ else:
1290
+ logging.warning(f"No conversation found with ID: {conversation_id}")
1291
+ return "Untitled Conversation"
1292
+
1293
+ except sqlite3.Error as e:
1294
+ logging.error(f"Database error retrieving conversation name for ID {conversation_id}: {e}")
1295
+ raise
1296
+ except Exception as e:
1297
+ logging.error(f"Unexpected error retrieving conversation name for ID {conversation_id}: {e}")
1298
+ raise
1299
+
1300
+
1301
+ def search_rag_chat(query: str, fts_top_k: int = 10, relevant_media_ids: List[str] = None) -> List[Dict[str, Any]]:
1302
+ """
1303
+ Perform a full-text search on the RAG Chat database.
1304
+
1305
+ Args:
1306
+ query: Search query string.
1307
+ fts_top_k: Maximum number of results to return.
1308
+ relevant_media_ids: Optional list of media IDs to filter results.
1309
+
1310
+ Returns:
1311
+ List of search results with content and metadata.
1312
+ """
1313
+ if not query.strip():
1314
+ return []
1315
+
1316
+ try:
1317
+ db_path = get_rag_qa_db_path()
1318
+ with sqlite3.connect(db_path) as conn:
1319
+ cursor = conn.cursor()
1320
+ # Perform the full-text search using the FTS virtual table
1321
+ cursor.execute("""
1322
+ SELECT rag_qa_chats.id, rag_qa_chats.conversation_id, rag_qa_chats.role, rag_qa_chats.content
1323
+ FROM rag_qa_chats_fts
1324
+ JOIN rag_qa_chats ON rag_qa_chats_fts.rowid = rag_qa_chats.id
1325
+ WHERE rag_qa_chats_fts MATCH ?
1326
+ LIMIT ?
1327
+ """, (query, fts_top_k))
1328
+
1329
+ rows = cursor.fetchall()
1330
+ columns = [description[0] for description in cursor.description]
1331
+ results = [dict(zip(columns, row)) for row in rows]
1332
+
1333
+ # Filter by relevant_media_ids if provided
1334
+ if relevant_media_ids is not None:
1335
+ results = [
1336
+ r for r in results
1337
+ if get_conversation_details(r['conversation_id']).get('media_id') in relevant_media_ids
1338
+ ]
1339
+
1340
+ # Format results
1341
+ formatted_results = [
1342
+ {
1343
+ "content": r['content'],
1344
+ "metadata": {
1345
+ "conversation_id": r['conversation_id'],
1346
+ "role": r['role'],
1347
+ "media_id": get_conversation_details(r['conversation_id']).get('media_id')
1348
+ }
1349
+ }
1350
+ for r in results
1351
+ ]
1352
+ return formatted_results
1353
+
1354
+ except Exception as e:
1355
+ logging.error(f"Error in search_rag_chat: {e}")
1356
+ return []
1357
+
1358
+
1359
+ def search_rag_notes(query: str, fts_top_k: int = 10, relevant_media_ids: List[str] = None) -> List[Dict[str, Any]]:
1360
+ """
1361
+ Perform a full-text search on the RAG Notes database.
1362
+
1363
+ Args:
1364
+ query: Search query string.
1365
+ fts_top_k: Maximum number of results to return.
1366
+ relevant_media_ids: Optional list of media IDs to filter results.
1367
+
1368
+ Returns:
1369
+ List of search results with content and metadata.
1370
+ """
1371
+ if not query.strip():
1372
+ return []
1373
+
1374
+ try:
1375
+ db_path = get_rag_qa_db_path()
1376
+ with sqlite3.connect(db_path) as conn:
1377
+ cursor = conn.cursor()
1378
+ # Perform the full-text search using the FTS virtual table
1379
+ cursor.execute("""
1380
+ SELECT rag_qa_notes.id, rag_qa_notes.title, rag_qa_notes.content, rag_qa_notes.conversation_id
1381
+ FROM rag_qa_notes_fts
1382
+ JOIN rag_qa_notes ON rag_qa_notes_fts.rowid = rag_qa_notes.id
1383
+ WHERE rag_qa_notes_fts MATCH ?
1384
+ LIMIT ?
1385
+ """, (query, fts_top_k))
1386
+
1387
+ rows = cursor.fetchall()
1388
+ columns = [description[0] for description in cursor.description]
1389
+ results = [dict(zip(columns, row)) for row in rows]
1390
+
1391
+ # Filter by relevant_media_ids if provided
1392
+ if relevant_media_ids is not None:
1393
+ results = [
1394
+ r for r in results
1395
+ if get_conversation_details(r['conversation_id']).get('media_id') in relevant_media_ids
1396
+ ]
1397
+
1398
+ # Format results
1399
+ formatted_results = [
1400
+ {
1401
+ "content": r['content'],
1402
+ "metadata": {
1403
+ "note_id": r['id'],
1404
+ "title": r['title'],
1405
+ "conversation_id": r['conversation_id'],
1406
+ "media_id": get_conversation_details(r['conversation_id']).get('media_id')
1407
+ }
1408
+ }
1409
+ for r in results
1410
+ ]
1411
+ return formatted_results
1412
+
1413
+ except Exception as e:
1414
+ logging.error(f"Error in search_rag_notes: {e}")
1415
+ return []
1416
+
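A combined retrieval sketch using the two FTS helpers above (the query string and media IDs are placeholders, and the IDs should use whatever type media_id is stored as; note that both functions return an empty list on error rather than raising):

    relevant_media_ids = ["42", "43"]  # placeholder IDs
    chat_hits = search_rag_chat("quarterly revenue", fts_top_k=10,
                                relevant_media_ids=relevant_media_ids)
    note_hits = search_rag_notes("quarterly revenue", fts_top_k=10,
                                 relevant_media_ids=relevant_media_ids)
    context = [hit["content"] for hit in chat_hits + note_hits]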
1417
  #
1418
  # End of Chat-related functions
1419
  ###################################################
1420
 
1421
 
1422
+ ###################################################
1423
+ #
1424
+ # Import functions
1425
+
1426
+
1427
+ #
1428
+ # End of Import functions
1429
+ ###################################################
1430
+
1431
+
1432
  ###################################################
1433
  #
1434
  # Functions to export DB data
App_Function_Libraries/DB/SQLite_DB.py CHANGED
@@ -21,7 +21,7 @@ import configparser
21
  # 11. browse_items(search_query, search_type)
22
  # 12. fetch_item_details(media_id: int)
23
  # 13. add_media_version(media_id: int, prompt: str, summary: str)
24
- # 14. search_db(search_query: str, search_fields: List[str], keywords: str, page: int = 1, results_per_page: int = 10)
25
  # 15. search_and_display(search_query, search_fields, keywords, page)
26
  # 16. display_details(index, results)
27
  # 17. get_details(index, dataframe)
@@ -55,12 +55,14 @@ import re
55
  import shutil
56
  import sqlite3
57
  import threading
 
58
  import traceback
59
  from contextlib import contextmanager
60
  from datetime import datetime, timedelta
61
  from typing import List, Tuple, Dict, Any, Optional
62
  from urllib.parse import quote
63
 
 
64
  # Local Libraries
65
  from App_Function_Libraries.Utils.Utils import get_project_relative_path, get_database_path, \
66
  get_database_dir
@@ -342,27 +344,6 @@ def create_tables(db) -> None:
342
  )
343
  ''',
344
  '''
345
- CREATE TABLE IF NOT EXISTS ChatConversations (
346
- id INTEGER PRIMARY KEY AUTOINCREMENT,
347
- media_id INTEGER,
348
- media_name TEXT,
349
- conversation_name TEXT,
350
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
351
- updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
352
- FOREIGN KEY (media_id) REFERENCES Media(id)
353
- )
354
- ''',
355
- '''
356
- CREATE TABLE IF NOT EXISTS ChatMessages (
357
- id INTEGER PRIMARY KEY AUTOINCREMENT,
358
- conversation_id INTEGER,
359
- sender TEXT,
360
- message TEXT,
361
- timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
362
- FOREIGN KEY (conversation_id) REFERENCES ChatConversations(id)
363
- )
364
- ''',
365
- '''
366
  CREATE TABLE IF NOT EXISTS Transcripts (
367
  id INTEGER PRIMARY KEY AUTOINCREMENT,
368
  media_id INTEGER,
@@ -421,8 +402,6 @@ def create_tables(db) -> None:
421
  'CREATE INDEX IF NOT EXISTS idx_mediakeywords_keyword_id ON MediaKeywords(keyword_id)',
422
  'CREATE INDEX IF NOT EXISTS idx_media_version_media_id ON MediaVersion(media_id)',
423
  'CREATE INDEX IF NOT EXISTS idx_mediamodifications_media_id ON MediaModifications(media_id)',
424
- 'CREATE INDEX IF NOT EXISTS idx_chatconversations_media_id ON ChatConversations(media_id)',
425
- 'CREATE INDEX IF NOT EXISTS idx_chatmessages_conversation_id ON ChatMessages(conversation_id)',
426
  'CREATE INDEX IF NOT EXISTS idx_media_is_trash ON Media(is_trash)',
427
  'CREATE INDEX IF NOT EXISTS idx_mediachunks_media_id ON MediaChunks(media_id)',
428
  'CREATE INDEX IF NOT EXISTS idx_unvectorized_media_chunks_media_id ON UnvectorizedMediaChunks(media_id)',
@@ -606,7 +585,10 @@ def mark_media_as_processed(database, media_id):
606
  # Function to add media with keywords
607
  def add_media_with_keywords(url, title, media_type, content, keywords, prompt, summary, transcription_model, author,
608
  ingestion_date):
 
 
609
  logging.debug(f"Entering add_media_with_keywords: URL={url}, Title={title}")
 
610
  # Set default values for missing fields
611
  if url is None:
612
  url = 'localhost'
@@ -622,10 +604,17 @@ def add_media_with_keywords(url, title, media_type, content, keywords, prompt, s
622
  author = author or 'Unknown'
623
  ingestion_date = ingestion_date or datetime.now().strftime('%Y-%m-%d')
624
 
625
- if media_type not in ['article', 'audio', 'document', 'mediawiki_article', 'mediawiki_dump', 'obsidian_note', 'podcast', 'text', 'video', 'unknown']:
626
- raise InputError("Invalid media type. Allowed types: article, audio file, document, obsidian_note podcast, text, video, unknown.")
 
 
 
 
627
 
628
  if ingestion_date and not is_valid_date(ingestion_date):
 
 
 
629
  raise InputError("Invalid ingestion date format. Use YYYY-MM-DD.")
630
 
631
  # Handle keywords as either string or list
@@ -654,6 +643,7 @@ def add_media_with_keywords(url, title, media_type, content, keywords, prompt, s
654
  logging.debug(f"Existing media ID for {url}: {existing_media_id}")
655
 
656
  if existing_media_id:
 
657
  media_id = existing_media_id
658
  logging.debug(f"Updating existing media with ID: {media_id}")
659
  cursor.execute('''
@@ -661,7 +651,9 @@ def add_media_with_keywords(url, title, media_type, content, keywords, prompt, s
661
  SET content = ?, transcription_model = ?, type = ?, author = ?, ingestion_date = ?
662
  WHERE id = ?
663
  ''', (content, transcription_model, media_type, author, ingestion_date, media_id))
 
664
  else:
 
665
  logging.debug("Inserting new media")
666
  cursor.execute('''
667
  INSERT INTO Media (url, title, type, content, author, ingestion_date, transcription_model)
@@ -669,6 +661,7 @@ def add_media_with_keywords(url, title, media_type, content, keywords, prompt, s
669
  ''', (url, title, media_type, content, author, ingestion_date, transcription_model))
670
  media_id = cursor.lastrowid
671
  logging.debug(f"New media inserted with ID: {media_id}")
 
672
 
673
  cursor.execute('''
674
  INSERT INTO MediaModifications (media_id, prompt, summary, modification_date)
@@ -698,13 +691,23 @@ def add_media_with_keywords(url, title, media_type, content, keywords, prompt, s
698
  conn.commit()
699
  logging.info(f"Media '{title}' successfully added/updated with ID: {media_id}")
700
 
701
- return media_id, f"Media '{title}' added/updated successfully with keywords: {', '.join(keyword_list)}"
 
 
 
 
702
 
703
  except sqlite3.Error as e:
704
  logging.error(f"SQL Error in add_media_with_keywords: {e}")
 
 
 
705
  raise DatabaseError(f"Error adding media with keywords: {e}")
706
  except Exception as e:
707
  logging.error(f"Unexpected Error in add_media_with_keywords: {e}")
 
 
 
708
  raise DatabaseError(f"Unexpected error: {e}")
709
 
710
 
@@ -779,7 +782,13 @@ def ingest_article_to_db(url, title, author, content, keywords, summary, ingesti
779
 
780
  # Function to add a keyword
781
  def add_keyword(keyword: str) -> int:
 
 
 
782
  if not keyword.strip():
 
 
 
783
  raise DatabaseError("Keyword cannot be empty")
784
 
785
  keyword = keyword.strip().lower()
@@ -801,18 +810,32 @@ def add_keyword(keyword: str) -> int:
801
 
802
  logging.info(f"Keyword '{keyword}' added or updated with ID: {keyword_id}")
803
  conn.commit()
 
 
 
 
 
804
  return keyword_id
805
  except sqlite3.IntegrityError as e:
806
  logging.error(f"Integrity error adding keyword: {e}")
 
 
 
807
  raise DatabaseError(f"Integrity error adding keyword: {e}")
808
  except sqlite3.Error as e:
809
  logging.error(f"Error adding keyword: {e}")
 
 
 
810
  raise DatabaseError(f"Error adding keyword: {e}")
811
 
812
 
813
 
814
  # Function to delete a keyword
815
  def delete_keyword(keyword: str) -> str:
 
 
 
816
  keyword = keyword.strip().lower()
817
  with db.get_connection() as conn:
818
  cursor = conn.cursor()
@@ -823,10 +846,23 @@ def delete_keyword(keyword: str) -> str:
823
  cursor.execute('DELETE FROM Keywords WHERE keyword = ?', (keyword,))
824
  cursor.execute('DELETE FROM keyword_fts WHERE rowid = ?', (keyword_id[0],))
825
  conn.commit()
 
 
 
 
 
826
  return f"Keyword '{keyword}' deleted successfully."
827
  else:
 
 
 
 
828
  return f"Keyword '{keyword}' not found."
829
  except sqlite3.Error as e:
 
 
 
 
830
  raise DatabaseError(f"Error deleting keyword: {e}")
831
 
832
 
@@ -1000,7 +1036,7 @@ def add_media_version(conn, media_id: int, prompt: str, summary: str) -> None:
1000
 
1001
 
1002
  # Function to search the database with advanced options, including keyword search and full-text search
1003
- def sqlite_search_db(search_query: str, search_fields: List[str], keywords: str, page: int = 1, results_per_page: int = 10, connection=None):
1004
  if page < 1:
1005
  raise ValueError("Page number must be 1 or greater.")
1006
 
@@ -1055,7 +1091,7 @@ def sqlite_search_db(search_query: str, search_fields: List[str], keywords: str,
1055
 
1056
  # Gradio function to handle user input and display results with pagination, with better feedback
1057
  def search_and_display(search_query, search_fields, keywords, page):
1058
- results = sqlite_search_db(search_query, search_fields, keywords, page)
1059
 
1060
  if isinstance(results, pd.DataFrame):
1061
  # Convert DataFrame to a list of tuples or lists
@@ -1133,7 +1169,7 @@ def format_results(results):
1133
  # Function to export search results to CSV or markdown with pagination
1134
  def export_to_file(search_query: str, search_fields: List[str], keyword: str, page: int = 1, results_per_file: int = 1000, export_format: str = 'csv'):
1135
  try:
1136
- results = sqlite_search_db(search_query, search_fields, keyword, page, results_per_file)
1137
  if not results:
1138
  return "No results found to export."
1139
 
@@ -1381,303 +1417,6 @@ def schedule_chunking(media_id: int, content: str, media_name: str):
1381
  #######################################################################################################################
1382
 
1383
 
1384
- #######################################################################################################################
1385
- #
1386
- # Functions to manage prompts DB
1387
-
1388
- def create_prompts_db():
1389
- logging.debug("create_prompts_db: Creating prompts database.")
1390
- with sqlite3.connect(get_database_path('prompts.db')) as conn:
1391
- cursor = conn.cursor()
1392
- cursor.executescript('''
1393
- CREATE TABLE IF NOT EXISTS Prompts (
1394
- id INTEGER PRIMARY KEY AUTOINCREMENT,
1395
- name TEXT NOT NULL UNIQUE,
1396
- author TEXT,
1397
- details TEXT,
1398
- system TEXT,
1399
- user TEXT
1400
- );
1401
- CREATE TABLE IF NOT EXISTS Keywords (
1402
- id INTEGER PRIMARY KEY AUTOINCREMENT,
1403
- keyword TEXT NOT NULL UNIQUE COLLATE NOCASE
1404
- );
1405
- CREATE TABLE IF NOT EXISTS PromptKeywords (
1406
- prompt_id INTEGER,
1407
- keyword_id INTEGER,
1408
- FOREIGN KEY (prompt_id) REFERENCES Prompts (id),
1409
- FOREIGN KEY (keyword_id) REFERENCES Keywords (id),
1410
- PRIMARY KEY (prompt_id, keyword_id)
1411
- );
1412
- CREATE INDEX IF NOT EXISTS idx_keywords_keyword ON Keywords(keyword);
1413
- CREATE INDEX IF NOT EXISTS idx_promptkeywords_prompt_id ON PromptKeywords(prompt_id);
1414
- CREATE INDEX IF NOT EXISTS idx_promptkeywords_keyword_id ON PromptKeywords(keyword_id);
1415
- ''')
1416
-
1417
- # FIXME - dirty hack that should be removed later...
1418
- # Migration function to add the 'author' column to the Prompts table
1419
- def add_author_column_to_prompts():
1420
- with sqlite3.connect(get_database_path('prompts.db')) as conn:
1421
- cursor = conn.cursor()
1422
- # Check if 'author' column already exists
1423
- cursor.execute("PRAGMA table_info(Prompts)")
1424
- columns = [col[1] for col in cursor.fetchall()]
1425
-
1426
- if 'author' not in columns:
1427
- # Add the 'author' column
1428
- cursor.execute('ALTER TABLE Prompts ADD COLUMN author TEXT')
1429
- print("Author column added to Prompts table.")
1430
- else:
1431
- print("Author column already exists in Prompts table.")
1432
-
1433
- add_author_column_to_prompts()
1434
-
1435
- def normalize_keyword(keyword):
1436
- return re.sub(r'\s+', ' ', keyword.strip().lower())
1437
-
1438
-
1439
- # FIXME - update calls to this function to use the new args
1440
- def add_prompt(name, author, details, system=None, user=None, keywords=None):
1441
- logging.debug(f"add_prompt: Adding prompt with name: {name}, author: {author}, system: {system}, user: {user}, keywords: {keywords}")
1442
- if not name:
1443
- logging.error("add_prompt: A name is required.")
1444
- return "A name is required."
1445
-
1446
- try:
1447
- with sqlite3.connect(get_database_path('prompts.db')) as conn:
1448
- cursor = conn.cursor()
1449
- cursor.execute('''
1450
- INSERT INTO Prompts (name, author, details, system, user)
1451
- VALUES (?, ?, ?, ?, ?)
1452
- ''', (name, author, details, system, user))
1453
- prompt_id = cursor.lastrowid
1454
-
1455
- if keywords:
1456
- normalized_keywords = [normalize_keyword(k) for k in keywords if k.strip()]
1457
- for keyword in set(normalized_keywords): # Use set to remove duplicates
1458
- cursor.execute('''
1459
- INSERT OR IGNORE INTO Keywords (keyword) VALUES (?)
1460
- ''', (keyword,))
1461
- cursor.execute('SELECT id FROM Keywords WHERE keyword = ?', (keyword,))
1462
- keyword_id = cursor.fetchone()[0]
1463
- cursor.execute('''
1464
- INSERT OR IGNORE INTO PromptKeywords (prompt_id, keyword_id) VALUES (?, ?)
1465
- ''', (prompt_id, keyword_id))
1466
- return "Prompt added successfully."
1467
- except sqlite3.IntegrityError:
1468
- return "Prompt with this name already exists."
1469
- except sqlite3.Error as e:
1470
- return f"Database error: {e}"
1471
-
1472
-
1473
- def fetch_prompt_details(name):
1474
- logging.debug(f"fetch_prompt_details: Fetching details for prompt: {name}")
1475
- with sqlite3.connect(get_database_path('prompts.db')) as conn:
1476
- cursor = conn.cursor()
1477
- cursor.execute('''
1478
- SELECT p.name, p.author, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
1479
- FROM Prompts p
1480
- LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
1481
- LEFT JOIN Keywords k ON pk.keyword_id = k.id
1482
- WHERE p.name = ?
1483
- GROUP BY p.id
1484
- ''', (name,))
1485
- return cursor.fetchone()
1486
-
1487
-
1488
- def list_prompts(page=1, per_page=10):
1489
- logging.debug(f"list_prompts: Listing prompts for page {page} with {per_page} prompts per page.")
1490
- offset = (page - 1) * per_page
1491
- with sqlite3.connect(get_database_path('prompts.db')) as conn:
1492
- cursor = conn.cursor()
1493
- cursor.execute('SELECT name FROM Prompts LIMIT ? OFFSET ?', (per_page, offset))
1494
- prompts = [row[0] for row in cursor.fetchall()]
1495
-
1496
- # Get total count of prompts
1497
- cursor.execute('SELECT COUNT(*) FROM Prompts')
1498
- total_count = cursor.fetchone()[0]
1499
-
1500
- total_pages = (total_count + per_page - 1) // per_page
1501
- return prompts, total_pages, page
1502
-
1503
- # This will not scale. For a large number of prompts, use a more efficient method.
1504
- # FIXME - see above statement.
1505
- def load_preset_prompts():
1506
- logging.debug("load_preset_prompts: Loading preset prompts.")
1507
- try:
1508
- with sqlite3.connect(get_database_path('prompts.db')) as conn:
1509
- cursor = conn.cursor()
1510
- cursor.execute('SELECT name FROM Prompts ORDER BY name ASC')
1511
- prompts = [row[0] for row in cursor.fetchall()]
1512
- return prompts
1513
- except sqlite3.Error as e:
1514
- print(f"Database error: {e}")
1515
- return []
1516
-
1517
-
1518
- def insert_prompt_to_db(title, author, description, system_prompt, user_prompt, keywords=None):
1519
- return add_prompt(title, author, description, system_prompt, user_prompt, keywords)
1520
-
1521
-
1522
- def get_prompt_db_connection():
1523
- prompt_db_path = get_database_path('prompts.db')
1524
- return sqlite3.connect(prompt_db_path)
1525
-
1526
-
1527
- def search_prompts(query):
1528
- logging.debug(f"search_prompts: Searching prompts with query: {query}")
1529
- try:
1530
- with get_prompt_db_connection() as conn:
1531
- cursor = conn.cursor()
1532
- cursor.execute("""
1533
- SELECT p.name, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
1534
- FROM Prompts p
1535
- LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
1536
- LEFT JOIN Keywords k ON pk.keyword_id = k.id
1537
- WHERE p.name LIKE ? OR p.details LIKE ? OR p.system LIKE ? OR p.user LIKE ? OR k.keyword LIKE ?
1538
- GROUP BY p.id
1539
- ORDER BY p.name
1540
- """, (f'%{query}%', f'%{query}%', f'%{query}%', f'%{query}%', f'%{query}%'))
1541
- return cursor.fetchall()
1542
- except sqlite3.Error as e:
1543
- logging.error(f"Error searching prompts: {e}")
1544
- return []
1545
-
1546
-
1547
- def search_prompts_by_keyword(keyword, page=1, per_page=10):
1548
- logging.debug(f"search_prompts_by_keyword: Searching prompts by keyword: {keyword}")
1549
- normalized_keyword = normalize_keyword(keyword)
1550
- offset = (page - 1) * per_page
1551
- with sqlite3.connect(get_database_path('prompts.db')) as conn:
1552
- cursor = conn.cursor()
1553
- cursor.execute('''
1554
- SELECT DISTINCT p.name
1555
- FROM Prompts p
1556
- JOIN PromptKeywords pk ON p.id = pk.prompt_id
1557
- JOIN Keywords k ON pk.keyword_id = k.id
1558
- WHERE k.keyword LIKE ?
1559
- LIMIT ? OFFSET ?
1560
- ''', ('%' + normalized_keyword + '%', per_page, offset))
1561
- prompts = [row[0] for row in cursor.fetchall()]
1562
-
1563
- # Get total count of matching prompts
1564
- cursor.execute('''
1565
- SELECT COUNT(DISTINCT p.id)
1566
- FROM Prompts p
1567
- JOIN PromptKeywords pk ON p.id = pk.prompt_id
1568
- JOIN Keywords k ON pk.keyword_id = k.id
1569
- WHERE k.keyword LIKE ?
1570
- ''', ('%' + normalized_keyword + '%',))
1571
- total_count = cursor.fetchone()[0]
1572
-
1573
- total_pages = (total_count + per_page - 1) // per_page
1574
- return prompts, total_pages, page
1575
-
1576
-
1577
- def update_prompt_keywords(prompt_name, new_keywords):
1578
- logging.debug(f"update_prompt_keywords: Updating keywords for prompt: {prompt_name}")
1579
- try:
1580
- with sqlite3.connect(get_database_path('prompts.db')) as conn:
1581
- cursor = conn.cursor()
1582
-
1583
- cursor.execute('SELECT id FROM Prompts WHERE name = ?', (prompt_name,))
1584
- prompt_id = cursor.fetchone()
1585
- if not prompt_id:
1586
- return "Prompt not found."
1587
- prompt_id = prompt_id[0]
1588
-
1589
- cursor.execute('DELETE FROM PromptKeywords WHERE prompt_id = ?', (prompt_id,))
1590
-
1591
- normalized_keywords = [normalize_keyword(k) for k in new_keywords if k.strip()]
1592
- for keyword in set(normalized_keywords): # Use set to remove duplicates
1593
- cursor.execute('INSERT OR IGNORE INTO Keywords (keyword) VALUES (?)', (keyword,))
1594
- cursor.execute('SELECT id FROM Keywords WHERE keyword = ?', (keyword,))
1595
- keyword_id = cursor.fetchone()[0]
1596
- cursor.execute('INSERT INTO PromptKeywords (prompt_id, keyword_id) VALUES (?, ?)',
1597
- (prompt_id, keyword_id))
1598
-
1599
- # Remove unused keywords
1600
- cursor.execute('''
1601
- DELETE FROM Keywords
1602
- WHERE id NOT IN (SELECT DISTINCT keyword_id FROM PromptKeywords)
1603
- ''')
1604
- return "Keywords updated successfully."
1605
- except sqlite3.Error as e:
1606
- return f"Database error: {e}"
1607
-
1608
-
1609
- def add_or_update_prompt(title, author, description, system_prompt, user_prompt, keywords=None):
1610
- logging.debug(f"add_or_update_prompt: Adding or updating prompt: {title}")
1611
- if not title:
1612
- return "Error: Title is required."
1613
-
1614
- existing_prompt = fetch_prompt_details(title)
1615
- if existing_prompt:
1616
- # Update existing prompt
1617
- result = update_prompt_in_db(title, author, description, system_prompt, user_prompt)
1618
- if "successfully" in result:
1619
- # Update keywords if the prompt update was successful
1620
- keyword_result = update_prompt_keywords(title, keywords or [])
1621
- result += f" {keyword_result}"
1622
- else:
1623
- # Insert new prompt
1624
- result = insert_prompt_to_db(title, author, description, system_prompt, user_prompt, keywords)
1625
-
1626
- return result
1627
-
1628
-
1629
- def load_prompt_details(selected_prompt):
1630
- logging.debug(f"load_prompt_details: Loading prompt details for {selected_prompt}")
1631
- if selected_prompt:
1632
- details = fetch_prompt_details(selected_prompt)
1633
- if details:
1634
- return details[0], details[1], details[2], details[3], details[4], details[5]
1635
- return "", "", "", "", "", ""
1636
-
1637
-
1638
- def update_prompt_in_db(title, author, description, system_prompt, user_prompt):
1639
- logging.debug(f"update_prompt_in_db: Updating prompt: {title}")
1640
- try:
1641
- with sqlite3.connect(get_database_path('prompts.db')) as conn:
1642
- cursor = conn.cursor()
1643
- cursor.execute(
1644
- "UPDATE Prompts SET author = ?, details = ?, system = ?, user = ? WHERE name = ?",
1645
- (author, description, system_prompt, user_prompt, title)
1646
- )
1647
- if cursor.rowcount == 0:
1648
- return "No prompt found with the given title."
1649
- return "Prompt updated successfully!"
1650
- except sqlite3.Error as e:
1651
- return f"Error updating prompt: {e}"
1652
-
1653
-
1654
- create_prompts_db()
1655
-
1656
- def delete_prompt(prompt_id):
1657
- logging.debug(f"delete_prompt: Deleting prompt with ID: {prompt_id}")
1658
- try:
1659
- with sqlite3.connect(get_database_path('prompts.db')) as conn:
1660
- cursor = conn.cursor()
1661
-
1662
- # Delete associated keywords
1663
- cursor.execute("DELETE FROM PromptKeywords WHERE prompt_id = ?", (prompt_id,))
1664
-
1665
- # Delete the prompt
1666
- cursor.execute("DELETE FROM Prompts WHERE id = ?", (prompt_id,))
1667
-
1668
- if cursor.rowcount == 0:
1669
- return f"No prompt found with ID {prompt_id}"
1670
- else:
1671
- conn.commit()
1672
- return f"Prompt with ID {prompt_id} has been successfully deleted"
1673
- except sqlite3.Error as e:
1674
- return f"An error occurred: {e}"
1675
-
1676
- #
1677
- #
1678
- #######################################################################################################################
1679
-
1680
-
1681
  #######################################################################################################################
1682
  #
1683
  # Function to fetch/update media content
@@ -2020,204 +1759,6 @@ def import_obsidian_note_to_db(note_data):
2020
  #######################################################################################################################
2021
 
2022
 
2023
- #######################################################################################################################
2024
- #
2025
- # Chat-related Functions
2026
-
2027
-
2028
-
2029
- def create_chat_conversation(media_id, conversation_name):
2030
- try:
2031
- with db.get_connection() as conn:
2032
- cursor = conn.cursor()
2033
- cursor.execute('''
2034
- INSERT INTO ChatConversations (media_id, conversation_name, created_at, updated_at)
2035
- VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
2036
- ''', (media_id, conversation_name))
2037
- conn.commit()
2038
- return cursor.lastrowid
2039
- except sqlite3.Error as e:
2040
- logging.error(f"Error creating chat conversation: {e}")
2041
- raise DatabaseError(f"Error creating chat conversation: {e}")
2042
-
2043
-
2044
- def add_chat_message(conversation_id: int, sender: str, message: str) -> int:
2045
- try:
2046
- with db.get_connection() as conn:
2047
- cursor = conn.cursor()
2048
- cursor.execute('''
2049
- INSERT INTO ChatMessages (conversation_id, sender, message)
2050
- VALUES (?, ?, ?)
2051
- ''', (conversation_id, sender, message))
2052
- conn.commit()
2053
- return cursor.lastrowid
2054
- except sqlite3.Error as e:
2055
- logging.error(f"Error adding chat message: {e}")
2056
- raise DatabaseError(f"Error adding chat message: {e}")
2057
-
2058
-
2059
- def get_chat_messages(conversation_id: int) -> List[Dict[str, Any]]:
2060
- try:
2061
- with db.get_connection() as conn:
2062
- cursor = conn.cursor()
2063
- cursor.execute('''
2064
- SELECT id, sender, message, timestamp
2065
- FROM ChatMessages
2066
- WHERE conversation_id = ?
2067
- ORDER BY timestamp ASC
2068
- ''', (conversation_id,))
2069
- messages = cursor.fetchall()
2070
- return [
2071
- {
2072
- 'id': msg[0],
2073
- 'sender': msg[1],
2074
- 'message': msg[2],
2075
- 'timestamp': msg[3]
2076
- }
2077
- for msg in messages
2078
- ]
2079
- except sqlite3.Error as e:
2080
- logging.error(f"Error retrieving chat messages: {e}")
2081
- raise DatabaseError(f"Error retrieving chat messages: {e}")
2082
-
2083
-
2084
- def search_chat_conversations(search_query: str) -> List[Dict[str, Any]]:
2085
- try:
2086
- with db.get_connection() as conn:
2087
- cursor = conn.cursor()
2088
- cursor.execute('''
2089
- SELECT cc.id, cc.media_id, cc.conversation_name, cc.created_at, m.title as media_title
2090
- FROM ChatConversations cc
2091
- LEFT JOIN Media m ON cc.media_id = m.id
2092
- WHERE cc.conversation_name LIKE ? OR m.title LIKE ?
2093
- ORDER BY cc.updated_at DESC
2094
- ''', (f'%{search_query}%', f'%{search_query}%'))
2095
- conversations = cursor.fetchall()
2096
- return [
2097
- {
2098
- 'id': conv[0],
2099
- 'media_id': conv[1],
2100
- 'conversation_name': conv[2],
2101
- 'created_at': conv[3],
2102
- 'media_title': conv[4] or "Unknown Media"
2103
- }
2104
- for conv in conversations
2105
- ]
2106
- except sqlite3.Error as e:
2107
- logging.error(f"Error searching chat conversations: {e}")
2108
- return []
2109
-
2110
-
2111
- def update_chat_message(message_id: int, new_message: str) -> None:
2112
- try:
2113
- with db.get_connection() as conn:
2114
- cursor = conn.cursor()
2115
- cursor.execute('''
2116
- UPDATE ChatMessages
2117
- SET message = ?, timestamp = CURRENT_TIMESTAMP
2118
- WHERE id = ?
2119
- ''', (new_message, message_id))
2120
- conn.commit()
2121
- except sqlite3.Error as e:
2122
- logging.error(f"Error updating chat message: {e}")
2123
- raise DatabaseError(f"Error updating chat message: {e}")
2124
-
2125
-
2126
- def delete_chat_message(message_id: int) -> None:
2127
- try:
2128
- with db.get_connection() as conn:
2129
- cursor = conn.cursor()
2130
- cursor.execute('DELETE FROM ChatMessages WHERE id = ?', (message_id,))
2131
- conn.commit()
2132
- except sqlite3.Error as e:
2133
- logging.error(f"Error deleting chat message: {e}")
2134
- raise DatabaseError(f"Error deleting chat message: {e}")
2135
-
2136
-
2137
- def save_chat_history_to_database(chatbot, conversation_id, media_id, media_name, conversation_name):
2138
- try:
2139
- with db.get_connection() as conn:
2140
- cursor = conn.cursor()
2141
-
2142
- # If conversation_id is None, create a new conversation
2143
- if conversation_id is None:
2144
- cursor.execute('''
2145
- INSERT INTO ChatConversations (media_id, media_name, conversation_name, created_at, updated_at)
2146
- VALUES (?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
2147
- ''', (media_id, media_name, conversation_name))
2148
- conversation_id = cursor.lastrowid
2149
- else:
2150
- # If conversation exists, update the media_name
2151
- cursor.execute('''
2152
- UPDATE ChatConversations
2153
- SET media_name = ?, updated_at = CURRENT_TIMESTAMP
2154
- WHERE id = ?
2155
- ''', (media_name, conversation_id))
2156
-
2157
- # Save each message in the chatbot history
2158
- for i, (user_msg, ai_msg) in enumerate(chatbot):
2159
- cursor.execute('''
2160
- INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
2161
- VALUES (?, ?, ?, CURRENT_TIMESTAMP)
2162
- ''', (conversation_id, 'user', user_msg))
2163
-
2164
- cursor.execute('''
2165
- INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
2166
- VALUES (?, ?, ?, CURRENT_TIMESTAMP)
2167
- ''', (conversation_id, 'ai', ai_msg))
2168
-
2169
- # Update the conversation's updated_at timestamp
2170
- cursor.execute('''
2171
- UPDATE ChatConversations
2172
- SET updated_at = CURRENT_TIMESTAMP
2173
- WHERE id = ?
2174
- ''', (conversation_id,))
2175
-
2176
- conn.commit()
2177
-
2178
- return conversation_id
2179
- except Exception as e:
2180
- logging.error(f"Error saving chat history to database: {str(e)}")
2181
- raise
2182
-
2183
-
2184
- def get_conversation_name(conversation_id):
2185
- if conversation_id is None:
2186
- return None
2187
-
2188
- try:
2189
- with sqlite3.connect('media_summary.db') as conn: # Replace with your actual database name
2190
- cursor = conn.cursor()
2191
-
2192
- query = """
2193
- SELECT conversation_name, media_name
2194
- FROM ChatConversations
2195
- WHERE id = ?
2196
- """
2197
-
2198
- cursor.execute(query, (conversation_id,))
2199
- result = cursor.fetchone()
2200
-
2201
- if result:
2202
- conversation_name, media_name = result
2203
- if conversation_name:
2204
- return conversation_name
2205
- elif media_name:
2206
- return f"{media_name}-chat"
2207
-
2208
- return None # Return None if no result found
2209
- except sqlite3.Error as e:
2210
- logging.error(f"Database error in get_conversation_name: {e}")
2211
- return None
2212
- except Exception as e:
2213
- logging.error(f"Unexpected error in get_conversation_name: {e}")
2214
- return None
2215
-
2216
- #
2217
- # End of Chat-related Functions
2218
- #######################################################################################################################
2219
-
2220
-
2221
  #######################################################################################################################
2222
  #
2223
  # Functions to Compare Transcripts
@@ -2837,29 +2378,42 @@ def process_chunks(database, chunks: List[Dict], media_id: int, batch_size: int
2837
  :param media_id: ID of the media these chunks belong to
2838
  :param batch_size: Number of chunks to process in each batch
2839
  """
 
 
2840
  total_chunks = len(chunks)
2841
  processed_chunks = 0
2842
 
2843
- for i in range(0, total_chunks, batch_size):
2844
- batch = chunks[i:i + batch_size]
2845
- chunk_data = [
2846
- (media_id, chunk['text'], chunk['start_index'], chunk['end_index'])
2847
- for chunk in batch
2848
- ]
2849
-
2850
- try:
2851
- database.execute_many(
2852
- "INSERT INTO MediaChunks (media_id, chunk_text, start_index, end_index) VALUES (?, ?, ?, ?)",
2853
- chunk_data
2854
- )
2855
- processed_chunks += len(batch)
2856
- logging.info(f"Processed {processed_chunks}/{total_chunks} chunks for media_id {media_id}")
2857
- except Exception as e:
2858
- logging.error(f"Error inserting chunk batch for media_id {media_id}: {e}")
2859
- # Optionally, you could raise an exception here to stop processing
2860
- # raise
2861
 
2862
- logging.info(f"Finished processing all {total_chunks} chunks for media_id {media_id}")
2863
 
2864
 
2865
  # Usage example:
@@ -2995,46 +2549,48 @@ def update_media_table(db):
2995
  #
2996
  # Workflow Functions
2997
 
 
2998
  def save_workflow_chat_to_db(chat_history, workflow_name, conversation_id=None):
2999
- try:
3000
- with db.get_connection() as conn:
3001
- cursor = conn.cursor()
3002
-
3003
- if conversation_id is None:
3004
- # Create a new conversation
3005
- conversation_name = f"{workflow_name}_Workflow_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
3006
- cursor.execute('''
3007
- INSERT INTO ChatConversations (media_id, media_name, conversation_name, created_at, updated_at)
3008
- VALUES (NULL, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
3009
- ''', (workflow_name, conversation_name))
3010
- conversation_id = cursor.lastrowid
3011
- else:
3012
- # Update existing conversation
3013
- cursor.execute('''
3014
- UPDATE ChatConversations
3015
- SET updated_at = CURRENT_TIMESTAMP
3016
- WHERE id = ?
3017
- ''', (conversation_id,))
3018
-
3019
- # Save messages
3020
- for user_msg, ai_msg in chat_history:
3021
- if user_msg:
3022
- cursor.execute('''
3023
- INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
3024
- VALUES (?, 'user', ?, CURRENT_TIMESTAMP)
3025
- ''', (conversation_id, user_msg))
3026
- if ai_msg:
3027
- cursor.execute('''
3028
- INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
3029
- VALUES (?, 'ai', ?, CURRENT_TIMESTAMP)
3030
- ''', (conversation_id, ai_msg))
3031
-
3032
- conn.commit()
3033
-
3034
- return conversation_id, f"Chat saved successfully! Conversation ID: {conversation_id}"
3035
- except Exception as e:
3036
- logging.error(f"Error saving workflow chat to database: {str(e)}")
3037
- return None, f"Error saving chat to database: {str(e)}"
 
3038
 
3039
 
3040
  def get_workflow_chat(conversation_id):
 
21
  # 11. browse_items(search_query, search_type)
22
  # 12. fetch_item_details(media_id: int)
23
  # 13. add_media_version(media_id: int, prompt: str, summary: str)
24
+ # 14. search_media_db(search_query: str, search_fields: List[str], keywords: str, page: int = 1, results_per_page: int = 10)
25
  # 15. search_and_display(search_query, search_fields, keywords, page)
26
  # 16. display_details(index, results)
27
  # 17. get_details(index, dataframe)
 
55
  import shutil
56
  import sqlite3
57
  import threading
58
+ import time
59
  import traceback
60
  from contextlib import contextmanager
61
  from datetime import datetime, timedelta
62
  from typing import List, Tuple, Dict, Any, Optional
63
  from urllib.parse import quote
64
 
65
+ from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
66
  # Local Libraries
67
  from App_Function_Libraries.Utils.Utils import get_project_relative_path, get_database_path, \
68
  get_database_dir
 
344
  )
345
  ''',
346
  '''
347
  CREATE TABLE IF NOT EXISTS Transcripts (
348
  id INTEGER PRIMARY KEY AUTOINCREMENT,
349
  media_id INTEGER,
 
402
  'CREATE INDEX IF NOT EXISTS idx_mediakeywords_keyword_id ON MediaKeywords(keyword_id)',
403
  'CREATE INDEX IF NOT EXISTS idx_media_version_media_id ON MediaVersion(media_id)',
404
  'CREATE INDEX IF NOT EXISTS idx_mediamodifications_media_id ON MediaModifications(media_id)',
 
 
405
  'CREATE INDEX IF NOT EXISTS idx_media_is_trash ON Media(is_trash)',
406
  'CREATE INDEX IF NOT EXISTS idx_mediachunks_media_id ON MediaChunks(media_id)',
407
  'CREATE INDEX IF NOT EXISTS idx_unvectorized_media_chunks_media_id ON UnvectorizedMediaChunks(media_id)',
 
585
  # Function to add media with keywords
586
  def add_media_with_keywords(url, title, media_type, content, keywords, prompt, summary, transcription_model, author,
587
  ingestion_date):
588
+ log_counter("add_media_with_keywords_attempt")
589
+ start_time = time.time()
590
  logging.debug(f"Entering add_media_with_keywords: URL={url}, Title={title}")
591
+
592
  # Set default values for missing fields
593
  if url is None:
594
  url = 'localhost'
 
604
  author = author or 'Unknown'
605
  ingestion_date = ingestion_date or datetime.now().strftime('%Y-%m-%d')
606
 
607
+ if media_type not in ['article', 'audio', 'book', 'document', 'mediawiki_article', 'mediawiki_dump',
608
+ 'obsidian_note', 'podcast', 'text', 'video', 'unknown']:
609
+ log_counter("add_media_with_keywords_error", labels={"error_type": "InvalidMediaType"})
610
+ duration = time.time() - start_time
611
+ log_histogram("add_media_with_keywords_duration", duration)
612
+ raise InputError("Invalid media type. Allowed types: article, audio file, document, obsidian_note, podcast, text, video, unknown.")
613
 
614
  if ingestion_date and not is_valid_date(ingestion_date):
615
+ log_counter("add_media_with_keywords_error", labels={"error_type": "InvalidDateFormat"})
616
+ duration = time.time() - start_time
617
+ log_histogram("add_media_with_keywords_duration", duration)
618
  raise InputError("Invalid ingestion date format. Use YYYY-MM-DD.")
619
 
620
  # Handle keywords as either string or list
 
643
  logging.debug(f"Existing media ID for {url}: {existing_media_id}")
644
 
645
  if existing_media_id:
646
+ # Update existing media
647
  media_id = existing_media_id
648
  logging.debug(f"Updating existing media with ID: {media_id}")
649
  cursor.execute('''
 
651
  SET content = ?, transcription_model = ?, type = ?, author = ?, ingestion_date = ?
652
  WHERE id = ?
653
  ''', (content, transcription_model, media_type, author, ingestion_date, media_id))
654
+ log_counter("add_media_with_keywords_update")
655
  else:
656
+ # Insert new media
657
  logging.debug("Inserting new media")
658
  cursor.execute('''
659
  INSERT INTO Media (url, title, type, content, author, ingestion_date, transcription_model)
 
661
  ''', (url, title, media_type, content, author, ingestion_date, transcription_model))
662
  media_id = cursor.lastrowid
663
  logging.debug(f"New media inserted with ID: {media_id}")
664
+ log_counter("add_media_with_keywords_insert")
665
 
666
  cursor.execute('''
667
  INSERT INTO MediaModifications (media_id, prompt, summary, modification_date)
 
691
  conn.commit()
692
  logging.info(f"Media '{title}' successfully added/updated with ID: {media_id}")
693
 
694
+ duration = time.time() - start_time
695
+ log_histogram("add_media_with_keywords_duration", duration)
696
+ log_counter("add_media_with_keywords_success")
697
+
698
+ return media_id, f"Media '{title}' added/updated successfully with keywords: {', '.join(keyword_list)}"
699
 
700
  except sqlite3.Error as e:
701
  logging.error(f"SQL Error in add_media_with_keywords: {e}")
702
+ duration = time.time() - start_time
703
+ log_histogram("add_media_with_keywords_duration", duration)
704
+ log_counter("add_media_with_keywords_error", labels={"error_type": "SQLiteError"})
705
  raise DatabaseError(f"Error adding media with keywords: {e}")
706
  except Exception as e:
707
  logging.error(f"Unexpected Error in add_media_with_keywords: {e}")
708
+ duration = time.time() - start_time
709
+ log_histogram("add_media_with_keywords_duration", duration)
710
+ log_counter("add_media_with_keywords_error", labels={"error_type": type(e).__name__})
711
  raise DatabaseError(f"Unexpected error: {e}")
712
 
713
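The attempt/duration/success/error pattern added above recurs across several functions in this file; a decorator along these lines could factor it out (a sketch only, assuming log_counter and log_histogram keep the signatures used here, imported from App_Function_Libraries.Metrics.metrics_logger as shown in the import block above):

    import functools
    import time

    def instrumented(metric_prefix):
        """Wrap a function with the counter/histogram pattern used in this module."""
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                log_counter(f"{metric_prefix}_attempt")
                start_time = time.time()
                try:
                    result = func(*args, **kwargs)
                    log_counter(f"{metric_prefix}_success")
                    return result
                except Exception as e:
                    log_counter(f"{metric_prefix}_error", labels={"error_type": type(e).__name__})
                    raise
                finally:
                    log_histogram(f"{metric_prefix}_duration", time.time() - start_time)
            return wrapper
        return decorator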
 
 
782
 
783
  # Function to add a keyword
784
  def add_keyword(keyword: str) -> int:
785
+ log_counter("add_keyword_attempt")
786
+ start_time = time.time()
787
+
788
  if not keyword.strip():
789
+ log_counter("add_keyword_error", labels={"error_type": "EmptyKeyword"})
790
+ duration = time.time() - start_time
791
+ log_histogram("add_keyword_duration", duration)
792
  raise DatabaseError("Keyword cannot be empty")
793
 
794
  keyword = keyword.strip().lower()
 
810
 
811
  logging.info(f"Keyword '{keyword}' added or updated with ID: {keyword_id}")
812
  conn.commit()
813
+
814
+ duration = time.time() - start_time
815
+ log_histogram("add_keyword_duration", duration)
816
+ log_counter("add_keyword_success")
817
+
818
  return keyword_id
819
  except sqlite3.IntegrityError as e:
820
  logging.error(f"Integrity error adding keyword: {e}")
821
+ duration = time.time() - start_time
822
+ log_histogram("add_keyword_duration", duration)
823
+ log_counter("add_keyword_error", labels={"error_type": "IntegrityError"})
824
  raise DatabaseError(f"Integrity error adding keyword: {e}")
825
  except sqlite3.Error as e:
826
  logging.error(f"Error adding keyword: {e}")
827
+ duration = time.time() - start_time
828
+ log_histogram("add_keyword_duration", duration)
829
+ log_counter("add_keyword_error", labels={"error_type": "SQLiteError"})
830
  raise DatabaseError(f"Error adding keyword: {e}")
831
 
832
 
833
 
834
  # Function to delete a keyword
835
  def delete_keyword(keyword: str) -> str:
836
+ log_counter("delete_keyword_attempt")
837
+ start_time = time.time()
838
+
839
  keyword = keyword.strip().lower()
840
  with db.get_connection() as conn:
841
  cursor = conn.cursor()
 
846
  cursor.execute('DELETE FROM Keywords WHERE keyword = ?', (keyword,))
847
  cursor.execute('DELETE FROM keyword_fts WHERE rowid = ?', (keyword_id[0],))
848
  conn.commit()
849
+
850
+ duration = time.time() - start_time
851
+ log_histogram("delete_keyword_duration", duration)
852
+ log_counter("delete_keyword_success")
853
+
854
  return f"Keyword '{keyword}' deleted successfully."
855
  else:
856
+ duration = time.time() - start_time
857
+ log_histogram("delete_keyword_duration", duration)
858
+ log_counter("delete_keyword_not_found")
859
+
860
  return f"Keyword '{keyword}' not found."
861
  except sqlite3.Error as e:
862
+ duration = time.time() - start_time
863
+ log_histogram("delete_keyword_duration", duration)
864
+ log_counter("delete_keyword_error", labels={"error_type": type(e).__name__})
865
+ logging.error(f"Error deleting keyword: {e}")
866
  raise DatabaseError(f"Error deleting keyword: {e}")
867
 
868
 
 
1036
 
1037
 
1038
  # Function to search the database with advanced options, including keyword search and full-text search
1039
+ def search_media_db(search_query: str, search_fields: List[str], keywords: str, page: int = 1, results_per_page: int = 20, connection=None):
1040
  if page < 1:
1041
  raise ValueError("Page number must be 1 or greater.")
1042
 
 
1091
 
1092
  # Gradio function to handle user input and display results with pagination, with better feedback
1093
  def search_and_display(search_query, search_fields, keywords, page):
1094
+ results = search_media_db(search_query, search_fields, keywords, page)
1095
 
1096
  if isinstance(results, pd.DataFrame):
1097
  # Convert DataFrame to a list of tuples or lists
 
1169
  # Function to export search results to CSV or markdown with pagination
1170
  def export_to_file(search_query: str, search_fields: List[str], keyword: str, page: int = 1, results_per_file: int = 1000, export_format: str = 'csv'):
1171
  try:
1172
+ results = search_media_db(search_query, search_fields, keyword, page, results_per_file)
1173
  if not results:
1174
  return "No results found to export."
1175
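A minimal call sketch for the renamed search_media_db (the field names 'title' and 'content' and the keyword string are assumptions for illustration, since the accepted field list is not shown in this excerpt):

    results = search_media_db(
        search_query="transformer",
        search_fields=["title", "content"],   # assumed field names
        keywords="nlp",
        page=1,
        results_per_page=20,
    )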
 
 
1417
  #######################################################################################################################
1418
 
1419
 
1420
  #######################################################################################################################
1421
  #
1422
  # Function to fetch/update media content
 
1759
  #######################################################################################################################
1760
 
1761
1762
  #######################################################################################################################
1763
  #
1764
  # Functions to Compare Transcripts
 
2378
  :param media_id: ID of the media these chunks belong to
2379
  :param batch_size: Number of chunks to process in each batch
2380
  """
2381
+ log_counter("process_chunks_attempt", labels={"media_id": media_id})
2382
+ start_time = time.time()
2383
  total_chunks = len(chunks)
2384
  processed_chunks = 0
2385
 
2386
+ try:
2387
+ for i in range(0, total_chunks, batch_size):
2388
+ batch = chunks[i:i + batch_size]
2389
+ chunk_data = [
2390
+ (media_id, chunk['text'], chunk['start_index'], chunk['end_index'])
2391
+ for chunk in batch
2392
+ ]
2393
 
2394
+ try:
2395
+ database.execute_many(
2396
+ "INSERT INTO MediaChunks (media_id, chunk_text, start_index, end_index) VALUES (?, ?, ?, ?)",
2397
+ chunk_data
2398
+ )
2399
+ processed_chunks += len(batch)
2400
+ logging.info(f"Processed {processed_chunks}/{total_chunks} chunks for media_id {media_id}")
2401
+ log_counter("process_chunks_batch_success", labels={"media_id": media_id})
2402
+ except Exception as e:
2403
+ logging.error(f"Error inserting chunk batch for media_id {media_id}: {e}")
2404
+ log_counter("process_chunks_batch_error", labels={"media_id": media_id, "error_type": type(e).__name__})
2405
+ # Optionally, you could raise an exception here to stop processing
2406
+ # raise
2407
+
2408
+ logging.info(f"Finished processing all {total_chunks} chunks for media_id {media_id}")
2409
+ duration = time.time() - start_time
2410
+ log_histogram("process_chunks_duration", duration, labels={"media_id": media_id})
2411
+ log_counter("process_chunks_success", labels={"media_id": media_id})
2412
+ except Exception as e:
2413
+ duration = time.time() - start_time
2414
+ log_histogram("process_chunks_duration", duration, labels={"media_id": media_id})
2415
+ log_counter("process_chunks_error", labels={"media_id": media_id, "error_type": type(e).__name__})
2416
+ logging.error(f"Error processing chunks for media_id {media_id}: {e}")
2417
 
2418
 
2419
  # Usage example:
 
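The usage example referenced above is elided in this view; as a rough illustration, process_chunks might be driven like this (a minimal sketch: chunk_text_simple and the database handle are illustrative stand-ins, not confirmed APIs from this repository):

# Hedged sketch only - `database` is assumed to be any object exposing
# execute_many(sql, rows); `chunk_text_simple` is a made-up helper for illustration.
def chunk_text_simple(text, size=500):
    # Split text into fixed-size character windows, keeping the offsets
    # process_chunks expects ('text', 'start_index', 'end_index').
    return [
        {"text": text[i:i + size], "start_index": i, "end_index": min(i + size, len(text))}
        for i in range(0, len(text), size)
    ]

transcript_text = "example transcript text " * 200
chunks = chunk_text_simple(transcript_text)
process_chunks(database, chunks, media_id=42, batch_size=100)
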
  #
  # Workflow Functions

+ # Workflow Functions
  def save_workflow_chat_to_db(chat_history, workflow_name, conversation_id=None):
+     pass
+     # try:
+     #     with db.get_connection() as conn:
+     #         cursor = conn.cursor()
+     #
+     #         if conversation_id is None:
+     #             # Create a new conversation
+     #             conversation_name = f"{workflow_name}_Workflow_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+     #             cursor.execute('''
+     #                 INSERT INTO ChatConversations (media_id, media_name, conversation_name, created_at, updated_at)
+     #                 VALUES (NULL, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
+     #             ''', (workflow_name, conversation_name))
+     #             conversation_id = cursor.lastrowid
+     #         else:
+     #             # Update existing conversation
+     #             cursor.execute('''
+     #                 UPDATE ChatConversations
+     #                 SET updated_at = CURRENT_TIMESTAMP
+     #                 WHERE id = ?
+     #             ''', (conversation_id,))
+     #
+     #         # Save messages
+     #         for user_msg, ai_msg in chat_history:
+     #             if user_msg:
+     #                 cursor.execute('''
+     #                     INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
+     #                     VALUES (?, 'user', ?, CURRENT_TIMESTAMP)
+     #                 ''', (conversation_id, user_msg))
+     #             if ai_msg:
+     #                 cursor.execute('''
+     #                     INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
+     #                     VALUES (?, 'ai', ?, CURRENT_TIMESTAMP)
+     #                 ''', (conversation_id, ai_msg))
+     #
+     #         conn.commit()
+     #
+     #         return conversation_id, f"Chat saved successfully! Conversation ID: {conversation_id}"
+     # except Exception as e:
+     #     logging.error(f"Error saving workflow chat to database: {str(e)}")
+     #     return None, f"Error saving chat to database: {str(e)}"


  def get_workflow_chat(conversation_id):
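Since save_workflow_chat_to_db is reduced to a stub above (it now returns None), any caller that still unpacks the old (conversation_id, status_message) pair will break; a minimal guard, assuming that two-tuple contract from the commented-out body:

# Hedged sketch: wraps the stub so existing workflow-UI callbacks keep working.
# Assumes callers expect a (conversation_id, status_message) tuple, as in the
# commented-out implementation above; the wrapper name is illustrative.
def save_workflow_chat_safe(chat_history, workflow_name, conversation_id=None):
    result = save_workflow_chat_to_db(chat_history, workflow_name, conversation_id)
    if result is None:
        return conversation_id, "Workflow chat persistence is currently disabled."
    return result
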
App_Function_Libraries/Gradio_Related.py CHANGED
@@ -1,420 +1,600 @@
1
- # Gradio_Related.py
2
- #########################################
3
- # Gradio UI Functions Library
4
- # I fucking hate Gradio.
5
- #
6
- #########################################
7
- #
8
- # Built-In Imports
9
- import logging
10
- import os
11
- import webbrowser
12
-
13
- #
14
- # Import 3rd-Party Libraries
15
- import gradio as gr
16
- #
17
- # Local Imports
18
- from App_Function_Libraries.DB.DB_Manager import get_db_config
19
- from App_Function_Libraries.Gradio_UI.Arxiv_tab import create_arxiv_tab
20
- from App_Function_Libraries.Gradio_UI.Audio_ingestion_tab import create_audio_processing_tab
21
- from App_Function_Libraries.Gradio_UI.Book_Ingestion_tab import create_import_book_tab
22
- from App_Function_Libraries.Gradio_UI.Character_Chat_tab import create_character_card_interaction_tab, create_character_chat_mgmt_tab, create_custom_character_card_tab, \
23
- create_character_card_validation_tab, create_export_characters_tab
24
- from App_Function_Libraries.Gradio_UI.Character_interaction_tab import create_narrator_controlled_conversation_tab, \
25
- create_multiple_character_chat_tab
26
- from App_Function_Libraries.Gradio_UI.Chat_ui import create_chat_management_tab, \
27
- create_chat_interface_four, create_chat_interface_multi_api, create_chat_interface_stacked, create_chat_interface
28
- from App_Function_Libraries.Gradio_UI.Config_tab import create_config_editor_tab
29
- from App_Function_Libraries.Gradio_UI.Explain_summarize_tab import create_summarize_explain_tab
30
- from App_Function_Libraries.Gradio_UI.Export_Functionality import create_export_tab
31
- from App_Function_Libraries.Gradio_UI.Backup_Functionality import create_backup_tab, create_view_backups_tab, \
32
- create_restore_backup_tab
33
- from App_Function_Libraries.Gradio_UI.Import_Functionality import create_import_single_prompt_tab, \
34
- create_import_obsidian_vault_tab, create_import_item_tab, create_import_multiple_prompts_tab
35
- from App_Function_Libraries.Gradio_UI.Introduction_tab import create_introduction_tab
36
- from App_Function_Libraries.Gradio_UI.Keywords import create_view_keywords_tab, create_add_keyword_tab, \
37
- create_delete_keyword_tab, create_export_keywords_tab
38
- from App_Function_Libraries.Gradio_UI.Live_Recording import create_live_recording_tab
39
- #from App_Function_Libraries.Gradio_UI.Llamafile_tab import create_chat_with_llamafile_tab
40
- #from App_Function_Libraries.Gradio_UI.MMLU_Pro_tab import create_mmlu_pro_tab
41
- from App_Function_Libraries.Gradio_UI.Media_edit import create_prompt_clone_tab, create_prompt_edit_tab, \
42
- create_media_edit_and_clone_tab, create_media_edit_tab
43
- from App_Function_Libraries.Gradio_UI.Media_wiki_tab import create_mediawiki_import_tab, create_mediawiki_config_tab
44
- from App_Function_Libraries.Gradio_UI.PDF_ingestion_tab import create_pdf_ingestion_tab, create_pdf_ingestion_test_tab
45
- from App_Function_Libraries.Gradio_UI.Plaintext_tab_import import create_plain_text_import_tab
46
- from App_Function_Libraries.Gradio_UI.Podcast_tab import create_podcast_tab
47
- from App_Function_Libraries.Gradio_UI.Prompt_Suggestion_tab import create_prompt_suggestion_tab
48
- from App_Function_Libraries.Gradio_UI.RAG_QA_Chat_tab import create_rag_qa_chat_tab, create_rag_qa_notes_management_tab, \
49
- create_rag_qa_chat_management_tab
50
- from App_Function_Libraries.Gradio_UI.Re_summarize_tab import create_resummary_tab
51
- from App_Function_Libraries.Gradio_UI.Search_Tab import create_prompt_search_tab, \
52
- create_search_summaries_tab, create_search_tab
53
- from App_Function_Libraries.Gradio_UI.RAG_Chat_tab import create_rag_tab
54
- from App_Function_Libraries.Gradio_UI.Embeddings_tab import create_embeddings_tab, create_view_embeddings_tab, \
55
- create_purge_embeddings_tab
56
- from App_Function_Libraries.Gradio_UI.Trash import create_view_trash_tab, create_empty_trash_tab, \
57
- create_delete_trash_tab, create_search_and_mark_trash_tab
58
- from App_Function_Libraries.Gradio_UI.Utilities import create_utilities_yt_timestamp_tab, create_utilities_yt_audio_tab, \
59
- create_utilities_yt_video_tab
60
- from App_Function_Libraries.Gradio_UI.Video_transcription_tab import create_video_transcription_tab
61
- from App_Function_Libraries.Gradio_UI.View_tab import create_manage_items_tab
62
- from App_Function_Libraries.Gradio_UI.Website_scraping_tab import create_website_scraping_tab
63
- from App_Function_Libraries.Gradio_UI.Chat_Workflows import chat_workflows_tab
64
- from App_Function_Libraries.Gradio_UI.View_DB_Items_tab import create_prompt_view_tab, \
65
- create_view_all_with_versions_tab, create_viewing_tab
66
- #
67
- # Gradio UI Imports
68
- from App_Function_Libraries.Gradio_UI.Evaluations_Benchmarks_tab import create_geval_tab, create_infinite_bench_tab
69
- #from App_Function_Libraries.Local_LLM.Local_LLM_huggingface import create_huggingface_tab
70
- #from App_Function_Libraries.Local_LLM.Local_LLM_ollama import create_ollama_tab
71
- #
72
- #######################################################################################################################
73
- # Function Definitions
74
- #
75
-
76
-
77
- # Disable Gradio Analytics
78
- os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
79
-
80
-
81
- custom_prompt_input = None
82
- server_mode = False
83
- share_public = False
84
- custom_prompt_summarize_bulleted_notes = ("""
85
- <s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
86
- **Bulleted Note Creation Guidelines**
87
-
88
- **Headings**:
89
- - Based on referenced topics, not categories like quotes or terms
90
- - Surrounded by **bold** formatting
91
- - Not listed as bullet points
92
- - No space between headings and list items underneath
93
-
94
- **Emphasis**:
95
- - **Important terms** set in bold font
96
- - **Text ending in a colon**: also bolded
97
-
98
- **Review**:
99
- - Ensure adherence to specified format
100
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
101
- """)
102
- #
103
- # End of globals
104
- #######################################################################################################################
105
- #
106
- # Start of Video/Audio Transcription and Summarization Functions
107
- #
108
- # Functions:
109
- # FIXME
110
- #
111
- #
112
- ################################################################################################################
113
- # Functions for Re-Summarization
114
- #
115
- # Functions:
116
- # FIXME
117
- # End of Re-Summarization Functions
118
- #
119
- ############################################################################################################################################################################################################################
120
- #
121
- # Explain/Summarize This Tab
122
- #
123
- # Functions:
124
- # FIXME
125
- #
126
- #
127
- ############################################################################################################################################################################################################################
128
- #
129
- # Transcript Comparison Tab
130
- #
131
- # Functions:
132
- # FIXME
133
- #
134
- #
135
- ###########################################################################################################################################################################################################################
136
- #
137
- # Search Tab
138
- #
139
- # Functions:
140
- # FIXME
141
- #
142
- # End of Search Tab Functions
143
- #
144
- ##############################################################################################################################################################################################################################
145
- #
146
- # Llamafile Tab
147
- #
148
- # Functions:
149
- # FIXME
150
- #
151
- # End of Llamafile Tab Functions
152
- ##############################################################################################################################################################################################################################
153
- #
154
- # Chat Interface Tab Functions
155
- #
156
- # Functions:
157
- # FIXME
158
- #
159
- #
160
- # End of Chat Interface Tab Functions
161
- ################################################################################################################################################################################################################################
162
- #
163
- # Media Edit Tab Functions
164
- # Functions:
165
- # Fixme
166
- # create_media_edit_tab():
167
- ##### Trash Tab
168
- # FIXME
169
- # Functions:
170
- #
171
- # End of Media Edit Tab Functions
172
- ################################################################################################################
173
- #
174
- # Import Items Tab Functions
175
- #
176
- # Functions:
177
- #FIXME
178
- # End of Import Items Tab Functions
179
- ################################################################################################################
180
- #
181
- # Export Items Tab Functions
182
- #
183
- # Functions:
184
- # FIXME
185
- #
186
- #
187
- # End of Export Items Tab Functions
188
- ################################################################################################################
189
- #
190
- # Keyword Management Tab Functions
191
- #
192
- # Functions:
193
- # create_view_keywords_tab():
194
- # FIXME
195
- #
196
- # End of Keyword Management Tab Functions
197
- ################################################################################################################
198
- #
199
- # Document Editing Tab Functions
200
- #
201
- # Functions:
202
- # #FIXME
203
- #
204
- #
205
- ################################################################################################################
206
- #
207
- # Utilities Tab Functions
208
- # Functions:
209
- # create_utilities_yt_video_tab():
210
- # #FIXME
211
-
212
- #
213
- # End of Utilities Tab Functions
214
- ################################################################################################################
215
-
216
- # FIXME - Prompt sample box
217
- #
218
- # # Sample data
219
- # prompts_category_1 = [
220
- # "What are the key points discussed in the video?",
221
- # "Summarize the main arguments made by the speaker.",
222
- # "Describe the conclusions of the study presented."
223
- # ]
224
- #
225
- # prompts_category_2 = [
226
- # "How does the proposed solution address the problem?",
227
- # "What are the implications of the findings?",
228
- # "Can you explain the theory behind the observed phenomenon?"
229
- # ]
230
- #
231
- # all_prompts2 = prompts_category_1 + prompts_category_2
232
-
233
-
234
- def launch_ui(share_public=None, server_mode=False):
235
- webbrowser.open_new_tab('http://127.0.0.1:7860/?__theme=dark')
236
- share=share_public
237
- css = """
238
- .result-box {
239
- margin-bottom: 20px;
240
- border: 1px solid #ddd;
241
- padding: 10px;
242
- }
243
- .result-box.error {
244
- border-color: #ff0000;
245
- background-color: #ffeeee;
246
- }
247
- .transcription, .summary {
248
- max-height: 800px;
249
- overflow-y: auto;
250
- border: 1px solid #eee;
251
- padding: 10px;
252
- margin-top: 10px;
253
- }
254
- """
255
-
256
- with gr.Blocks(theme='bethecloud/storj_theme',css=css) as iface:
257
- gr.HTML(
258
- """
259
- <script>
260
- document.addEventListener('DOMContentLoaded', (event) => {
261
- document.body.classList.add('dark');
262
- document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)';
263
- });
264
- </script>
265
- """
266
- )
267
- db_config = get_db_config()
268
- db_type = db_config['type']
269
- gr.Markdown(f"# tl/dw: Your LLM-powered Research Multi-tool")
270
- gr.Markdown(f"(Using {db_type.capitalize()} Database)")
271
- with gr.Tabs():
272
- with gr.TabItem("Transcription / Summarization / Ingestion", id="ingestion-grouping", visible=True):
273
- with gr.Tabs():
274
- create_video_transcription_tab()
275
- #create_audio_processing_tab()
276
- #create_podcast_tab()
277
- #create_import_book_tab()
278
- ##create_plain_text_import_tab()
279
- #create_website_scraping_tab()
280
- #create_pdf_ingestion_tab()
281
- #create_pdf_ingestion_test_tab()
282
- #create_resummary_tab()
283
- #create_summarize_explain_tab()
284
- #create_live_recording_tab()
285
- #create_arxiv_tab()
286
-
287
- #with gr.TabItem("Text Search", id="text search", visible=True):
288
- #create_search_tab()
289
- #create_search_summaries_tab()
290
-
291
- #with gr.TabItem("RAG Chat/Search", id="RAG Chat Notes group", visible=True):
292
- #create_rag_tab()
293
- #create_rag_qa_chat_tab()
294
- #create_rag_qa_notes_management_tab()
295
- #create_rag_qa_chat_management_tab()
296
-
297
- #with gr.TabItem("Chat with an LLM", id="LLM Chat group", visible=True):
298
- #create_chat_interface()
299
- #create_chat_interface_stacked()
300
- #create_chat_interface_multi_api()
301
- #create_chat_interface_four()
302
- #create_chat_with_llamafile_tab()
303
- #create_chat_management_tab()
304
- #chat_workflows_tab()
305
-
306
-
307
- #with gr.TabItem("Character Chat", id="character chat group", visible=True):
308
- #create_character_card_interaction_tab()
309
- #create_character_chat_mgmt_tab()
310
- #create_custom_character_card_tab()
311
- #create_character_card_validation_tab()
312
- #create_multiple_character_chat_tab()
313
- #create_narrator_controlled_conversation_tab()
314
- #create_export_characters_tab()
315
-
316
-
317
- #with gr.TabItem("View DB Items", id="view db items group", visible=True):
318
- # This one works
319
- #create_view_all_with_versions_tab()
320
- # This one is WIP
321
- #create_viewing_tab()
322
- #create_prompt_view_tab()
323
-
324
-
325
- #with gr.TabItem("Prompts", id='view prompts group', visible=True):
326
- #create_prompt_view_tab()
327
- #create_prompt_search_tab()
328
- #create_prompt_edit_tab()
329
- #create_prompt_clone_tab()
330
- #create_prompt_suggestion_tab()
331
-
332
-
333
- #with gr.TabItem("Manage / Edit Existing Items", id="manage group", visible=True):
334
- #create_media_edit_tab()
335
- #create_manage_items_tab()
336
- #create_media_edit_and_clone_tab()
337
- # FIXME
338
- #create_compare_transcripts_tab()
339
-
340
-
341
- #with gr.TabItem("Embeddings Management", id="embeddings group", visible=True):
342
- #create_embeddings_tab()
343
- #create_view_embeddings_tab()
344
- #create_purge_embeddings_tab()
345
-
346
- #with gr.TabItem("Writing Tools", id="writing_tools group", visible=True):
347
- #from App_Function_Libraries.Gradio_UI.Writing_tab import create_document_feedback_tab
348
- #create_document_feedback_tab()
349
- #from App_Function_Libraries.Gradio_UI.Writing_tab import create_grammar_style_check_tab
350
- #create_grammar_style_check_tab()
351
- #from App_Function_Libraries.Gradio_UI.Writing_tab import create_tone_adjustment_tab
352
- #create_tone_adjustment_tab()
353
- #from App_Function_Libraries.Gradio_UI.Writing_tab import create_creative_writing_tab
354
- #create_creative_writing_tab()
355
- #from App_Function_Libraries.Gradio_UI.Writing_tab import create_mikupad_tab
356
- #create_mikupad_tab()
357
-
358
-
359
- #with gr.TabItem("Keywords", id="keywords group", visible=True):
360
- #create_view_keywords_tab()
361
- #create_add_keyword_tab()
362
- #create_delete_keyword_tab()
363
- #create_export_keywords_tab()
364
-
365
- #with gr.TabItem("Import", id="import group", visible=True):
366
- #create_import_item_tab()
367
- #create_import_obsidian_vault_tab()
368
- #create_import_single_prompt_tab()
369
- #create_import_multiple_prompts_tab()
370
- #create_mediawiki_import_tab()
371
- #create_mediawiki_config_tab()
372
-
373
- #with gr.TabItem("Export", id="export group", visible=True):
374
- #create_export_tab()
375
-
376
- #with gr.TabItem("Backup Management", id="backup group", visible=True):
377
- #create_backup_tab()
378
- #create_view_backups_tab()
379
- #create_restore_backup_tab()
380
-
381
- #with gr.TabItem("Utilities", id="util group", visible=True):
382
- #create_utilities_yt_video_tab()
383
- #create_utilities_yt_audio_tab()
384
- #create_utilities_yt_timestamp_tab()
385
-
386
- #with gr.TabItem("Local LLM", id="local llm group", visible=True):
387
- #create_chat_with_llamafile_tab()
388
- #create_ollama_tab()
389
- #create_huggingface_tab()
390
-
391
- #with gr.TabItem("Trashcan", id="trashcan group", visible=True):
392
- #create_search_and_mark_trash_tab()
393
- #create_view_trash_tab()
394
- #create_delete_trash_tab()
395
- #create_empty_trash_tab()
396
-
397
- #with gr.TabItem("Evaluations", id="eval", visible=True):
398
- #create_geval_tab()
399
- #create_infinite_bench_tab()
400
- # FIXME
401
- #create_mmlu_pro_tab()
402
-
403
- #with gr.TabItem("Introduction/Help", id="introduction group", visible=True):
404
- #create_introduction_tab()
405
-
406
- #with gr.TabItem("Config Editor", id="config group"):
407
- #create_config_editor_tab()
408
-
409
- # Launch the interface
410
- server_port_variable = 7860
411
- os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
412
- if share==True:
413
- iface.launch(share=True)
414
- elif server_mode and not share_public:
415
- iface.launch(share=False, server_name="0.0.0.0", server_port=server_port_variable, )
416
- else:
417
- try:
418
- iface.launch(share=False, server_name="0.0.0.0", server_port=server_port_variable, )
419
- except Exception as e:
420
- logging.error(f"Error launching interface: {str(e)}")
 
1
+ # Gradio_Related.py
2
+ #########################################
3
+ # Gradio UI Functions Library
4
+ # I fucking hate Gradio.
5
+ #
6
+ #########################################
7
+ #
8
+ # Built-In Imports
9
+ import logging
10
+ import os
11
+ import webbrowser
12
+ #
13
+ # Import 3rd-Party Libraries
14
+ import gradio as gr
15
+ #
16
+ # Local Imports
17
+ from App_Function_Libraries.DB.DB_Manager import get_db_config, backup_dir
18
+ from App_Function_Libraries.DB.RAG_QA_Chat_DB import create_tables
19
+ from App_Function_Libraries.Gradio_UI.Anki_tab import create_anki_validation_tab, create_anki_generator_tab
20
+ from App_Function_Libraries.Gradio_UI.Arxiv_tab import create_arxiv_tab
21
+ from App_Function_Libraries.Gradio_UI.Audio_ingestion_tab import create_audio_processing_tab
22
+ from App_Function_Libraries.Gradio_UI.Backup_RAG_Notes_Character_Chat_tab import create_database_management_interface
23
+ from App_Function_Libraries.Gradio_UI.Book_Ingestion_tab import create_import_book_tab
24
+ from App_Function_Libraries.Gradio_UI.Character_Chat_tab import create_character_card_interaction_tab, create_character_chat_mgmt_tab, create_custom_character_card_tab, \
25
+ create_character_card_validation_tab, create_export_characters_tab
26
+ from App_Function_Libraries.Gradio_UI.Character_interaction_tab import create_narrator_controlled_conversation_tab, \
27
+ create_multiple_character_chat_tab
28
+ from App_Function_Libraries.Gradio_UI.Chat_ui import create_chat_interface_four, create_chat_interface_multi_api, \
29
+ create_chat_interface_stacked, create_chat_interface
30
+ from App_Function_Libraries.Gradio_UI.Config_tab import create_config_editor_tab
31
+ from App_Function_Libraries.Gradio_UI.Explain_summarize_tab import create_summarize_explain_tab
32
+ from App_Function_Libraries.Gradio_UI.Export_Functionality import create_rag_export_tab, create_export_tabs
33
+ #from App_Function_Libraries.Gradio_UI.Backup_Functionality import create_backup_tab, create_view_backups_tab, \
34
+ # create_restore_backup_tab
35
+ from App_Function_Libraries.Gradio_UI.Import_Functionality import create_import_single_prompt_tab, \
36
+ create_import_obsidian_vault_tab, create_import_item_tab, create_import_multiple_prompts_tab, \
37
+ create_conversation_import_tab
38
+ from App_Function_Libraries.Gradio_UI.Introduction_tab import create_introduction_tab
39
+ from App_Function_Libraries.Gradio_UI.Keywords import create_view_keywords_tab, create_add_keyword_tab, \
40
+ create_delete_keyword_tab, create_export_keywords_tab, create_rag_qa_keywords_tab, create_character_keywords_tab, \
41
+ create_meta_keywords_tab, create_prompt_keywords_tab
42
+ from App_Function_Libraries.Gradio_UI.Live_Recording import create_live_recording_tab
43
+ from App_Function_Libraries.Gradio_UI.Llamafile_tab import create_chat_with_llamafile_tab
44
+ #from App_Function_Libraries.Gradio_UI.MMLU_Pro_tab import create_mmlu_pro_tab
45
+ from App_Function_Libraries.Gradio_UI.Media_edit import create_prompt_clone_tab, create_prompt_edit_tab, \
46
+ create_media_edit_and_clone_tab, create_media_edit_tab
47
+ from App_Function_Libraries.Gradio_UI.Media_wiki_tab import create_mediawiki_import_tab, create_mediawiki_config_tab
48
+ from App_Function_Libraries.Gradio_UI.Mind_Map_tab import create_mindmap_tab
49
+ from App_Function_Libraries.Gradio_UI.PDF_ingestion_tab import create_pdf_ingestion_tab, create_pdf_ingestion_test_tab
50
+ from App_Function_Libraries.Gradio_UI.Plaintext_tab_import import create_plain_text_import_tab
51
+ from App_Function_Libraries.Gradio_UI.Podcast_tab import create_podcast_tab
52
+ from App_Function_Libraries.Gradio_UI.Prompt_Suggestion_tab import create_prompt_suggestion_tab
53
+ from App_Function_Libraries.Gradio_UI.RAG_QA_Chat_tab import create_rag_qa_chat_tab, create_rag_qa_notes_management_tab, \
54
+ create_rag_qa_chat_management_tab
55
+ from App_Function_Libraries.Gradio_UI.Re_summarize_tab import create_resummary_tab
56
+ from App_Function_Libraries.Gradio_UI.Search_Tab import create_prompt_search_tab, \
57
+ create_search_summaries_tab, create_search_tab
58
+ from App_Function_Libraries.Gradio_UI.RAG_Chat_tab import create_rag_tab
59
+ from App_Function_Libraries.Gradio_UI.Embeddings_tab import create_embeddings_tab, create_view_embeddings_tab, \
60
+ create_purge_embeddings_tab
61
+ from App_Function_Libraries.Gradio_UI.Semantic_Scholar_tab import create_semantic_scholar_tab
62
+ from App_Function_Libraries.Gradio_UI.Trash import create_view_trash_tab, create_empty_trash_tab, \
63
+ create_delete_trash_tab, create_search_and_mark_trash_tab
64
+ from App_Function_Libraries.Gradio_UI.Utilities import create_utilities_yt_timestamp_tab, create_utilities_yt_audio_tab, \
65
+ create_utilities_yt_video_tab
66
+ from App_Function_Libraries.Gradio_UI.Video_transcription_tab import create_video_transcription_tab
67
+ from App_Function_Libraries.Gradio_UI.View_tab import create_manage_items_tab
68
+ from App_Function_Libraries.Gradio_UI.Website_scraping_tab import create_website_scraping_tab
69
+ from App_Function_Libraries.Gradio_UI.Workflows_tab import chat_workflows_tab
70
+ from App_Function_Libraries.Gradio_UI.View_DB_Items_tab import create_view_all_mediadb_with_versions_tab, \
71
+ create_viewing_mediadb_tab, create_view_all_rag_notes_tab, create_viewing_ragdb_tab, \
72
+ create_mediadb_keyword_search_tab, create_ragdb_keyword_items_tab
73
+ from App_Function_Libraries.Gradio_UI.Prompts_tab import create_prompt_view_tab, create_prompts_export_tab
74
+ #
75
+ # Gradio UI Imports
76
+ from App_Function_Libraries.Gradio_UI.Evaluations_Benchmarks_tab import create_geval_tab, create_infinite_bench_tab
77
+ from App_Function_Libraries.Gradio_UI.XML_Ingestion_Tab import create_xml_import_tab
78
+ #from App_Function_Libraries.Local_LLM.Local_LLM_huggingface import create_huggingface_tab
79
+ from App_Function_Libraries.Local_LLM.Local_LLM_ollama import create_ollama_tab
80
+ from App_Function_Libraries.Utils.Utils import load_and_log_configs
81
+
82
+ #
83
+ #######################################################################################################################
84
+ # Function Definitions
85
+ #
86
+
87
+
88
+ # Disable Gradio Analytics
89
+ os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
90
+
91
+
92
+ custom_prompt_input = None
93
+ server_mode = False
94
+ share_public = False
95
+ custom_prompt_summarize_bulleted_notes = ("""
96
+ <s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks, create comprehensive bulleted notes.[/INST]
97
+ **Bulleted Note Creation Guidelines**
98
+
99
+ **Headings**:
100
+ - Based on referenced topics, not categories like quotes or terms
101
+ - Surrounded by **bold** formatting
102
+ - Not listed as bullet points
103
+ - No space between headings and list items underneath
104
+
105
+ **Emphasis**:
106
+ - **Important terms** set in bold font
107
+ - **Text ending in a colon**: also bolded
108
+
109
+ **Review**:
110
+ - Ensure adherence to specified format
111
+ - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
112
+ """)
113
+ #
114
+ # End of globals
115
+ #######################################################################################################################
116
+ #
117
+ # Start of Video/Audio Transcription and Summarization Functions
118
+ #
119
+ # Functions:
120
+ # FIXME
121
+ #
122
+ #
123
+ ################################################################################################################
124
+ # Functions for Re-Summarization
125
+ #
126
+ # Functions:
127
+ # FIXME
128
+ # End of Re-Summarization Functions
129
+ #
130
+ ############################################################################################################################################################################################################################
131
+ #
132
+ # Explain/Summarize This Tab
133
+ #
134
+ # Functions:
135
+ # FIXME
136
+ #
137
+ #
138
+ ############################################################################################################################################################################################################################
139
+ #
140
+ # Transcript Comparison Tab
141
+ #
142
+ # Functions:
143
+ # FIXME
144
+ #
145
+ #
146
+ ###########################################################################################################################################################################################################################
147
+ #
148
+ # Search Tab
149
+ #
150
+ # Functions:
151
+ # FIXME
152
+ #
153
+ # End of Search Tab Functions
154
+ #
155
+ ##############################################################################################################################################################################################################################
156
+ #
157
+ # Llamafile Tab
158
+ #
159
+ # Functions:
160
+ # FIXME
161
+ #
162
+ # End of Llamafile Tab Functions
163
+ ##############################################################################################################################################################################################################################
164
+ #
165
+ # Chat Interface Tab Functions
166
+ #
167
+ # Functions:
168
+ # FIXME
169
+ #
170
+ #
171
+ # End of Chat Interface Tab Functions
172
+ ################################################################################################################################################################################################################################
173
+ #
174
+ # Media Edit Tab Functions
175
+ # Functions:
176
+ # Fixme
177
+ # create_media_edit_tab():
178
+ ##### Trash Tab
179
+ # FIXME
180
+ # Functions:
181
+ #
182
+ # End of Media Edit Tab Functions
183
+ ################################################################################################################
184
+ #
185
+ # Import Items Tab Functions
186
+ #
187
+ # Functions:
188
+ #FIXME
189
+ # End of Import Items Tab Functions
190
+ ################################################################################################################
191
+ #
192
+ # Export Items Tab Functions
193
+ #
194
+ # Functions:
195
+ # FIXME
196
+ #
197
+ #
198
+ # End of Export Items Tab Functions
199
+ ################################################################################################################
200
+ #
201
+ # Keyword Management Tab Functions
202
+ #
203
+ # Functions:
204
+ # create_view_keywords_tab():
205
+ # FIXME
206
+ #
207
+ # End of Keyword Management Tab Functions
208
+ ################################################################################################################
209
+ #
210
+ # Document Editing Tab Functions
211
+ #
212
+ # Functions:
213
+ # #FIXME
214
+ #
215
+ #
216
+ ################################################################################################################
217
+ #
218
+ # Utilities Tab Functions
219
+ # Functions:
220
+ # create_utilities_yt_video_tab():
221
+ # #FIXME
222
+
223
+ #
224
+ # End of Utilities Tab Functions
225
+ ################################################################################################################
226
+
227
+ # FIXME - Prompt sample box
228
+ #
229
+ # # Sample data
230
+ # prompts_category_1 = [
231
+ # "What are the key points discussed in the video?",
232
+ # "Summarize the main arguments made by the speaker.",
233
+ # "Describe the conclusions of the study presented."
234
+ # ]
235
+ #
236
+ # prompts_category_2 = [
237
+ # "How does the proposed solution address the problem?",
238
+ # "What are the implications of the findings?",
239
+ # "Can you explain the theory behind the observed phenomenon?"
240
+ # ]
241
+ #
242
+ # all_prompts2 = prompts_category_1 + prompts_category_2
243
+
244
+
245
+
246
+ #######################################################################################################################
247
+ #
248
+ # Migration Script
249
+ import sqlite3
250
+ import uuid
251
+ import logging
252
+ import os
253
+ from datetime import datetime
254
+ import shutil
255
+
256
+ # def migrate_media_db_to_rag_chat_db(media_db_path, rag_chat_db_path):
257
+ # # Check if migration is needed
258
+ # if not os.path.exists(media_db_path):
259
+ # logging.info("Media DB does not exist. No migration needed.")
260
+ # return
261
+ #
262
+ # # Optional: Check if migration has already been completed
263
+ # migration_flag = os.path.join(os.path.dirname(rag_chat_db_path), 'migration_completed.flag')
264
+ # if os.path.exists(migration_flag):
265
+ # logging.info("Migration already completed. Skipping migration.")
266
+ # return
267
+ #
268
+ # # Backup databases
269
+ # backup_database(media_db_path)
270
+ # backup_database(rag_chat_db_path)
271
+ #
272
+ # # Connect to both databases
273
+ # try:
274
+ # media_conn = sqlite3.connect(media_db_path)
275
+ # rag_conn = sqlite3.connect(rag_chat_db_path)
276
+ #
277
+ # # Enable foreign key support
278
+ # media_conn.execute('PRAGMA foreign_keys = ON;')
279
+ # rag_conn.execute('PRAGMA foreign_keys = ON;')
280
+ #
281
+ # media_cursor = media_conn.cursor()
282
+ # rag_cursor = rag_conn.cursor()
283
+ #
284
+ # # Begin transaction
285
+ # rag_conn.execute('BEGIN TRANSACTION;')
286
+ #
287
+ # # Extract conversations from media DB
288
+ # media_cursor.execute('''
289
+ # SELECT id, media_id, media_name, conversation_name, created_at, updated_at
290
+ # FROM ChatConversations
291
+ # ''')
292
+ # conversations = media_cursor.fetchall()
293
+ #
294
+ # for conv in conversations:
295
+ # old_conv_id, media_id, media_name, conversation_name, created_at, updated_at = conv
296
+ #
297
+ # # Convert timestamps if necessary
298
+ # created_at = parse_timestamp(created_at)
299
+ # updated_at = parse_timestamp(updated_at)
300
+ #
301
+ # # Generate a new conversation_id
302
+ # conversation_id = str(uuid.uuid4())
303
+ # title = conversation_name or (f"{media_name}-chat" if media_name else "Untitled Conversation")
304
+ #
305
+ # # Insert into conversation_metadata
306
+ # rag_cursor.execute('''
307
+ # INSERT INTO conversation_metadata (conversation_id, created_at, last_updated, title, media_id)
308
+ # VALUES (?, ?, ?, ?, ?)
309
+ # ''', (conversation_id, created_at, updated_at, title, media_id))
310
+ #
311
+ # # Extract messages from media DB
312
+ # media_cursor.execute('''
313
+ # SELECT sender, message, timestamp
314
+ # FROM ChatMessages
315
+ # WHERE conversation_id = ?
316
+ # ORDER BY timestamp ASC
317
+ # ''', (old_conv_id,))
318
+ # messages = media_cursor.fetchall()
319
+ #
320
+ # for msg in messages:
321
+ # sender, content, timestamp = msg
322
+ #
323
+ # # Convert timestamp if necessary
324
+ # timestamp = parse_timestamp(timestamp)
325
+ #
326
+ # role = sender # Assuming 'sender' is 'user' or 'ai'
327
+ #
328
+ # # Insert message into rag_qa_chats
329
+ # rag_cursor.execute('''
330
+ # INSERT INTO rag_qa_chats (conversation_id, timestamp, role, content)
331
+ # VALUES (?, ?, ?, ?)
332
+ # ''', (conversation_id, timestamp, role, content))
333
+ #
334
+ # # Commit transaction
335
+ # rag_conn.commit()
336
+ # logging.info("Migration completed successfully.")
337
+ #
338
+ # # Mark migration as complete
339
+ # with open(migration_flag, 'w') as f:
340
+ # f.write('Migration completed on ' + datetime.now().isoformat())
341
+ #
342
+ # except Exception as e:
343
+ # # Rollback transaction in case of error
344
+ # rag_conn.rollback()
345
+ # logging.error(f"Error during migration: {e}")
346
+ # raise
347
+ # finally:
348
+ # media_conn.close()
349
+ # rag_conn.close()
350
+
351
+ def backup_database(db_path):
352
+ backup_path = db_path + '.backup'
353
+ if not os.path.exists(backup_path):
354
+ shutil.copyfile(db_path, backup_path)
355
+ logging.info(f"Database backed up to {backup_path}")
356
+ else:
357
+ logging.info(f"Backup already exists at {backup_path}")
358
+
359
+ def parse_timestamp(timestamp_value):
360
+ """
361
+ Parses the timestamp from the old database and converts it to a standard format.
362
+ Adjust this function based on the actual format of your timestamps.
363
+ """
364
+ try:
365
+ # Attempt to parse ISO format
366
+ return datetime.fromisoformat(timestamp_value).isoformat()
367
+ except ValueError:
368
+ # Handle other timestamp formats if necessary
369
+ # For example, if timestamps are in Unix epoch format
370
+ try:
371
+ timestamp_float = float(timestamp_value)
372
+ return datetime.fromtimestamp(timestamp_float).isoformat()
373
+ except ValueError:
374
+ # Default to current time if parsing fails
375
+ logging.warning(f"Unable to parse timestamp '{timestamp_value}', using current time.")
376
+ return datetime.now().isoformat()
377
+
378
+ #
379
+ # End of Migration Script
380
+ #######################################################################################################################
381
+
382
+
383
+ #######################################################################################################################
384
+ #
385
+ # Launch UI Function
386
+ def launch_ui(share_public=None, server_mode=False):
387
+ webbrowser.open_new_tab('http://127.0.0.1:7860/?__theme=dark')
388
+ share=share_public
389
+ css = """
390
+ .result-box {
391
+ margin-bottom: 20px;
392
+ border: 1px solid #ddd;
393
+ padding: 10px;
394
+ }
395
+ .result-box.error {
396
+ border-color: #ff0000;
397
+ background-color: #ffeeee;
398
+ }
399
+ .transcription, .summary {
400
+ max-height: 800px;
401
+ overflow-y: auto;
402
+ border: 1px solid #eee;
403
+ padding: 10px;
404
+ margin-top: 10px;
405
+ }
406
+ """
407
+
408
+ config = load_and_log_configs()
409
+ # Get database paths from config
410
+ db_config = config['db_config']
411
+ media_db_path = db_config['sqlite_path']
412
+ character_chat_db_path = os.path.join(os.path.dirname(media_db_path), "chatDB.db")
413
+ rag_chat_db_path = os.path.join(os.path.dirname(media_db_path), "rag_qa.db")
414
+ # Initialize the RAG Chat DB (create tables and update schema)
415
+ create_tables()
416
+
417
+ # Migrate data from the media DB to the RAG Chat DB
418
+ #migrate_media_db_to_rag_chat_db(media_db_path, rag_chat_db_path)
419
+
420
+
421
+ with gr.Blocks(theme='bethecloud/storj_theme',css=css) as iface:
422
+ gr.HTML(
423
+ """
424
+ <script>
425
+ document.addEventListener('DOMContentLoaded', (event) => {
426
+ document.body.classList.add('dark');
427
+ document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)';
428
+ });
429
+ </script>
430
+ """
431
+ )
432
+ db_config = get_db_config()
433
+ db_type = db_config['type']
434
+ gr.Markdown(f"# tl/dw: Your LLM-powered Research Multi-tool")
435
+ gr.Markdown(f"(Using {db_type.capitalize()} Database)")
436
+ with gr.Tabs():
437
+ with gr.TabItem("Transcribe / Analyze / Ingestion", id="ingestion-grouping", visible=True):
438
+ with gr.Tabs():
439
+ create_video_transcription_tab()
440
+ create_audio_processing_tab()
441
+ create_podcast_tab()
442
+ create_import_book_tab()
443
+ create_plain_text_import_tab()
444
+ create_xml_import_tab()
445
+ create_website_scraping_tab()
446
+ create_pdf_ingestion_tab()
447
+ create_pdf_ingestion_test_tab()
448
+ create_resummary_tab()
449
+ create_summarize_explain_tab()
450
+ create_live_recording_tab()
451
+ create_arxiv_tab()
452
+ create_semantic_scholar_tab()
453
+
454
+ with gr.TabItem("RAG Chat/Search", id="RAG Chat Notes group", visible=True):
455
+ create_rag_tab()
456
+ create_rag_qa_chat_tab()
457
+ create_rag_qa_notes_management_tab()
458
+ create_rag_qa_chat_management_tab()
459
+
460
+ with gr.TabItem("Chat with an LLM", id="LLM Chat group", visible=True):
461
+ create_chat_interface()
462
+ create_chat_interface_stacked()
463
+ create_chat_interface_multi_api()
464
+ create_chat_interface_four()
465
+ chat_workflows_tab()
466
+
467
+ with gr.TabItem("Character Chat", id="character chat group", visible=True):
468
+ create_character_card_interaction_tab()
469
+ create_character_chat_mgmt_tab()
470
+ create_custom_character_card_tab()
471
+ create_character_card_validation_tab()
472
+ create_multiple_character_chat_tab()
473
+ create_narrator_controlled_conversation_tab()
474
+ create_export_characters_tab()
475
+
476
+ with gr.TabItem("Writing Tools", id="writing_tools group", visible=True):
477
+ from App_Function_Libraries.Gradio_UI.Writing_tab import create_document_feedback_tab
478
+ create_document_feedback_tab()
479
+ from App_Function_Libraries.Gradio_UI.Writing_tab import create_grammar_style_check_tab
480
+ create_grammar_style_check_tab()
481
+ from App_Function_Libraries.Gradio_UI.Writing_tab import create_tone_adjustment_tab
482
+ create_tone_adjustment_tab()
483
+ from App_Function_Libraries.Gradio_UI.Writing_tab import create_creative_writing_tab
484
+ create_creative_writing_tab()
485
+ from App_Function_Libraries.Gradio_UI.Writing_tab import create_mikupad_tab
486
+ create_mikupad_tab()
487
+
488
+ with gr.TabItem("Search/View DB Items", id="view db items group", visible=True):
489
+ create_search_tab()
490
+ create_search_summaries_tab()
491
+ create_view_all_mediadb_with_versions_tab()
492
+ create_viewing_mediadb_tab()
493
+ create_mediadb_keyword_search_tab()
494
+ create_view_all_rag_notes_tab()
495
+ create_viewing_ragdb_tab()
496
+ create_ragdb_keyword_items_tab()
497
+
498
+ with gr.TabItem("Prompts", id='view prompts group', visible=True):
499
+ with gr.Tabs():
500
+ create_prompt_view_tab()
501
+ create_prompt_search_tab()
502
+ create_prompt_edit_tab()
503
+ create_prompt_clone_tab()
504
+ create_prompt_suggestion_tab()
505
+ create_prompts_export_tab()
506
+
507
+ with gr.TabItem("Manage Media DB Items", id="manage group", visible=True):
508
+ create_media_edit_tab()
509
+ create_manage_items_tab()
510
+ create_media_edit_and_clone_tab()
511
+
512
+ with gr.TabItem("Embeddings Management", id="embeddings group", visible=True):
513
+ create_embeddings_tab()
514
+ create_view_embeddings_tab()
515
+ create_purge_embeddings_tab()
516
+
517
+ with gr.TabItem("Keywords", id="keywords group", visible=True):
518
+ create_view_keywords_tab()
519
+ create_add_keyword_tab()
520
+ create_delete_keyword_tab()
521
+ create_export_keywords_tab()
522
+ create_character_keywords_tab()
523
+ create_rag_qa_keywords_tab()
524
+ create_meta_keywords_tab()
525
+ create_prompt_keywords_tab()
526
+
527
+ with gr.TabItem("Import", id="import group", visible=True):
528
+ create_import_item_tab()
529
+ create_import_obsidian_vault_tab()
530
+ create_import_single_prompt_tab()
531
+ create_import_multiple_prompts_tab()
532
+ create_mediawiki_import_tab()
533
+ create_mediawiki_config_tab()
534
+ create_conversation_import_tab()
535
+
536
+ with gr.TabItem("Export", id="export group", visible=True):
537
+ create_export_tabs()
538
+
539
+
540
+ with gr.TabItem("Database Management", id="database_management_group", visible=True):
541
+ create_database_management_interface(
542
+ media_db_config={
543
+ 'db_path': media_db_path,
544
+ 'backup_dir': backup_dir
545
+ },
546
+ rag_db_config={
547
+ 'db_path': rag_chat_db_path,
548
+ 'backup_dir': backup_dir
549
+ },
550
+ char_db_config={
551
+ 'db_path': character_chat_db_path,
552
+ 'backup_dir': backup_dir
553
+ }
554
+ )
555
+
556
+ with gr.TabItem("Utilities", id="util group", visible=True):
557
+ create_mindmap_tab()
558
+ create_utilities_yt_video_tab()
559
+ create_utilities_yt_audio_tab()
560
+ create_utilities_yt_timestamp_tab()
561
+
562
+ with gr.TabItem("Anki Deck Creation/Validation", id="anki group", visible=True):
563
+ create_anki_generator_tab()
564
+ create_anki_validation_tab()
565
+
566
+ with gr.TabItem("Local LLM", id="local llm group", visible=True):
567
+ create_chat_with_llamafile_tab()
568
+ create_ollama_tab()
569
+ #create_huggingface_tab()
570
+
571
+ with gr.TabItem("Trashcan", id="trashcan group", visible=True):
572
+ create_search_and_mark_trash_tab()
573
+ create_view_trash_tab()
574
+ create_delete_trash_tab()
575
+ create_empty_trash_tab()
576
+
577
+ with gr.TabItem("Evaluations", id="eval", visible=True):
578
+ create_geval_tab()
579
+ create_infinite_bench_tab()
580
+ # FIXME
581
+ #create_mmlu_pro_tab()
582
+
583
+ with gr.TabItem("Introduction/Help", id="introduction group", visible=True):
584
+ create_introduction_tab()
585
+
586
+ with gr.TabItem("Config Editor", id="config group"):
587
+ create_config_editor_tab()
588
+
589
+ # Launch the interface
590
+ server_port_variable = 7860
591
+ os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
592
+ if share==True:
593
+ iface.launch(share=True)
594
+ elif server_mode and not share_public:
595
+ iface.launch(share=False, server_name="0.0.0.0", server_port=server_port_variable, )
596
+ else:
597
+ try:
598
+ iface.launch(share=False, server_name="0.0.0.0", server_port=server_port_variable, )
599
+ except Exception as e:
600
+ logging.error(f"Error launching interface: {str(e)}")
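parse_timestamp in the new Gradio_Related.py accepts either ISO-8601 strings or Unix-epoch values and falls back to the current time when neither parses; a quick sketch of that behavior (the import path is assumed from the file location shown above, and importing it will pull in the full UI module):

# Hedged sketch exercising the branches of parse_timestamp defined above.
from App_Function_Libraries.Gradio_Related import parse_timestamp  # assumed import path

print(parse_timestamp("2024-05-01T12:30:00"))  # ISO string -> returned as ISO
print(parse_timestamp("1714566600"))           # epoch seconds -> converted to ISO
print(parse_timestamp("not-a-date"))           # unparseable -> logs a warning, returns now()
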
App_Function_Libraries/Gradio_UI/Anki_tab.py ADDED
@@ -0,0 +1,921 @@
1
+ # Anki_Validation_tab.py
2
+ # Description: Gradio functions for the Anki Validation tab
3
+ #
4
+ # Imports
5
+ import json
6
+ import logging
7
+ import os
8
+ import tempfile
9
+ from typing import Optional, Tuple, List, Dict
10
+ #
11
+ # External Imports
12
+ import genanki
13
+ import gradio as gr
14
+ #
15
+ # Local Imports
16
+ from App_Function_Libraries.Chat.Chat_Functions import approximate_token_count, update_chat_content, save_chat_history, \
17
+ save_chat_history_to_db_wrapper
18
+ from App_Function_Libraries.DB.DB_Manager import list_prompts
19
+ from App_Function_Libraries.Gradio_UI.Chat_ui import update_dropdown_multiple, chat_wrapper, update_selected_parts, \
20
+ search_conversations, regenerate_last_message, load_conversation, debug_output
21
+ from App_Function_Libraries.Third_Party.Anki import sanitize_html, generate_card_choices, \
22
+ export_cards, load_card_for_editing, handle_file_upload, \
23
+ validate_for_ui, update_card_with_validation, update_card_choices, enhanced_file_upload, \
24
+ handle_validation
25
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
26
+ #
27
+ ############################################################################################################
28
+ #
29
+ # Functions:
30
+
31
+ def create_anki_validation_tab():
32
+ with gr.TabItem("Anki Flashcard Validation", visible=True):
33
+ gr.Markdown("# Anki Flashcard Validation and Editor")
34
+
35
+ # State variables for internal tracking
36
+ current_card_data = gr.State({})
37
+ preview_update_flag = gr.State(False)
38
+
39
+ with gr.Row():
40
+ # Left Column: Input and Validation
41
+ with gr.Column(scale=1):
42
+ gr.Markdown("## Import or Create Flashcards")
43
+
44
+ input_type = gr.Radio(
45
+ choices=["JSON", "APKG"],
46
+ label="Input Type",
47
+ value="JSON"
48
+ )
49
+
50
+ with gr.Group() as json_input_group:
51
+ flashcard_input = gr.TextArea(
52
+ label="Enter Flashcards (JSON format)",
53
+ placeholder='''{
54
+ "cards": [
55
+ {
56
+ "id": "CARD_001",
57
+ "type": "basic",
58
+ "front": "What is the capital of France?",
59
+ "back": "Paris",
60
+ "tags": ["geography", "europe"],
61
+ "note": "Remember: City of Light"
62
+ }
63
+ ]
64
+ }''',
65
+ lines=10
66
+ )
67
+
68
+ import_json = gr.File(
69
+ label="Or Import JSON File",
70
+ file_types=[".json"]
71
+ )
72
+
73
+ with gr.Group(visible=False) as apkg_input_group:
74
+ import_apkg = gr.File(
75
+ label="Import APKG File",
76
+ file_types=[".apkg"]
77
+ )
78
+ deck_info = gr.JSON(
79
+ label="Deck Information",
80
+ visible=False
81
+ )
82
+
83
+ validate_button = gr.Button("Validate Flashcards")
84
+
85
+ # Right Column: Validation Results and Editor
86
+ with gr.Column(scale=1):
87
+ gr.Markdown("## Validation Results")
88
+ validation_status = gr.Markdown("")
89
+
90
+ with gr.Accordion("Validation Rules", open=False):
91
+ gr.Markdown("""
92
+ ### Required Fields:
93
+ - Unique ID
94
+ - Card Type (basic, cloze, reverse)
95
+ - Front content
96
+ - Back content
97
+ - At least one tag
98
+
99
+ ### Content Rules:
100
+ - No empty fields
101
+ - Front side should be a clear question/prompt
102
+ - Back side should contain complete answer
103
+ - Cloze deletions must have valid syntax
104
+ - No duplicate IDs
105
+
106
+ ### Image Rules:
107
+ - Valid image tags
108
+ - Supported formats (JPG, PNG, GIF)
109
+ - Base64 encoded or valid URL
110
+
111
+ ### APKG-specific Rules:
112
+ - Valid SQLite database structure
113
+ - Media files properly referenced
114
+ - Note types match Anki standards
115
+ - Card templates are well-formed
116
+ """)
117
+
118
+ with gr.Row():
119
+ # Card Editor
120
+ gr.Markdown("## Card Editor")
121
+ with gr.Row():
122
+ with gr.Column(scale=1):
123
+ with gr.Accordion("Edit Individual Cards", open=True):
124
+ card_selector = gr.Dropdown(
125
+ label="Select Card to Edit",
126
+ choices=[],
127
+ interactive=True
128
+ )
129
+
130
+ card_type = gr.Radio(
131
+ choices=["basic", "cloze", "reverse"],
132
+ label="Card Type",
133
+ value="basic"
134
+ )
135
+
136
+ # Front content with preview
137
+ with gr.Group():
138
+ gr.Markdown("### Front Content")
139
+ front_content = gr.TextArea(
140
+ label="Content (HTML supported)",
141
+ lines=3
142
+ )
143
+ front_preview = gr.HTML(
144
+ label="Preview"
145
+ )
146
+
147
+ # Back content with preview
148
+ with gr.Group():
149
+ gr.Markdown("### Back Content")
150
+ back_content = gr.TextArea(
151
+ label="Content (HTML supported)",
152
+ lines=3
153
+ )
154
+ back_preview = gr.HTML(
155
+ label="Preview"
156
+ )
157
+
158
+ tags_input = gr.TextArea(
159
+ label="Tags (comma-separated)",
160
+ lines=1
161
+ )
162
+
163
+ notes_input = gr.TextArea(
164
+ label="Additional Notes",
165
+ lines=2
166
+ )
167
+
168
+ with gr.Row():
169
+ update_card_button = gr.Button("Update Card")
170
+ delete_card_button = gr.Button("Delete Card", variant="stop")
171
+
172
+ with gr.Row():
173
+ with gr.Column(scale=1):
174
+ # Export Options
175
+ gr.Markdown("## Export Options")
176
+ export_format = gr.Radio(
177
+ choices=["Anki CSV", "JSON", "Plain Text"],
178
+ label="Export Format",
179
+ value="Anki CSV"
180
+ )
181
+ export_button = gr.Button("Export Valid Cards")
182
+ export_file = gr.File(label="Download Validated Cards")
183
+ export_status = gr.Markdown("")
184
+ with gr.Column(scale=1):
185
+ gr.Markdown("## Export Instructions")
186
+ gr.Markdown("""
187
+ ### Anki CSV Format:
188
+ - Front, Back, Tags, Type, Note
189
+ - Use for importing into Anki
190
+ - Images preserved as HTML
191
+
192
+ ### JSON Format:
193
+ - JSON array of cards
194
+ - Images as base64 or URLs
195
+ - Use for custom processing
196
+
197
+ ### Plain Text Format:
198
+ - Question and Answer pairs
199
+ - Images represented as [IMG] placeholder
200
+ - Use for manual review
201
+ """)
202
+
203
+ def update_preview(content):
204
+ """Update preview with sanitized content."""
205
+ if not content:
206
+ return ""
207
+ return sanitize_html(content)
208
+
209
+ # Event handlers
210
+ def validation_chain(content: str) -> Tuple[str, List[str]]:
211
+ """Combined validation and card choice update."""
212
+ validation_message = validate_for_ui(content)
213
+ card_choices = update_card_choices(content)
214
+ return validation_message, card_choices
215
+
216
+ def delete_card(card_selection, current_content):
217
+ """Delete selected card and return updated content."""
218
+ if not card_selection or not current_content:
219
+ return current_content, "No card selected", []
220
+
221
+ try:
222
+ data = json.loads(current_content)
223
+ selected_id = card_selection.split(" - ")[0]
224
+
225
+ data['cards'] = [card for card in data['cards'] if card['id'] != selected_id]
226
+ new_content = json.dumps(data, indent=2)
227
+
228
+ return (
229
+ new_content,
230
+ "Card deleted successfully!",
231
+ generate_card_choices(new_content)
232
+ )
233
+
234
+ except Exception as e:
235
+ return current_content, f"Error deleting card: {str(e)}", []
236
+
237
+ def process_validation_result(is_valid, message):
238
+ """Process validation result into a formatted markdown string."""
239
+ if is_valid:
240
+ return f"✅ {message}"
241
+ else:
242
+ return f"❌ {message}"
243
+
244
+ # Register event handlers
245
+ input_type.change(
246
+ fn=lambda t: (
247
+ gr.update(visible=t == "JSON"),
248
+ gr.update(visible=t == "APKG"),
249
+ gr.update(visible=t == "APKG")
250
+ ),
251
+ inputs=[input_type],
252
+ outputs=[json_input_group, apkg_input_group, deck_info]
253
+ )
254
+
255
+ # File upload handlers
256
+ import_json.upload(
257
+ fn=handle_file_upload,
258
+ inputs=[import_json, input_type],
259
+ outputs=[
260
+ flashcard_input,
261
+ deck_info,
262
+ validation_status,
263
+ card_selector
264
+ ]
265
+ )
266
+
267
+ import_apkg.upload(
268
+ fn=enhanced_file_upload,
269
+ inputs=[import_apkg, input_type],
270
+ outputs=[
271
+ flashcard_input,
272
+ deck_info,
273
+ validation_status,
274
+ card_selector
275
+ ]
276
+ )
277
+
278
+ # Validation handler
279
+ validate_button.click(
280
+ fn=lambda content, input_format: (
281
+ handle_validation(content, input_format),
282
+ generate_card_choices(content) if content else []
283
+ ),
284
+ inputs=[flashcard_input, input_type],
285
+ outputs=[validation_status, card_selector]
286
+ )
287
+
288
+ # Card editing handlers
289
+ # Card selector change event
290
+ card_selector.change(
291
+ fn=load_card_for_editing,
292
+ inputs=[card_selector, flashcard_input],
293
+ outputs=[
294
+ card_type,
295
+ front_content,
296
+ back_content,
297
+ tags_input,
298
+ notes_input,
299
+ front_preview,
300
+ back_preview
301
+ ]
302
+ )
303
+
304
+ # Live preview updates
305
+ front_content.change(
306
+ fn=update_preview,
307
+ inputs=[front_content],
308
+ outputs=[front_preview]
309
+ )
310
+
311
+ back_content.change(
312
+ fn=update_preview,
313
+ inputs=[back_content],
314
+ outputs=[back_preview]
315
+ )
316
+
317
+ # Card update handler
318
+ update_card_button.click(
319
+ fn=update_card_with_validation,
320
+ inputs=[
321
+ flashcard_input,
322
+ card_selector,
323
+ card_type,
324
+ front_content,
325
+ back_content,
326
+ tags_input,
327
+ notes_input
328
+ ],
329
+ outputs=[
330
+ flashcard_input,
331
+ validation_status,
332
+ card_selector
333
+ ]
334
+ )
335
+
336
+ # Delete card handler
337
+ delete_card_button.click(
338
+ fn=delete_card,
339
+ inputs=[card_selector, flashcard_input],
340
+ outputs=[flashcard_input, validation_status, card_selector]
341
+ )
342
+
343
+ # Export handler
344
+ export_button.click(
345
+ fn=export_cards,
346
+ inputs=[flashcard_input, export_format],
347
+ outputs=[export_status, export_file]
348
+ )
349
+
350
+ return (
351
+ flashcard_input,
352
+ import_json,
353
+ import_apkg,
354
+ validate_button,
355
+ validation_status,
356
+ card_selector,
357
+ card_type,
358
+ front_content,
359
+ back_content,
360
+ front_preview,
361
+ back_preview,
362
+ tags_input,
363
+ notes_input,
364
+ update_card_button,
365
+ delete_card_button,
366
+ export_format,
367
+ export_button,
368
+ export_file,
369
+ export_status,
370
+ deck_info
371
+ )
372
+
373
+
374
+ def create_anki_generator_tab():
375
+ with gr.TabItem("Anki Deck Generator", visible=True):
376
+ try:
377
+ default_value = None
378
+ if default_api_endpoint:
379
+ if default_api_endpoint in global_api_endpoints:
380
+ default_value = format_api_name(default_api_endpoint)
381
+ else:
382
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
383
+ except Exception as e:
384
+ logging.error(f"Error setting default API endpoint: {str(e)}")
385
+ default_value = None
386
+ custom_css = """
387
+ .chatbot-container .message-wrap .message {
388
+ font-size: 14px !important;
389
+ }
390
+ """
391
+ with gr.TabItem("LLM Chat & Anki Deck Creation", visible=True):
392
+ gr.Markdown("# Chat with an LLM to help you come up with Questions/Answers for an Anki Deck")
393
+ chat_history = gr.State([])
394
+ media_content = gr.State({})
395
+ selected_parts = gr.State([])
396
+ conversation_id = gr.State(None)
397
+ initial_prompts, total_pages, current_page = list_prompts(page=1, per_page=10)
398
+
399
+ with gr.Row():
400
+ with gr.Column(scale=1):
401
+ search_query_input = gr.Textbox(
402
+ label="Search Query",
403
+ placeholder="Enter your search query here..."
404
+ )
405
+ search_type_input = gr.Radio(
406
+ choices=["Title", "Content", "Author", "Keyword"],
407
+ value="Keyword",
408
+ label="Search By"
409
+ )
410
+ keyword_filter_input = gr.Textbox(
411
+ label="Filter by Keywords (comma-separated)",
412
+ placeholder="ml, ai, python, etc..."
413
+ )
414
+ search_button = gr.Button("Search")
415
+ items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
416
+ item_mapping = gr.State({})
417
+ with gr.Row():
418
+ use_content = gr.Checkbox(label="Use Content")
419
+ use_summary = gr.Checkbox(label="Use Summary")
420
+ use_prompt = gr.Checkbox(label="Use Prompt")
421
+ save_conversation = gr.Checkbox(label="Save Conversation", value=False, visible=True)
422
+ with gr.Row():
423
+ temperature = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
424
+ with gr.Row():
425
+ conversation_search = gr.Textbox(label="Search Conversations")
426
+ with gr.Row():
427
+ search_conversations_btn = gr.Button("Search Conversations")
428
+ with gr.Row():
429
+ previous_conversations = gr.Dropdown(label="Select Conversation", choices=[], interactive=True)
430
+ with gr.Row():
431
+ load_conversations_btn = gr.Button("Load Selected Conversation")
432
+
433
+ # Refactored API selection dropdown
434
+ api_endpoint = gr.Dropdown(
435
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
436
+ value=default_value,
437
+ label="API for Chat Interaction (Optional)"
438
+ )
439
+ api_key = gr.Textbox(label="API Key (if required)", type="password")
440
+ custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
441
+ value=False,
442
+ visible=True)
443
+ preset_prompt_checkbox = gr.Checkbox(label="Use a Pre-set Prompt",
444
+ value=False,
445
+ visible=True)
446
+ with gr.Row(visible=False) as preset_prompt_controls:
447
+ prev_prompt_page = gr.Button("Previous")
448
+ next_prompt_page = gr.Button("Next")
449
+ current_prompt_page_text = gr.Text(f"Page {current_page} of {total_pages}")
450
+ current_prompt_page_state = gr.State(value=1)
451
+
452
+ preset_prompt = gr.Dropdown(
453
+ label="Select Preset Prompt",
454
+ choices=initial_prompts
455
+ )
456
+ user_prompt = gr.Textbox(label="Custom Prompt",
457
+ placeholder="Enter custom prompt here",
458
+ lines=3,
459
+ visible=False)
460
+ system_prompt_input = gr.Textbox(label="System Prompt",
461
+ value="You are a helpful AI assitant",
462
+ lines=3,
463
+ visible=False)
464
+ with gr.Column(scale=2):
465
+ chatbot = gr.Chatbot(height=800, elem_classes="chatbot-container")
466
+ msg = gr.Textbox(label="Enter your message")
467
+ submit = gr.Button("Submit")
468
+ regenerate_button = gr.Button("Regenerate Last Message")
469
+ token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False)
470
+ clear_chat_button = gr.Button("Clear Chat")
471
+
472
+ chat_media_name = gr.Textbox(label="Custom Chat Name (optional)")
473
+ save_chat_history_to_db = gr.Button("Save Chat History to Database")
474
+ save_status = gr.Textbox(label="Save Status", interactive=False)
475
+ save_chat_history_as_file = gr.Button("Save Chat History as File")
476
+ download_file = gr.File(label="Download Chat History")
477
+
478
+ search_button.click(
479
+ fn=update_dropdown_multiple,
480
+ inputs=[search_query_input, search_type_input, keyword_filter_input],
481
+ outputs=[items_output, item_mapping]
482
+ )
483
+
484
+ def update_prompt_visibility(custom_prompt_checked, preset_prompt_checked):
485
+ user_prompt_visible = custom_prompt_checked
486
+ system_prompt_visible = custom_prompt_checked
487
+ preset_prompt_visible = preset_prompt_checked
488
+ preset_prompt_controls_visible = preset_prompt_checked
489
+ return (
490
+ gr.update(visible=user_prompt_visible, interactive=user_prompt_visible),
491
+ gr.update(visible=system_prompt_visible, interactive=system_prompt_visible),
492
+ gr.update(visible=preset_prompt_visible, interactive=preset_prompt_visible),
493
+ gr.update(visible=preset_prompt_controls_visible)
494
+ )
495
+
496
+ def update_prompt_page(direction, current_page_val):
497
+ new_page = current_page_val + direction
498
+ if new_page < 1:
499
+ new_page = 1
500
+ prompts, total_pages, _ = list_prompts(page=new_page, per_page=10)
501
+ if new_page > total_pages:
502
+ new_page = total_pages
503
+ prompts, total_pages, _ = list_prompts(page=new_page, per_page=10)
504
+ return (
505
+ gr.update(choices=prompts),
506
+ gr.update(value=f"Page {new_page} of {total_pages}"),
507
+ new_page
508
+ )
509
+
510
+ def clear_chat():
511
+ return [], None # Return empty list for chatbot and None for conversation_id
512
+
513
+ custom_prompt_checkbox.change(
514
+ update_prompt_visibility,
515
+ inputs=[custom_prompt_checkbox, preset_prompt_checkbox],
516
+ outputs=[user_prompt, system_prompt_input, preset_prompt, preset_prompt_controls]
517
+ )
518
+
519
+ preset_prompt_checkbox.change(
520
+ update_prompt_visibility,
521
+ inputs=[custom_prompt_checkbox, preset_prompt_checkbox],
522
+ outputs=[user_prompt, system_prompt_input, preset_prompt, preset_prompt_controls]
523
+ )
524
+
525
+ prev_prompt_page.click(
526
+ lambda x: update_prompt_page(-1, x),
527
+ inputs=[current_prompt_page_state],
528
+ outputs=[preset_prompt, current_prompt_page_text, current_prompt_page_state]
529
+ )
530
+
531
+ next_prompt_page.click(
532
+ lambda x: update_prompt_page(1, x),
533
+ inputs=[current_prompt_page_state],
534
+ outputs=[preset_prompt, current_prompt_page_text, current_prompt_page_state]
535
+ )
536
+
537
+ submit.click(
538
+ chat_wrapper,
539
+ inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt,
540
+ conversation_id,
541
+ save_conversation, temperature, system_prompt_input],
542
+ outputs=[msg, chatbot, conversation_id]
543
+ ).then( # Clear the message box after submission
544
+ lambda x: gr.update(value=""),
545
+ inputs=[chatbot],
546
+ outputs=[msg]
547
+ ).then( # Clear the user prompt after the first message
548
+ lambda: (gr.update(value=""), gr.update(value="")),
549
+ outputs=[user_prompt, system_prompt_input]
550
+ ).then(
551
+ lambda history: approximate_token_count(history),
552
+ inputs=[chatbot],
553
+ outputs=[token_count_display]
554
+ )
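The submit handler above relies on Gradio's event chaining; a minimal, self-contained sketch of the same pattern (hypothetical components, not part of this app):

import gradio as gr

with gr.Blocks() as demo:
    box = gr.Textbox(label="Message")
    out = gr.Textbox(label="Reply")
    btn = gr.Button("Send")
    # Each .then() step runs only after the previous step finishes, so follow-up work
    # (clearing the input box, recomputing a token count) stays out of the main handler.
    btn.click(lambda s: s.upper(), inputs=[box], outputs=[out]).then(
        lambda: gr.update(value=""), outputs=[box]
    )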
555
+
556
+
557
+ clear_chat_button.click(
558
+ clear_chat,
559
+ outputs=[chatbot, conversation_id]
560
+ )
561
+
562
+ items_output.change(
563
+ update_chat_content,
564
+ inputs=[items_output, use_content, use_summary, use_prompt, item_mapping],
565
+ outputs=[media_content, selected_parts]
566
+ )
567
+
568
+ use_content.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
569
+ outputs=[selected_parts])
570
+ use_summary.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
571
+ outputs=[selected_parts])
572
+ use_prompt.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
573
+ outputs=[selected_parts])
574
+ items_output.change(debug_output, inputs=[media_content, selected_parts], outputs=[])
575
+
576
+ search_conversations_btn.click(
577
+ search_conversations,
578
+ inputs=[conversation_search],
579
+ outputs=[previous_conversations]
580
+ )
581
+
582
+ load_conversations_btn.click(
583
+ clear_chat,
584
+ outputs=[chatbot, chat_history]
585
+ ).then(
586
+ load_conversation,
587
+ inputs=[previous_conversations],
588
+ outputs=[chatbot, conversation_id]
589
+ )
590
+
591
+ previous_conversations.change(
592
+ load_conversation,
593
+ inputs=[previous_conversations],
594
+ outputs=[chat_history]
595
+ )
596
+
597
+ save_chat_history_as_file.click(
598
+ save_chat_history,
599
+ inputs=[chatbot, conversation_id],
600
+ outputs=[download_file]
601
+ )
602
+
603
+ save_chat_history_to_db.click(
604
+ save_chat_history_to_db_wrapper,
605
+ inputs=[chatbot, conversation_id, media_content, chat_media_name],
606
+ outputs=[conversation_id, save_status]
607
+ )
608
+
609
+ regenerate_button.click(
610
+ regenerate_last_message,
611
+ inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temperature,
612
+ system_prompt_input],
613
+ outputs=[chatbot, save_status]
614
+ ).then(
615
+ lambda history: approximate_token_count(history),
616
+ inputs=[chatbot],
617
+ outputs=[token_count_display]
618
+ )
619
+ gr.Markdown("# Create Anki Deck")
620
+
621
+ with gr.Row():
622
+ # Left Column: Deck Settings
623
+ with gr.Column(scale=1):
624
+ gr.Markdown("## Deck Settings")
625
+ deck_name = gr.Textbox(
626
+ label="Deck Name",
627
+ placeholder="My Study Deck",
628
+ value="My Study Deck"
629
+ )
630
+
631
+ deck_description = gr.Textbox(
632
+ label="Deck Description",
633
+ placeholder="Description of your deck",
634
+ lines=2
635
+ )
636
+
637
+ note_type = gr.Radio(
638
+ choices=["Basic", "Basic (and reversed)", "Cloze"],
639
+ label="Note Type",
640
+ value="Basic"
641
+ )
642
+
643
+ # Card Fields based on note type
644
+ with gr.Group() as basic_fields:
645
+ front_template = gr.Textbox(
646
+ label="Front Template (HTML)",
647
+ value="{{Front}}",
648
+ lines=3
649
+ )
650
+ back_template = gr.Textbox(
651
+ label="Back Template (HTML)",
652
+ value="{{FrontSide}}<hr id='answer'>{{Back}}",
653
+ lines=3
654
+ )
655
+
656
+ with gr.Group() as cloze_fields:
657
+ cloze_template = gr.Textbox(
658
+ label="Cloze Template (HTML)",
659
+ value="{{cloze:Text}}",
660
+ lines=3,
661
+ visible=False
662
+ )
663
+
664
+ css_styling = gr.Textbox(
665
+ label="Card Styling (CSS)",
666
+ value=".card {\n font-family: arial;\n font-size: 20px;\n text-align: center;\n color: black;\n background-color: white;\n}\n\n.cloze {\n font-weight: bold;\n color: blue;\n}",
667
+ lines=5
668
+ )
669
+
670
+ # Right Column: Card Creation
671
+ with gr.Column(scale=1):
672
+ gr.Markdown("## Add Cards")
673
+
674
+ with gr.Group() as basic_input:
675
+ front_content = gr.TextArea(
676
+ label="Front Content",
677
+ placeholder="Question or prompt",
678
+ lines=3
679
+ )
680
+ back_content = gr.TextArea(
681
+ label="Back Content",
682
+ placeholder="Answer",
683
+ lines=3
684
+ )
685
+
686
+ with gr.Group() as cloze_input:
687
+ cloze_content = gr.TextArea(
688
+ label="Cloze Content",
689
+ placeholder="Text with {{c1::cloze}} deletions",
690
+ lines=3,
691
+ visible=False
692
+ )
693
+
694
+ tags_input = gr.TextArea(
695
+ label="Tags (comma-separated)",
696
+ placeholder="tag1, tag2, tag3",
697
+ lines=1
698
+ )
699
+
700
+ add_card_btn = gr.Button("Add Card")
701
+
702
+ cards_list = gr.JSON(
703
+ label="Cards in Deck",
704
+ value={"cards": []}
705
+ )
706
+
707
+ clear_cards_btn = gr.Button("Clear All Cards", variant="stop")
708
+
709
+ with gr.Row():
710
+ generate_deck_btn = gr.Button("Generate Deck", variant="primary")
711
+ download_deck = gr.File(label="Download Deck")
712
+ generation_status = gr.Markdown("")
713
+
714
+ def update_note_type_fields(note_type: str):
715
+ if note_type == "Cloze":
716
+ return {
717
+ basic_input: gr.update(visible=False),
718
+ cloze_input: gr.update(visible=True),
719
+ basic_fields: gr.update(visible=False),
720
+ cloze_fields: gr.update(visible=True)
721
+ }
722
+ else:
723
+ return {
724
+ basic_input: gr.update(visible=True),
725
+ cloze_input: gr.update(visible=False),
726
+ basic_fields: gr.update(visible=True),
727
+ cloze_fields: gr.update(visible=False)
728
+ }
729
+
730
+ def add_card(note_type: str, front: str, back: str, cloze: str, tags: str, current_cards: Dict[str, List]):
731
+ if not current_cards:
732
+ current_cards = {"cards": []}
733
+
734
+ cards_data = current_cards["cards"]
735
+
736
+ # Process tags
737
+ card_tags = [tag.strip() for tag in tags.split(',') if tag.strip()]
738
+
739
+ new_card = {
740
+ "id": f"CARD_{len(cards_data) + 1}",
741
+ "tags": card_tags
742
+ }
743
+
744
+ if note_type == "Cloze":
745
+ if not cloze or "{{c" not in cloze:
746
+ return current_cards, "❌ Invalid cloze format. Use {{c1::text}} syntax."
747
+ new_card.update({
748
+ "type": "cloze",
749
+ "content": cloze
750
+ })
751
+ else:
752
+ if not front or not back:
753
+ return current_cards, "❌ Both front and back content are required."
754
+ new_card.update({
755
+ "type": "basic",
756
+ "front": front,
757
+ "back": back,
758
+ "is_reverse": note_type == "Basic (and reversed)"
759
+ })
760
+
761
+ cards_data.append(new_card)
762
+ return {"cards": cards_data}, "✅ Card added successfully!"
763
+
764
+ def clear_cards() -> Tuple[Dict[str, List], str]:
765
+ return {"cards": []}, "✅ All cards cleared!"
766
+
767
+ def generate_anki_deck(
768
+ deck_name: str,
769
+ deck_description: str,
770
+ note_type: str,
771
+ front_template: str,
772
+ back_template: str,
773
+ cloze_template: str,
774
+ css: str,
775
+ cards_data: Dict[str, List]
776
+ ) -> Tuple[Optional[str], str]:
777
+ try:
778
+ if not cards_data or not cards_data.get("cards"):
779
+ return None, "❌ No cards to generate deck from!"
780
+
781
+ # Create model based on note type
782
+ if note_type == "Cloze":
783
+ model = genanki.Model(
784
+ 1483883320, # Random model ID
785
+ 'Cloze Model',
786
+ fields=[
787
+ {'name': 'Text'},
788
+ {'name': 'Back Extra'}
789
+ ],
790
+ templates=[{
791
+ 'name': 'Cloze Card',
792
+ 'qfmt': cloze_template,
793
+ 'afmt': cloze_template + '<br><hr id="extra">{{Back Extra}}'
794
+ }],
795
+ css=css,
796
+ # model_type=1 selects genanki's cloze note type (genanki.Model.CLOZE in newer releases)
797
+ model_type=1
798
+ )
799
+ else:
800
+ templates = [{
801
+ 'name': 'Card 1',
802
+ 'qfmt': front_template,
803
+ 'afmt': back_template
804
+ }]
805
+
806
+ if note_type == "Basic (and reversed)":
807
+ templates.append({
808
+ 'name': 'Card 2',
809
+ 'qfmt': '{{Back}}',
810
+ 'afmt': '{{FrontSide}}<hr id="answer">{{Front}}'
811
+ })
812
+
813
+ model = genanki.Model(
814
+ 1607392319, # Random model ID
815
+ 'Basic Model',
816
+ fields=[
817
+ {'name': 'Front'},
818
+ {'name': 'Back'}
819
+ ],
820
+ templates=templates,
821
+ css=css
822
+ )
823
+
824
+ # Create deck
825
+ deck = genanki.Deck(
826
+ 2059400110, # Random deck ID
827
+ deck_name,
828
+ description=deck_description
829
+ )
830
+
831
+ # Add cards to deck
832
+ for card in cards_data["cards"]:
833
+ if card["type"] == "cloze":
834
+ note = genanki.Note(
835
+ model=model,
836
+ fields=[card["content"], ""],
837
+ tags=card["tags"]
838
+ )
839
+ else:
840
+ note = genanki.Note(
841
+ model=model,
842
+ fields=[card["front"], card["back"]],
843
+ tags=card["tags"]
844
+ )
845
+ deck.add_note(note)
846
+
847
+ # Save deck to temporary file
848
+ temp_dir = tempfile.mkdtemp()
849
+ deck_path = os.path.join(temp_dir, f"{deck_name}.apkg")
850
+ genanki.Package(deck).write_to_file(deck_path)
851
+
852
+ return deck_path, "✅ Deck generated successfully!"
853
+
854
+ except Exception as e:
855
+ return None, f"❌ Error generating deck: {str(e)}"
856
+
857
+ # Register event handlers
858
+ note_type.change(
859
+ fn=update_note_type_fields,
860
+ inputs=[note_type],
861
+ outputs=[basic_input, cloze_input, basic_fields, cloze_fields]
862
+ )
863
+
864
+ add_card_btn.click(
865
+ fn=add_card,
866
+ inputs=[
867
+ note_type,
868
+ front_content,
869
+ back_content,
870
+ cloze_content,
871
+ tags_input,
872
+ cards_list
873
+ ],
874
+ outputs=[cards_list, generation_status]
875
+ )
876
+
877
+ clear_cards_btn.click(
878
+ fn=clear_cards,
879
+ inputs=[],
880
+ outputs=[cards_list, generation_status]
881
+ )
882
+
883
+ generate_deck_btn.click(
884
+ fn=generate_anki_deck,
885
+ inputs=[
886
+ deck_name,
887
+ deck_description,
888
+ note_type,
889
+ front_template,
890
+ back_template,
891
+ cloze_template,
892
+ css_styling,
893
+ cards_list
894
+ ],
895
+ outputs=[download_deck, generation_status]
896
+ )
897
+
898
+
899
+ return (
900
+ deck_name,
901
+ deck_description,
902
+ note_type,
903
+ front_template,
904
+ back_template,
905
+ cloze_template,
906
+ css_styling,
907
+ front_content,
908
+ back_content,
909
+ cloze_content,
910
+ tags_input,
911
+ cards_list,
912
+ add_card_btn,
913
+ clear_cards_btn,
914
+ generate_deck_btn,
915
+ download_deck,
916
+ generation_status
917
+ )
918
+
919
+ #
920
+ # End of Anki_tab.py
921
+ ############################################################################################################
App_Function_Libraries/Gradio_UI/Audio_ingestion_tab.py CHANGED
@@ -2,16 +2,18 @@
2
  # Description: Gradio UI for ingesting audio files into the database
3
  #
4
  # Imports
 
5
  #
6
  # External Imports
7
  import gradio as gr
8
  #
9
  # Local Imports
10
  from App_Function_Libraries.Audio.Audio_Files import process_audio_files
11
- from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
12
  from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
13
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models
14
- from App_Function_Libraries.Utils.Utils import cleanup_temp_files
 
15
  # Import metrics logging
16
  from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
17
  from App_Function_Libraries.Metrics.logger_config import logger
@@ -22,6 +24,18 @@ from App_Function_Libraries.Metrics.logger_config import logger
22
  def create_audio_processing_tab():
23
  with gr.TabItem("Audio File Transcription + Summarization", visible=True):
24
  gr.Markdown("# Transcribe & Summarize Audio Files from URLs or Local Files!")
 
 
 
 
 
 
 
 
 
 
 
 
25
  with gr.Row():
26
  with gr.Column():
27
  audio_url_input = gr.Textbox(label="Audio File URL(s)", placeholder="Enter the URL(s) of the audio file(s), one per line")
@@ -46,54 +60,133 @@ def create_audio_processing_tab():
46
  keep_timestamps_input = gr.Checkbox(label="Keep Timestamps", value=True)
47
 
48
  with gr.Row():
49
- custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
50
- value=False,
51
- visible=True)
52
- preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
53
- value=False,
54
- visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  with gr.Row():
56
- preset_prompt = gr.Dropdown(label="Select Preset Prompt",
57
- choices=load_preset_prompts(),
58
- visible=False)
 
59
  with gr.Row():
60
- custom_prompt_input = gr.Textbox(label="Custom Prompt",
61
- placeholder="Enter custom prompt here",
62
- lines=3,
63
- visible=False)
 
 
64
  with gr.Row():
65
- system_prompt_input = gr.Textbox(label="System Prompt",
66
- value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
67
- **Bulleted Note Creation Guidelines**
68
-
69
- **Headings**:
70
- - Based on referenced topics, not categories like quotes or terms
71
- - Surrounded by **bold** formatting
72
- - Not listed as bullet points
73
- - No space between headings and list items underneath
74
-
75
- **Emphasis**:
76
- - **Important terms** set in bold font
77
- - **Text ending in a colon**: also bolded
78
-
79
- **Review**:
80
- - Ensure adherence to specified format
81
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
82
- """,
83
- lines=3,
84
- visible=False)
 
 
85
 
86
  custom_prompt_checkbox.change(
87
  fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
88
  inputs=[custom_prompt_checkbox],
89
  outputs=[custom_prompt_input, system_prompt_input]
90
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  preset_prompt_checkbox.change(
92
- fn=lambda x: gr.update(visible=x),
93
  inputs=[preset_prompt_checkbox],
94
- outputs=[preset_prompt]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  )
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  def update_prompts(preset_name):
98
  prompts = update_user_prompt(preset_name)
99
  return (
@@ -103,15 +196,14 @@ def create_audio_processing_tab():
103
 
104
  preset_prompt.change(
105
  update_prompts,
106
- inputs=preset_prompt,
107
  outputs=[custom_prompt_input, system_prompt_input]
108
  )
109
-
110
  api_name_input = gr.Dropdown(
111
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
112
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM","ollama", "HuggingFace", "Custom-OpenAI-API"],
113
- value=None,
114
- label="API for Summarization (Optional)"
115
  )
116
  api_key_input = gr.Textbox(label="API Key (if required)", placeholder="Enter your API key here", type="password")
117
  custom_keywords_input = gr.Textbox(label="Custom Keywords", placeholder="Enter custom keywords, comma-separated")
 
2
  # Description: Gradio UI for ingesting audio files into the database
3
  #
4
  # Imports
5
+ import logging
6
  #
7
  # External Imports
8
  import gradio as gr
9
  #
10
  # Local Imports
11
  from App_Function_Libraries.Audio.Audio_Files import process_audio_files
12
+ from App_Function_Libraries.DB.DB_Manager import list_prompts
13
  from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
14
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models
15
+ from App_Function_Libraries.Utils.Utils import cleanup_temp_files, default_api_endpoint, global_api_endpoints, \
16
+ format_api_name
17
  # Import metrics logging
18
  from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
19
  from App_Function_Libraries.Metrics.logger_config import logger
 
24
  def create_audio_processing_tab():
25
  with gr.TabItem("Audio File Transcription + Summarization", visible=True):
26
  gr.Markdown("# Transcribe & Summarize Audio Files from URLs or Local Files!")
27
+ # Get and validate default value
28
+ try:
29
+ default_value = None
30
+ if default_api_endpoint:
31
+ if default_api_endpoint in global_api_endpoints:
32
+ default_value = format_api_name(default_api_endpoint)
33
+ else:
34
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
35
+ except Exception as e:
36
+ logging.error(f"Error setting default API endpoint: {str(e)}")
37
+ default_value = None
38
+
39
  with gr.Row():
40
  with gr.Column():
41
  audio_url_input = gr.Textbox(label="Audio File URL(s)", placeholder="Enter the URL(s) of the audio file(s), one per line")
 
60
  keep_timestamps_input = gr.Checkbox(label="Keep Timestamps", value=True)
61
 
62
  with gr.Row():
63
+ custom_prompt_checkbox = gr.Checkbox(
64
+ label="Use a Custom Prompt",
65
+ value=False,
66
+ visible=True
67
+ )
68
+ preset_prompt_checkbox = gr.Checkbox(
69
+ label="Use a pre-set Prompt",
70
+ value=False,
71
+ visible=True
72
+ )
73
+
74
+ # Initialize state variables for pagination
75
+ current_page_state = gr.State(value=1)
76
+ total_pages_state = gr.State(value=1)
77
+
78
+ with gr.Row():
79
+ # Add pagination controls
80
+ preset_prompt = gr.Dropdown(
81
+ label="Select Preset Prompt",
82
+ choices=[],
83
+ visible=False
84
+ )
85
  with gr.Row():
86
+ prev_page_button = gr.Button("Previous Page", visible=False)
87
+ page_display = gr.Markdown("Page 1 of X", visible=False)
88
+ next_page_button = gr.Button("Next Page", visible=False)
89
+
90
  with gr.Row():
91
+ custom_prompt_input = gr.Textbox(
92
+ label="Custom Prompt",
93
+ placeholder="Enter custom prompt here",
94
+ lines=3,
95
+ visible=False
96
+ )
97
  with gr.Row():
98
+ system_prompt_input = gr.Textbox(
99
+ label="System Prompt",
100
+ value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
101
+ **Bulleted Note Creation Guidelines**
102
+
103
+ **Headings**:
104
+ - Based on referenced topics, not categories like quotes or terms
105
+ - Surrounded by **bold** formatting
106
+ - Not listed as bullet points
107
+ - No space between headings and list items underneath
108
+
109
+ **Emphasis**:
110
+ - **Important terms** set in bold font
111
+ - **Text ending in a colon**: also bolded
112
+
113
+ **Review**:
114
+ - Ensure adherence to specified format
115
+ - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
116
+ """,
117
+ lines=3,
118
+ visible=False
119
+ )
120
 
121
  custom_prompt_checkbox.change(
122
  fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
123
  inputs=[custom_prompt_checkbox],
124
  outputs=[custom_prompt_input, system_prompt_input]
125
  )
126
+
127
+ # Handle preset prompt checkbox change
128
+ def on_preset_prompt_checkbox_change(is_checked):
129
+ if is_checked:
130
+ prompts, total_pages, current_page = list_prompts(page=1, per_page=10)
131
+ page_display_text = f"Page {current_page} of {total_pages}"
132
+ return (
133
+ gr.update(visible=True, interactive=True, choices=prompts), # preset_prompt
134
+ gr.update(visible=True), # prev_page_button
135
+ gr.update(visible=True), # next_page_button
136
+ gr.update(value=page_display_text, visible=True), # page_display
137
+ current_page, # current_page_state
138
+ total_pages # total_pages_state
139
+ )
140
+ else:
141
+ return (
142
+ gr.update(visible=False, interactive=False), # preset_prompt
143
+ gr.update(visible=False), # prev_page_button
144
+ gr.update(visible=False), # next_page_button
145
+ gr.update(visible=False), # page_display
146
+ 1, # current_page_state
147
+ 1 # total_pages_state
148
+ )
149
+
150
  preset_prompt_checkbox.change(
151
+ fn=on_preset_prompt_checkbox_change,
152
  inputs=[preset_prompt_checkbox],
153
+ outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
154
+ )
155
+
156
+ # Pagination button functions
157
+ def on_prev_page_click(current_page, total_pages):
158
+ new_page = max(current_page - 1, 1)
159
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=10)
160
+ page_display_text = f"Page {current_page} of {total_pages}"
161
+ return (
162
+ gr.update(choices=prompts),
163
+ gr.update(value=page_display_text),
164
+ current_page
165
+ )
166
+
167
+ prev_page_button.click(
168
+ fn=on_prev_page_click,
169
+ inputs=[current_page_state, total_pages_state],
170
+ outputs=[preset_prompt, page_display, current_page_state]
171
  )
172
 
173
+ def on_next_page_click(current_page, total_pages):
174
+ new_page = min(current_page + 1, total_pages)
175
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=10)
176
+ page_display_text = f"Page {current_page} of {total_pages}"
177
+ return (
178
+ gr.update(choices=prompts),
179
+ gr.update(value=page_display_text),
180
+ current_page
181
+ )
182
+
183
+ next_page_button.click(
184
+ fn=on_next_page_click,
185
+ inputs=[current_page_state, total_pages_state],
186
+ outputs=[preset_prompt, page_display, current_page_state]
187
+ )
188
+
189
+ # Update prompts when a preset is selected
190
  def update_prompts(preset_name):
191
  prompts = update_user_prompt(preset_name)
192
  return (
 
196
 
197
  preset_prompt.change(
198
  update_prompts,
199
+ inputs=[preset_prompt],
200
  outputs=[custom_prompt_input, system_prompt_input]
201
  )
202
+ # Refactored API selection dropdown
203
  api_name_input = gr.Dropdown(
204
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
205
+ value=default_value,
206
+ label="API for Summarization/Analysis (Optional)"
 
207
  )
208
  api_key_input = gr.Textbox(label="API Key (if required)", placeholder="Enter your API key here", type="password")
209
  custom_keywords_input = gr.Textbox(label="Custom Keywords", placeholder="Enter custom keywords, comma-separated")
App_Function_Libraries/Gradio_UI/Backup_Functionality.py CHANGED
@@ -14,7 +14,7 @@ from App_Function_Libraries.DB.DB_Manager import create_automated_backup, db_pat
14
  #
15
  # Functions:
16
 
17
- def create_backup():
18
  backup_file = create_automated_backup(db_path, backup_dir)
19
  return f"Backup created: {backup_file}"
20
 
@@ -42,18 +42,7 @@ def create_backup_tab():
42
  create_button = gr.Button("Create Backup")
43
  create_output = gr.Textbox(label="Result")
44
  with gr.Column():
45
- create_button.click(create_backup, inputs=[], outputs=create_output)
46
-
47
-
48
- def create_view_backups_tab():
49
- with gr.TabItem("View Backups", visible=True):
50
- gr.Markdown("# Browse available backups")
51
- with gr.Row():
52
- with gr.Column():
53
- view_button = gr.Button("View Backups")
54
- with gr.Column():
55
- backup_list = gr.Textbox(label="Available Backups")
56
- view_button.click(list_backups, inputs=[], outputs=backup_list)
57
 
58
 
59
  def create_restore_backup_tab():
 
14
  #
15
  # Functions:
16
 
17
+ def create_db_backup():
18
  backup_file = create_automated_backup(db_path, backup_dir)
19
  return f"Backup created: {backup_file}"
20
 
 
42
  create_button = gr.Button("Create Backup")
43
  create_output = gr.Textbox(label="Result")
44
  with gr.Column():
45
+ create_button.click(create_db_backup, inputs=[], outputs=create_output)
 
 
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  def create_restore_backup_tab():
App_Function_Libraries/Gradio_UI/Backup_RAG_Notes_Character_Chat_tab.py ADDED
@@ -0,0 +1,195 @@
1
+ # Backup_RAG_Notes_Character_Chat_tab.py
2
+ # Functionality for managing database backups
3
+ #
4
+ # Imports:
5
+ import os
6
+ import shutil
7
+ import gradio as gr
8
+ from typing import Dict, List
9
+ #
10
+ # Local Imports:
11
+ from App_Function_Libraries.DB.DB_Manager import create_automated_backup
12
+ from App_Function_Libraries.DB.DB_Backups import create_backup, create_incremental_backup, restore_single_db_backup
13
+
14
+
15
+ #
16
+ # End of Imports
17
+ #######################################################################################################################
18
+ #
19
+ # Functions:
20
+
21
+ def get_db_specific_backups(backup_dir: str, db_name: str) -> List[str]:
22
+ """Get list of backups specific to a database."""
23
+ all_backups = [f for f in os.listdir(backup_dir) if f.endswith(('.db', '.sqlib'))]
24
+ db_specific_backups = [
25
+ backup for backup in all_backups
26
+ if backup.startswith(f"{db_name}_")
27
+ ]
28
+ return sorted(db_specific_backups, reverse=True) # Most recent first
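With hypothetical timestamped filenames, the prefix filter plus reverse sort puts the newest backup first:

# Hypothetical backup_dir contents:
#   media_backup_2024-03-01.db, media_backup_2024-01-01.db, rag_qa_backup_2024-02-15.sqlib
# get_db_specific_backups(backup_dir, "media")
#   -> ["media_backup_2024-03-01.db", "media_backup_2024-01-01.db"]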
29
+
30
+ def create_backup_tab(db_path: str, backup_dir: str, db_name: str):
31
+ """Create the backup creation tab for a database."""
32
+ gr.Markdown("## Create Database Backup")
33
+ gr.Markdown(f"This will create a backup in the directory: `{backup_dir}`")
34
+ with gr.Row():
35
+ with gr.Column():
36
+ #automated_backup_btn = gr.Button("Create Simple Backup")
37
+ full_backup_btn = gr.Button("Create Full Backup")
38
+ incr_backup_btn = gr.Button("Create Incremental Backup")
39
+ with gr.Column():
40
+ backup_output = gr.Textbox(label="Result")
41
+
42
+ def create_db_backup():
43
+ backup_file = create_automated_backup(db_path, backup_dir)
44
+ return f"Backup created: {backup_file}"
45
+
46
+ # automated_backup_btn.click(
47
+ # fn=create_db_backup,
48
+ # inputs=[],
49
+ # outputs=[backup_output]
50
+ # )
51
+ full_backup_btn.click(
52
+ fn=lambda: create_backup(db_path, backup_dir, db_name),
53
+ inputs=[],
54
+ outputs=[backup_output]
55
+ )
56
+ incr_backup_btn.click(
57
+ fn=lambda: create_incremental_backup(db_path, backup_dir, db_name),
58
+ inputs=[],
59
+ outputs=[backup_output]
60
+ )
61
+
62
+ def create_view_backups_tab(backup_dir: str, db_name: str):
63
+ """Create the backup viewing tab for a database."""
64
+ gr.Markdown("## Available Backups")
65
+ with gr.Row():
66
+ with gr.Column():
67
+ view_btn = gr.Button("Refresh Backup List")
68
+ with gr.Column():
69
+ backup_list = gr.Textbox(label="Available Backups")
70
+
71
+ def list_db_backups():
72
+ """List backups specific to this database."""
73
+ backups = get_db_specific_backups(backup_dir, db_name)
74
+ return "\n".join(backups) if backups else f"No backups found for {db_name} database"
75
+
76
+ view_btn.click(
77
+ fn=list_db_backups,
78
+ inputs=[],
79
+ outputs=[backup_list]
80
+ )
81
+
82
+ def validate_backup_name(backup_name: str, db_name: str) -> bool:
83
+ """Validate that the backup name matches the database being restored."""
84
+ # Check if backup name starts with the database name prefix and has valid extension
85
+ valid_prefixes = [
86
+ f"{db_name}_backup_", # Full backup prefix
87
+ f"{db_name}_incremental_" # Incremental backup prefix
88
+ ]
89
+ has_valid_prefix = any(backup_name.startswith(prefix) for prefix in valid_prefixes)
90
+ has_valid_extension = backup_name.endswith(('.db', '.sqlib'))
91
+ return has_valid_prefix and has_valid_extension
92
+
93
+ def create_restore_backup_tab(db_path: str, backup_dir: str, db_name: str):
94
+ """Create the backup restoration tab for a database."""
95
+ gr.Markdown("## Restore Database")
96
+ gr.Markdown("⚠️ **Warning**: Restoring a backup will overwrite the current database.")
97
+ with gr.Row():
98
+ with gr.Column():
99
+ backup_input = gr.Textbox(label="Backup Filename")
100
+ restore_btn = gr.Button("Restore", variant="primary")
101
+ with gr.Column():
102
+ restore_output = gr.Textbox(label="Result")
103
+
104
+ def secure_restore(backup_name: str) -> str:
105
+ """Restore backup with validation checks."""
106
+ if not backup_name:
107
+ return "Please enter a backup filename"
108
+
109
+ # Validate backup name format
110
+ if not validate_backup_name(backup_name, db_name):
111
+ return f"Invalid backup file. Please select a backup file that starts with '{db_name}_backup_' or '{db_name}_incremental_'"
112
+
113
+ # Check if backup exists
114
+ backup_path = os.path.join(backup_dir, backup_name)
115
+ if not os.path.exists(backup_path):
116
+ return f"Backup file not found: {backup_name}"
117
+
118
+ # Proceed with restore
119
+ return restore_single_db_backup(db_path, backup_dir, db_name, backup_name)
120
+
121
+ restore_btn.click(
122
+ fn=secure_restore,
123
+ inputs=[backup_input],
124
+ outputs=[restore_output]
125
+ )
126
+
127
+ def create_media_db_tabs(db_config: Dict[str, str]):
128
+ """Create all tabs for the Media database."""
129
+ create_backup_tab(
130
+ db_path=db_config['db_path'],
131
+ backup_dir=db_config['backup_dir'],
132
+ db_name='media'
133
+ )
134
+ create_view_backups_tab(
135
+ backup_dir=db_config['backup_dir'],
136
+ db_name='media'
137
+ )
138
+ create_restore_backup_tab(
139
+ db_path=db_config['db_path'],
140
+ backup_dir=db_config['backup_dir'],
141
+ db_name='media'
142
+ )
143
+
144
+ def create_rag_chat_tabs(db_config: Dict[str, str]):
145
+ """Create all tabs for the RAG Chat database."""
146
+ create_backup_tab(
147
+ db_path=db_config['db_path'],
148
+ backup_dir=db_config['backup_dir'],
149
+ db_name='rag_qa' # Updated to match DB_Manager.py
150
+ )
151
+ create_view_backups_tab(
152
+ backup_dir=db_config['backup_dir'],
153
+ db_name='rag_qa' # Updated to match DB_Manager.py
154
+ )
155
+ create_restore_backup_tab(
156
+ db_path=db_config['db_path'],
157
+ backup_dir=db_config['backup_dir'],
158
+ db_name='rag_qa' # Updated to match DB_Manager.py
159
+ )
160
+
161
+ def create_character_chat_tabs(db_config: Dict[str, str]):
162
+ """Create all tabs for the Character Chat database."""
163
+ create_backup_tab(
164
+ db_path=db_config['db_path'],
165
+ backup_dir=db_config['backup_dir'],
166
+ db_name='chatDB' # Updated to match DB_Manager.py
167
+ )
168
+ create_view_backups_tab(
169
+ backup_dir=db_config['backup_dir'],
170
+ db_name='chatDB' # Updated to match DB_Manager.py
171
+ )
172
+ create_restore_backup_tab(
173
+ db_path=db_config['db_path'],
174
+ backup_dir=db_config['backup_dir'],
175
+ db_name='chatDB'
176
+ )
177
+
178
+ def create_database_management_interface(
179
+ media_db_config: Dict[str, str],
180
+ rag_db_config: Dict[str, str],
181
+ char_db_config: Dict[str, str]
182
+ ):
183
+ """Create the main database management interface with tabs for each database."""
184
+ with gr.TabItem("Media Database", id="media_db_group", visible=True):
185
+ create_media_db_tabs(media_db_config)
186
+
187
+ with gr.TabItem("RAG Chat Database", id="rag_chat_group", visible=True):
188
+ create_rag_chat_tabs(rag_db_config)
189
+
190
+ with gr.TabItem("Character Chat Database", id="character_chat_group", visible=True):
191
+ create_character_chat_tabs(char_db_config)
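Each *_db_config argument is expected to be a dict carrying the paths used by the per-database tabs; a minimal usage sketch of the function defined above, with placeholder paths:

import gradio as gr

# Placeholder paths; the real ones come from the application's configuration.
media_db_config = {"db_path": "Databases/media.db", "backup_dir": "Databases/backups"}
rag_db_config = {"db_path": "Databases/rag_qa.db", "backup_dir": "Databases/backups"}
char_db_config = {"db_path": "Databases/chatDB.db", "backup_dir": "Databases/backups"}

with gr.Blocks() as demo:
    with gr.Tabs():
        create_database_management_interface(media_db_config, rag_db_config, char_db_config)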
192
+
193
+ #
194
+ # End of Functions
195
+ #######################################################################################################################
App_Function_Libraries/Gradio_UI/Book_Ingestion_tab.py CHANGED
@@ -8,69 +8,113 @@
8
  #
9
  ####################
10
  # Imports
 
11
  #
12
  # External Imports
13
  import gradio as gr
14
  #
15
  # Local Imports
16
- from App_Function_Libraries.Books.Book_Ingestion_Lib import process_zip_file, import_epub, import_file_handler
 
17
  #
18
  ########################################################################################################################
19
  #
20
  # Functions:
21
 
22
-
23
-
24
  def create_import_book_tab():
 
 
 
 
 
 
 
 
 
 
 
25
  with gr.TabItem("Ebook(epub) Files", visible=True):
26
  with gr.Row():
27
  with gr.Column():
28
  gr.Markdown("# Import .epub files")
29
- gr.Markdown("Upload a single .epub file or a .zip file containing multiple .epub files")
30
  gr.Markdown(
31
  "🔗 **How to remove DRM from your ebooks:** [Reddit Guide](https://www.reddit.com/r/Calibre/comments/1ck4w8e/2024_guide_on_removing_drm_from_kobo_kindle_ebooks/)")
32
- import_file = gr.File(label="Upload file for import", file_types=[".epub", ".zip"])
33
- title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content (for single files)")
34
- author_input = gr.Textbox(label="Author", placeholder="Enter the author's name (for single files)")
35
- keywords_input = gr.Textbox(label="Keywords (like genre or publish year)",
36
- placeholder="Enter keywords, comma-separated")
37
- system_prompt_input = gr.Textbox(label="System Prompt", lines=3,
38
- value=""""
39
- <s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
40
- **Bulleted Note Creation Guidelines**
41
-
42
- **Headings**:
43
- - Based on referenced topics, not categories like quotes or terms
44
- - Surrounded by **bold** formatting
45
- - Not listed as bullet points
46
- - No space between headings and list items underneath
47
 
48
- **Emphasis**:
49
- - **Important terms** set in bold font
50
- - **Text ending in a colon**: also bolded
 
 
 
51
 
52
- **Review**:
53
- - Ensure adherence to specified format
54
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
55
- """, )
56
- custom_prompt_input = gr.Textbox(label="Custom User Prompt",
57
- placeholder="Enter a custom user prompt for summarization (optional)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
 
 
59
  api_name_input = gr.Dropdown(
60
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
61
- "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
62
- label="API for Auto-summarization"
63
  )
64
  api_key_input = gr.Textbox(label="API Key", type="password")
65
 
66
  # Chunking options
67
- max_chunk_size = gr.Slider(minimum=100, maximum=2000, value=500, step=50, label="Max Chunk Size")
68
- chunk_overlap = gr.Slider(minimum=0, maximum=500, value=200, step=10, label="Chunk Overlap")
69
- custom_chapter_pattern = gr.Textbox(label="Custom Chapter Pattern (optional)",
70
- placeholder="Enter a custom regex pattern for chapter detection")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
 
72
 
73
- import_button = gr.Button("Import eBook(s)")
74
  with gr.Column():
75
  with gr.Row():
76
  import_output = gr.Textbox(label="Import Status", lines=10, interactive=False)
@@ -78,10 +122,10 @@ def create_import_book_tab():
78
  import_button.click(
79
  fn=import_file_handler,
80
  inputs=[
81
- import_file,
82
- title_input,
83
  author_input,
84
  keywords_input,
 
85
  custom_prompt_input,
86
  auto_summarize_checkbox,
87
  api_name_input,
@@ -93,8 +137,8 @@ def create_import_book_tab():
93
  outputs=import_output
94
  )
95
 
96
- return import_file, title_input, author_input, keywords_input, system_prompt_input, custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input, import_button, import_output
97
 
98
  #
99
  # End of File
100
- ########################################################################################################################
 
8
  #
9
  ####################
10
  # Imports
11
+ import logging
12
  #
13
  # External Imports
14
  import gradio as gr
15
  #
16
  # Local Imports
17
+ from App_Function_Libraries.Books.Book_Ingestion_Lib import import_file_handler
18
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
19
  #
20
  ########################################################################################################################
21
  #
22
  # Functions:
23
 
 
 
24
  def create_import_book_tab():
25
+ try:
26
+ default_value = None
27
+ if default_api_endpoint:
28
+ if default_api_endpoint in global_api_endpoints:
29
+ default_value = format_api_name(default_api_endpoint)
30
+ else:
31
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
32
+ except Exception as e:
33
+ logging.error(f"Error setting default API endpoint: {str(e)}")
34
+ default_value = None
35
+
36
  with gr.TabItem("Ebook(epub) Files", visible=True):
37
  with gr.Row():
38
  with gr.Column():
39
  gr.Markdown("# Import .epub files")
40
+ gr.Markdown("Upload multiple .epub files or a .zip file containing multiple .epub files")
41
  gr.Markdown(
42
  "🔗 **How to remove DRM from your ebooks:** [Reddit Guide](https://www.reddit.com/r/Calibre/comments/1ck4w8e/2024_guide_on_removing_drm_from_kobo_kindle_ebooks/)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ # Updated to support multiple files
45
+ import_files = gr.File(
46
+ label="Upload files for import",
47
+ file_count="multiple",
48
+ file_types=[".epub", ".zip", ".html", ".htm", ".xml", ".opml"]
49
+ )
50
 
51
+ # Optional fields for overriding auto-extracted metadata
52
+ author_input = gr.Textbox(
53
+ label="Author Override (optional)",
54
+ placeholder="Enter author name to override auto-extracted metadata"
55
+ )
56
+ keywords_input = gr.Textbox(
57
+ label="Keywords (like genre or publish year)",
58
+ placeholder="Enter keywords, comma-separated - will be applied to all uploaded books"
59
+ )
60
+ system_prompt_input = gr.Textbox(
61
+ label="System Prompt",
62
+ lines=3,
63
+ value=""""
64
+ <s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
65
+ **Bulleted Note Creation Guidelines**
66
+
67
+ **Headings**:
68
+ - Based on referenced topics, not categories like quotes or terms
69
+ - Surrounded by **bold** formatting
70
+ - Not listed as bullet points
71
+ - No space between headings and list items underneath
72
+
73
+ **Emphasis**:
74
+ - **Important terms** set in bold font
75
+ - **Text ending in a colon**: also bolded
76
+
77
+ **Review**:
78
+ - Ensure adherence to specified format
79
+ - Do not reference these instructions in your response.</s>[INST]
80
+ """
81
+ )
82
+ custom_prompt_input = gr.Textbox(
83
+ label="Custom User Prompt",
84
+ placeholder="Enter a custom user prompt for summarization (optional)"
85
+ )
86
  auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
87
+
88
+ # API configuration
89
  api_name_input = gr.Dropdown(
90
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
91
+ value=default_value,
92
+ label="API for Summarization/Analysis (Optional)"
93
  )
94
  api_key_input = gr.Textbox(label="API Key", type="password")
95
 
96
  # Chunking options
97
+ max_chunk_size = gr.Slider(
98
+ minimum=100,
99
+ maximum=2000,
100
+ value=500,
101
+ step=50,
102
+ label="Max Chunk Size"
103
+ )
104
+ chunk_overlap = gr.Slider(
105
+ minimum=0,
106
+ maximum=500,
107
+ value=200,
108
+ step=10,
109
+ label="Chunk Overlap"
110
+ )
111
+ custom_chapter_pattern = gr.Textbox(
112
+ label="Custom Chapter Pattern (optional)",
113
+ placeholder="Enter a custom regex pattern for chapter detection"
114
+ )
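As a rough illustration of how these two sliders interact (whether the unit is characters or tokens is decided by the chunking library):

# With max_chunk_size=500 and chunk_overlap=200, consecutive chunks advance by 500 - 200 = 300 units:
#   chunk 1 covers [0, 500), chunk 2 covers [300, 800), chunk 3 covers [600, 1100), ...
# so each pair of neighbouring chunks shares a 200-unit overlap.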
115
 
116
+ import_button = gr.Button("Import eBooks")
117
 
 
118
  with gr.Column():
119
  with gr.Row():
120
  import_output = gr.Textbox(label="Import Status", lines=10, interactive=False)
 
122
  import_button.click(
123
  fn=import_file_handler,
124
  inputs=[
125
+ import_files, # Now handles multiple files
 
126
  author_input,
127
  keywords_input,
128
+ system_prompt_input,
129
  custom_prompt_input,
130
  auto_summarize_checkbox,
131
  api_name_input,
 
137
  outputs=import_output
138
  )
139
 
140
+ return import_files, author_input, keywords_input, system_prompt_input, custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input, import_button, import_output
141
 
142
  #
143
  # End of File
144
+ ########################################################################################################################
App_Function_Libraries/Gradio_UI/Character_Chat_tab.py CHANGED
@@ -2,10 +2,10 @@
2
  # Description: Library for character card import functions
3
  #
4
  # Imports
 
5
  import re
6
  import tempfile
7
  import uuid
8
- from datetime import datetime
9
  import json
10
  import logging
11
  import io
@@ -21,7 +21,7 @@ import gradio as gr
21
  from App_Function_Libraries.Character_Chat.Character_Chat_Lib import validate_character_book, validate_v2_card, \
22
  replace_placeholders, replace_user_placeholder, extract_json_from_image, parse_character_book, \
23
  load_chat_and_character, load_chat_history, load_character_and_image, extract_character_id, load_character_wrapper
24
- from App_Function_Libraries.Chat import chat
25
  from App_Function_Libraries.DB.Character_Chat_DB import (
26
  add_character_card,
27
  get_character_cards,
@@ -32,9 +32,12 @@ from App_Function_Libraries.DB.Character_Chat_DB import (
32
  update_character_chat,
33
  delete_character_chat,
34
  delete_character_card,
35
- update_character_card, search_character_chats,
36
  )
37
- from App_Function_Libraries.Utils.Utils import sanitize_user_input
 
 
 
38
  #
39
  ############################################################################################################
40
  #
@@ -252,8 +255,37 @@ def export_all_characters():
252
  # Gradio tabs
253
 
254
  def create_character_card_interaction_tab():
 
 
 
 
 
 
 
 
 
 
255
  with gr.TabItem("Chat with a Character Card", visible=True):
256
  gr.Markdown("# Chat with a Character Card")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  with gr.Row():
258
  with gr.Column(scale=1):
259
  character_image = gr.Image(label="Character Image", type="pil")
@@ -265,13 +297,10 @@ def create_character_card_interaction_tab():
265
  load_characters_button = gr.Button("Load Existing Characters")
266
  character_dropdown = gr.Dropdown(label="Select Character", choices=[])
267
  user_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name here")
 
268
  api_name_input = gr.Dropdown(
269
- choices=[
270
- "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
271
- "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
272
- "Custom-OpenAI-API"
273
- ],
274
- value="HuggingFace",
275
  label="API for Interaction (Mandatory)"
276
  )
277
  api_key_input = gr.Textbox(
@@ -281,24 +310,8 @@ def create_character_card_interaction_tab():
281
  temperature_slider = gr.Slider(
282
  minimum=0.0, maximum=2.0, value=0.7, step=0.05, label="Temperature"
283
  )
284
- import_chat_button = gr.Button("Import Chat History")
285
  chat_file_upload = gr.File(label="Upload Chat History JSON", visible=True)
286
-
287
- # Chat History Import and Search
288
- gr.Markdown("## Search and Load Existing Chats")
289
- chat_search_query = gr.Textbox(
290
- label="Search Chats",
291
- placeholder="Enter chat name or keywords to search"
292
- )
293
- chat_search_button = gr.Button("Search Chats")
294
- chat_search_dropdown = gr.Dropdown(label="Search Results", choices=[], visible=False)
295
- load_chat_button = gr.Button("Load Selected Chat", visible=False)
296
-
297
- # Checkbox to Decide Whether to Save Chats by Default
298
- auto_save_checkbox = gr.Checkbox(label="Save chats automatically", value=True)
299
- chat_media_name = gr.Textbox(label="Custom Chat Name (optional)", visible=True)
300
- save_chat_history_to_db = gr.Button("Save Chat History to Database")
301
- save_status = gr.Textbox(label="Save Status", interactive=False)
302
 
303
  with gr.Column(scale=2):
304
  chat_history = gr.Chatbot(label="Conversation", height=800)
@@ -307,6 +320,7 @@ def create_character_card_interaction_tab():
307
  answer_for_me_button = gr.Button("Answer for Me")
308
  continue_talking_button = gr.Button("Continue Talking")
309
  regenerate_button = gr.Button("Regenerate Last Message")
 
310
  clear_chat_button = gr.Button("Clear Chat")
311
  save_snapshot_button = gr.Button("Save Chat Snapshot")
312
  update_chat_dropdown = gr.Dropdown(label="Select Chat to Update", choices=[], visible=False)
@@ -491,23 +505,114 @@ def create_character_card_interaction_tab():
491
 
492
  return history, save_status
493
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  def save_chat_history_to_db_wrapper(
495
- chat_history, conversation_id, media_content,
496
- chat_media_name, char_data, auto_save
497
- ):
498
- if not char_data or not chat_history:
499
- return "No character or chat history available.", ""
 
 
 
 
500
 
501
- character_id = char_data.get('id')
502
- if not character_id:
503
- return "Character ID not found.", ""
 
 
 
 
504
 
505
- conversation_name = chat_media_name or f"Chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
506
- chat_id = add_character_chat(character_id, conversation_name, chat_history)
507
- if chat_id:
508
- return f"Chat saved successfully with ID {chat_id}.", ""
509
- else:
510
- return "Failed to save chat.", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
 
512
  def update_character_info(name):
513
  return load_character_and_image(name, user_name.value)
@@ -871,6 +976,10 @@ def create_character_card_interaction_tab():
871
  auto_save_checkbox
872
  ],
873
  outputs=[chat_history, save_status]
 
 
 
 
874
  )
875
 
876
  continue_talking_button.click(
@@ -885,6 +994,10 @@ def create_character_card_interaction_tab():
885
  auto_save_checkbox
886
  ],
887
  outputs=[chat_history, save_status]
 
 
 
 
888
  )
889
 
890
  import_card_button.click(
@@ -903,6 +1016,10 @@ def create_character_card_interaction_tab():
903
  fn=clear_chat_history,
904
  inputs=[character_data, user_name_input],
905
  outputs=[chat_history, character_data]
 
 
 
 
906
  )
907
 
908
  character_dropdown.change(
@@ -928,7 +1045,13 @@ def create_character_card_interaction_tab():
928
  auto_save_checkbox
929
  ],
930
  outputs=[chat_history, save_status]
931
- ).then(lambda: "", outputs=user_input)
 
 
 
 
 
 
932
 
933
  regenerate_button.click(
934
  fn=regenerate_last_message,
@@ -942,6 +1065,10 @@ def create_character_card_interaction_tab():
942
  auto_save_checkbox
943
  ],
944
  outputs=[chat_history, save_status]
 
 
 
 
945
  )
946
 
947
  import_chat_button.click(
@@ -951,8 +1078,12 @@ def create_character_card_interaction_tab():
951
 
952
  chat_file_upload.change(
953
  fn=import_chat_history,
954
- inputs=[chat_file_upload, chat_history, character_data],
955
  outputs=[chat_history, character_data, save_status]
 
 
 
 
956
  )
957
 
958
  save_chat_history_to_db.click(
@@ -1009,6 +1140,10 @@ def create_character_card_interaction_tab():
1009
  fn=load_selected_chat_from_search,
1010
  inputs=[chat_search_dropdown, user_name_input],
1011
  outputs=[character_data, chat_history, character_image, save_status]
 
 
 
 
1012
  )
1013
 
1014
  # Show Load Chat Button when a chat is selected
@@ -1023,8 +1158,8 @@ def create_character_card_interaction_tab():
1023
 
1024
 
1025
  def create_character_chat_mgmt_tab():
1026
- with gr.TabItem("Character and Chat Management", visible=True):
1027
- gr.Markdown("# Character and Chat Management")
1028
 
1029
  with gr.Row():
1030
  # Left Column: Character Import and Chat Management
@@ -1057,13 +1192,17 @@ def create_character_chat_mgmt_tab():
1057
  gr.Markdown("## Chat Management")
1058
  select_chat = gr.Dropdown(label="Select Chat", choices=[], visible=False, interactive=True)
1059
  load_chat_button = gr.Button("Load Selected Chat", visible=False)
1060
- conversation_list = gr.Dropdown(label="Select Conversation or Character", choices=[])
1061
  conversation_mapping = gr.State({})
1062
 
1063
  with gr.Tabs():
1064
  with gr.TabItem("Edit", visible=True):
1065
  chat_content = gr.TextArea(label="Chat/Character Content (JSON)", lines=20, max_lines=50)
1066
  save_button = gr.Button("Save Changes")
 
 
 
 
1067
  delete_button = gr.Button("Delete Conversation/Character", variant="stop")
1068
 
1069
  with gr.TabItem("Preview", visible=True):
@@ -1306,6 +1445,90 @@ def create_character_chat_mgmt_tab():
1306
 
1307
  return "Import results:\n" + "\n".join(results)
 
 
 
 
 
 
 
1309
  # Register new callback for character import
1310
  import_characters_button.click(
1311
  fn=import_multiple_characters,
@@ -1368,6 +1591,18 @@ def create_character_chat_mgmt_tab():
1368
  outputs=select_character
1369
  )
1370
 
 
 
1371
  return (
1372
  character_files, import_characters_button, import_status,
1373
  search_query, search_button, search_results, search_status,
 
2
  # Description: Library for character card import functions
3
  #
4
  # Imports
5
+ from datetime import datetime
6
  import re
7
  import tempfile
8
  import uuid
 
9
  import json
10
  import logging
11
  import io
 
21
  from App_Function_Libraries.Character_Chat.Character_Chat_Lib import validate_character_book, validate_v2_card, \
22
  replace_placeholders, replace_user_placeholder, extract_json_from_image, parse_character_book, \
23
  load_chat_and_character, load_chat_history, load_character_and_image, extract_character_id, load_character_wrapper
24
+ from App_Function_Libraries.Chat.Chat_Functions import chat, approximate_token_count
25
  from App_Function_Libraries.DB.Character_Chat_DB import (
26
  add_character_card,
27
  get_character_cards,
 
32
  update_character_chat,
33
  delete_character_chat,
34
  delete_character_card,
35
+ update_character_card, search_character_chats, save_chat_history_to_character_db,
36
  )
37
+ from App_Function_Libraries.Utils.Utils import sanitize_user_input, format_api_name, global_api_endpoints, \
38
+ default_api_endpoint, load_comprehensive_config
39
+
40
+
41
  #
42
  ############################################################################################################
43
  #
 
255
  # Gradio tabs
256
 
257
  def create_character_card_interaction_tab():
258
+ try:
259
+ default_value = None
260
+ if default_api_endpoint:
261
+ if default_api_endpoint in global_api_endpoints:
262
+ default_value = format_api_name(default_api_endpoint)
263
+ else:
264
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
265
+ except Exception as e:
266
+ logging.error(f"Error setting default API endpoint: {str(e)}")
267
+ default_value = None
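Note: this try/except block for resolving the default API endpoint is repeated in several tab constructors in this commit. A small helper could centralize it; the sketch below is illustrative only (the name resolve_default_api_endpoint is hypothetical) and reuses the default_api_endpoint, global_api_endpoints and format_api_name utilities imported above.

# Hypothetical helper, not part of the commit; mirrors the inline logic above.
def resolve_default_api_endpoint():
    try:
        if default_api_endpoint:
            if default_api_endpoint in global_api_endpoints:
                return format_api_name(default_api_endpoint)
            logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
    except Exception as e:
        logging.error(f"Error setting default API endpoint: {str(e)}")
    return None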
268
  with gr.TabItem("Chat with a Character Card", visible=True):
269
  gr.Markdown("# Chat with a Character Card")
270
+ with gr.Row():
271
+ with gr.Column(scale=1):
272
+ # Checkbox to Decide Whether to Save Chats by Default
273
+ config = load_comprehensive_config()
274
+ auto_save_value = config.get('auto-save', 'save_character_chats', fallback='False')
275
+ auto_save_checkbox = gr.Checkbox(label="Save chats automatically", value=auto_save_value)
276
+ chat_media_name = gr.Textbox(label="Custom Chat Name (optional)", visible=True)
277
+ save_chat_history_to_db = gr.Button("Save Chat History to Database")
278
+ save_status = gr.Textbox(label="Status", interactive=False)
279
+ with gr.Column(scale=2):
280
+ gr.Markdown("## Search and Load Existing Chats")
281
+ chat_search_query = gr.Textbox(
282
+ label="Search Chats",
283
+ placeholder="Enter chat name or keywords to search"
284
+ )
285
+ chat_search_button = gr.Button("Search Chats")
286
+ chat_search_dropdown = gr.Dropdown(label="Search Results", choices=[], visible=False)
287
+ load_chat_button = gr.Button("Load Selected Chat", visible=False)
288
+
289
  with gr.Row():
290
  with gr.Column(scale=1):
291
  character_image = gr.Image(label="Character Image", type="pil")
 
297
  load_characters_button = gr.Button("Load Existing Characters")
298
  character_dropdown = gr.Dropdown(label="Select Character", choices=[])
299
  user_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name here")
300
+ # Refactored API selection dropdown
301
  api_name_input = gr.Dropdown(
302
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
303
+ value=default_value,
 
 
 
 
304
  label="API for Interaction (Mandatory)"
305
  )
306
  api_key_input = gr.Textbox(
 
310
  temperature_slider = gr.Slider(
311
  minimum=0.0, maximum=2.0, value=0.7, step=0.05, label="Temperature"
312
  )
 
313
  chat_file_upload = gr.File(label="Upload Chat History JSON", visible=True)
314
+ import_chat_button = gr.Button("Import Chat History")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
 
316
  with gr.Column(scale=2):
317
  chat_history = gr.Chatbot(label="Conversation", height=800)
 
320
  answer_for_me_button = gr.Button("Answer for Me")
321
  continue_talking_button = gr.Button("Continue Talking")
322
  regenerate_button = gr.Button("Regenerate Last Message")
323
+ token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False)
324
  clear_chat_button = gr.Button("Clear Chat")
325
  save_snapshot_button = gr.Button("Save Chat Snapshot")
326
  update_chat_dropdown = gr.Dropdown(label="Select Chat to Update", choices=[], visible=False)
 
505
 
506
  return history, save_status
507
 
508
+ def validate_chat_history(chat_history: List[Tuple[Optional[str], str]]) -> bool:
509
+ """
510
+ Validate the chat history format and content.
511
+
512
+ Args:
513
+ chat_history: List of message tuples (user_message, bot_message)
514
+
515
+ Returns:
516
+ bool: True if valid, False if invalid
517
+ """
518
+ if not isinstance(chat_history, list):
519
+ return False
520
+
521
+ for entry in chat_history:
522
+ if not isinstance(entry, tuple) or len(entry) != 2:
523
+ return False
524
+ # First element can be None (for system messages) or str
525
+ if not (entry[0] is None or isinstance(entry[0], str)):
526
+ return False
527
+ # Second element (bot response) must be str and not empty
528
+ if not isinstance(entry[1], str) or not entry[1].strip():
529
+ return False
530
+
531
+ return True
532
+
533
+ def sanitize_conversation_name(name: str) -> str:
534
+ """
535
+ Sanitize the conversation name.
536
+
537
+ Args:
538
+ name: Raw conversation name
539
+
540
+ Returns:
541
+ str: Sanitized conversation name
542
+ """
543
+ # Remove any non-alphanumeric characters except spaces and basic punctuation
544
+ sanitized = re.sub(r'[^a-zA-Z0-9\s\-_.]', '', name)
545
+ # Limit length
546
+ sanitized = sanitized[:100]
547
+ # Ensure it's not empty
548
+ if not sanitized.strip():
549
+ sanitized = f"Chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
550
+ return sanitized
551
+
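For orientation, a minimal usage sketch of the two helpers above (illustrative only; assumes they are reachable outside the Gradio tab constructor):

# Chat history entries are (user_message_or_None, bot_message) tuples.
sample_history = [(None, "Greetings, traveler."), ("Hi!", "Welcome to the tavern.")]
assert validate_chat_history(sample_history) is True
assert validate_chat_history([("Hi!", "")]) is False   # empty bot reply is rejected
print(sanitize_conversation_name("My chat: with *weird* chars!"))  # -> "My chat with weird chars"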
552
  def save_chat_history_to_db_wrapper(
553
+ chat_history: List[Tuple[Optional[str], str]],
554
+ conversation_id: str,
555
+ media_content: Dict,
556
+ chat_media_name: str,
557
+ char_data: Dict,
558
+ auto_save: bool
559
+ ) -> Tuple[str, str]:
560
+ """
561
+ Save chat history to the database with validation.
562
 
563
+ Args:
564
+ chat_history: List of message tuples
565
+ conversation_id: Current conversation ID
566
+ media_content: Media content metadata
567
+ chat_media_name: Custom name for the chat
568
+ char_data: Character data dictionary
569
+ auto_save: Auto-save flag
570
 
571
+ Returns:
572
+ Tuple[str, str]: (status message, detail message)
573
+ """
574
+ try:
575
+ # Basic input validation
576
+ if not chat_history:
577
+ return "No chat history to save.", ""
578
+
579
+ if not validate_chat_history(chat_history):
580
+ return "Invalid chat history format.", "Please ensure the chat history is valid."
581
+
582
+ if not char_data:
583
+ return "No character selected.", "Please select a character first."
584
+
585
+ character_id = char_data.get('id')
586
+ if not character_id:
587
+ return "Invalid character data: No character ID found.", ""
588
+
589
+ # Sanitize and prepare conversation name
590
+ conversation_name = sanitize_conversation_name(
591
+ chat_media_name if chat_media_name.strip()
592
+ else f"Chat with {char_data.get('name', 'Unknown')} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
593
+ )
594
+
595
+ # Save to the database using your existing function
596
+ chat_id = save_chat_history_to_character_db(
597
+ character_id=character_id,
598
+ conversation_name=conversation_name,
599
+ chat_history=chat_history
600
+ )
601
+
602
+ if chat_id:
603
+ success_message = (
604
+ f"Chat saved successfully!\n"
605
+ f"ID: {chat_id}\n"
606
+ f"Name: {conversation_name}\n"
607
+ f"Messages: {len(chat_history)}"
608
+ )
609
+ return success_message, ""
610
+ else:
611
+ return "Failed to save chat to database.", "Database operation failed."
612
+
613
+ except Exception as e:
614
+ logging.error(f"Error saving chat history: {str(e)}", exc_info=True)
615
+ return f"Error saving chat: {str(e)}", "Please check the logs for more details."
616
 
617
  def update_character_info(name):
618
  return load_character_and_image(name, user_name.value)
 
976
  auto_save_checkbox
977
  ],
978
  outputs=[chat_history, save_status]
979
+ ).then(
980
+ lambda history: approximate_token_count(history),
981
+ inputs=[chat_history],
982
+ outputs=[token_count_display]
983
  )
984
 
985
  continue_talking_button.click(
 
994
  auto_save_checkbox
995
  ],
996
  outputs=[chat_history, save_status]
997
+ ).then(
998
+ lambda history: approximate_token_count(history),
999
+ inputs=[chat_history],
1000
+ outputs=[token_count_display]
1001
  )
1002
 
1003
  import_card_button.click(
 
1016
  fn=clear_chat_history,
1017
  inputs=[character_data, user_name_input],
1018
  outputs=[chat_history, character_data]
1019
+ ).then(
1020
+ lambda history: approximate_token_count(history),
1021
+ inputs=[chat_history],
1022
+ outputs=[token_count_display]
1023
  )
1024
 
1025
  character_dropdown.change(
 
1045
  auto_save_checkbox
1046
  ],
1047
  outputs=[chat_history, save_status]
1048
+ ).then(
1049
+ lambda: "", outputs=user_input
1050
+ ).then(
1051
+ lambda history: approximate_token_count(history),
1052
+ inputs=[chat_history],
1053
+ outputs=[token_count_display]
1054
+ )
1055
 
1056
  regenerate_button.click(
1057
  fn=regenerate_last_message,
 
1065
  auto_save_checkbox
1066
  ],
1067
  outputs=[chat_history, save_status]
1068
+ ).then(
1069
+ lambda history: approximate_token_count(history),
1070
+ inputs=[chat_history],
1071
+ outputs=[token_count_display]
1072
  )
1073
 
1074
  import_chat_button.click(
 
1078
 
1079
  chat_file_upload.change(
1080
  fn=import_chat_history,
1081
+ inputs=[chat_file_upload, chat_history, character_data, user_name_input],
1082
  outputs=[chat_history, character_data, save_status]
1083
+ ).then(
1084
+ lambda history: approximate_token_count(history),
1085
+ inputs=[chat_history],
1086
+ outputs=[token_count_display]
1087
  )
1088
 
1089
  save_chat_history_to_db.click(
 
1140
  fn=load_selected_chat_from_search,
1141
  inputs=[chat_search_dropdown, user_name_input],
1142
  outputs=[character_data, chat_history, character_image, save_status]
1143
+ ).then(
1144
+ lambda history: approximate_token_count(history),
1145
+ inputs=[chat_history],
1146
+ outputs=[token_count_display]
1147
  )
1148
 
1149
  # Show Load Chat Button when a chat is selected
 
1158
 
1159
 
1160
  def create_character_chat_mgmt_tab():
1161
+ with gr.TabItem("Character Chat Management", visible=True):
1162
+ gr.Markdown("# Character Chat Management")
1163
 
1164
  with gr.Row():
1165
  # Left Column: Character Import and Chat Management
 
1192
  gr.Markdown("## Chat Management")
1193
  select_chat = gr.Dropdown(label="Select Chat", choices=[], visible=False, interactive=True)
1194
  load_chat_button = gr.Button("Load Selected Chat", visible=False)
1195
+ conversation_list = gr.Dropdown(label="Select Conversation", choices=[])
1196
  conversation_mapping = gr.State({})
1197
 
1198
  with gr.Tabs():
1199
  with gr.TabItem("Edit", visible=True):
1200
  chat_content = gr.TextArea(label="Chat/Character Content (JSON)", lines=20, max_lines=50)
1201
  save_button = gr.Button("Save Changes")
1202
+ export_chat_button = gr.Button("Export Current Conversation", variant="secondary")
1203
+ export_all_chats_button = gr.Button("Export All Character Conversations", variant="secondary")
1204
+ export_file = gr.File(label="Downloaded File", visible=False)
1205
+ export_status = gr.Markdown("")
1206
  delete_button = gr.Button("Delete Conversation/Character", variant="stop")
1207
 
1208
  with gr.TabItem("Preview", visible=True):
 
1445
 
1446
  return "Import results:\n" + "\n".join(results)
1447
 
1448
+ def export_current_conversation(selected_chat):
1449
+ if not selected_chat:
1450
+ return "Please select a conversation to export.", None
1451
+
1452
+ try:
1453
+ chat_id = int(selected_chat.split('(ID: ')[1].rstrip(')'))
1454
+ chat = get_character_chat_by_id(chat_id)
1455
+
1456
+ if not chat:
1457
+ return "Selected chat not found.", None
1458
+
1459
+ # Ensure chat_history is properly parsed
1460
+ chat_history = chat['chat_history']
1461
+ if isinstance(chat_history, str):
1462
+ chat_history = json.loads(chat_history)
1463
+
1464
+ export_data = {
1465
+ "conversation_id": chat['id'],
1466
+ "conversation_name": chat['conversation_name'],
1467
+ "character_id": chat['character_id'],
1468
+ "chat_history": chat_history,
1469
+ "exported_at": datetime.now().isoformat()
1470
+ }
1471
+
1472
+ # Convert to JSON string
1473
+ json_str = json.dumps(export_data, indent=2, ensure_ascii=False)
1474
+
1475
+ # Create file name
1476
+ file_name = f"conversation_{chat['id']}_{chat['conversation_name']}.json"
1477
+
1478
+ # Return file for download
1479
+ return "Conversation exported successfully!", (file_name, json_str, "application/json")
1480
+
1481
+ except Exception as e:
1482
+ logging.error(f"Error exporting conversation: {e}")
1483
+ return f"Error exporting conversation: {str(e)}", None
1484
+
1485
+ def export_all_character_conversations(character_selection):
1486
+ if not character_selection:
1487
+ return "Please select a character first.", None
1488
+
1489
+ try:
1490
+ character_id = int(character_selection.split('(ID: ')[1].rstrip(')'))
1491
+ character = get_character_card_by_id(character_id)
1492
+ chats = get_character_chats(character_id=character_id)
1493
+
1494
+ if not chats:
1495
+ return "No conversations found for this character.", None
1496
+
1497
+ # Process chat histories
1498
+ conversations = []
1499
+ for chat in chats:
1500
+ chat_history = chat['chat_history']
1501
+ if isinstance(chat_history, str):
1502
+ chat_history = json.loads(chat_history)
1503
+
1504
+ conversations.append({
1505
+ "conversation_id": chat['id'],
1506
+ "conversation_name": chat['conversation_name'],
1507
+ "chat_history": chat_history
1508
+ })
1509
+
1510
+ export_data = {
1511
+ "character": {
1512
+ "id": character['id'],
1513
+ "name": character['name']
1514
+ },
1515
+ "conversations": conversations,
1516
+ "exported_at": datetime.now().isoformat()
1517
+ }
1518
+
1519
+ # Convert to JSON string
1520
+ json_str = json.dumps(export_data, indent=2, ensure_ascii=False)
1521
+
1522
+ # Create file name
1523
+ file_name = f"all_conversations_{character['name']}_{character['id']}.json"
1524
+
1525
+ # Return file for download
1526
+ return "All conversations exported successfully!", (file_name, json_str, "application/json")
1527
+
1528
+ except Exception as e:
1529
+ logging.error(f"Error exporting all conversations: {e}")
1530
+ return f"Error exporting conversations: {str(e)}", None
1531
+
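Both export helpers hand back (file_name, json_str, "application/json") for the gr.File output. Depending on the Gradio version, gr.File may expect a path on disk rather than an in-memory tuple; if downloads come back empty, a temp-file variant along these lines could be used (a sketch using the tempfile module already imported in this file; the helper name is hypothetical):

def export_payload_to_tempfile(file_name, json_str):
    # Write the JSON payload to a named temporary file and return its path for gr.File.
    tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8")
    tmp.write(json_str)
    tmp.close()
    return tmp.name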
1532
  # Register new callback for character import
1533
  import_characters_button.click(
1534
  fn=import_multiple_characters,
 
1591
  outputs=select_character
1592
  )
1593
 
1594
+ export_chat_button.click(
1595
+ fn=export_current_conversation,
1596
+ inputs=[select_chat],
1597
+ outputs=[export_status, export_file]
1598
+ )
1599
+
1600
+ export_all_chats_button.click(
1601
+ fn=export_all_character_conversations,
1602
+ inputs=[select_character],
1603
+ outputs=[export_status, export_file]
1604
+ )
1605
+
1606
  return (
1607
  character_files, import_characters_button, import_status,
1608
  search_query, search_button, search_results, search_status,
App_Function_Libraries/Gradio_UI/Character_interaction_tab.py CHANGED
@@ -17,9 +17,12 @@ import gradio as gr
17
  from PIL import Image
18
  #
19
  # Local Imports
20
- from App_Function_Libraries.Chat import chat, load_characters, save_chat_history_to_db_wrapper
21
  from App_Function_Libraries.Gradio_UI.Chat_ui import chat_wrapper
22
  from App_Function_Libraries.Gradio_UI.Writing_tab import generate_writing_feedback
 
 
 
23
  #
24
  ########################################################################################################################
25
  #
@@ -253,6 +256,16 @@ def character_interaction(character1: str, character2: str, api_endpoint: str, a
253
 
254
 
255
  def create_multiple_character_chat_tab():
 
 
256
  with gr.TabItem("Multi-Character Chat", visible=True):
257
  characters, conversation, current_character, other_character = character_interaction_setup()
258
 
@@ -264,13 +277,12 @@ def create_multiple_character_chat_tab():
264
  character_selectors = [gr.Dropdown(label=f"Character {i + 1}", choices=list(characters.keys())) for i in
265
  range(4)]
266
 
267
- api_endpoint = gr.Dropdown(label="API Endpoint",
268
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek",
269
- "Mistral",
270
- "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM",
271
- "ollama", "HuggingFace",
272
- "Custom-OpenAI-API"],
273
- value="HuggingFace")
274
  api_key = gr.Textbox(label="API Key (if required)", type="password")
275
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
276
  scenario = gr.Textbox(label="Scenario (optional)", lines=3)
@@ -393,17 +405,26 @@ def create_multiple_character_chat_tab():
393
 
394
  # From `Fuzzlewumper` on Reddit.
395
  def create_narrator_controlled_conversation_tab():
 
 
396
  with gr.TabItem("Narrator-Controlled Conversation", visible=True):
397
  gr.Markdown("# Narrator-Controlled Conversation")
398
 
399
  with gr.Row():
400
  with gr.Column(scale=1):
 
401
  api_endpoint = gr.Dropdown(
402
- label="API Endpoint",
403
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
404
- "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
405
- "Custom-OpenAI-API"],
406
- value="HuggingFace"
407
  )
408
  api_key = gr.Textbox(label="API Key (if required)", type="password")
409
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
 
17
  from PIL import Image
18
  #
19
  # Local Imports
20
+ from App_Function_Libraries.Chat.Chat_Functions import chat, load_characters, save_chat_history_to_db_wrapper
21
  from App_Function_Libraries.Gradio_UI.Chat_ui import chat_wrapper
22
  from App_Function_Libraries.Gradio_UI.Writing_tab import generate_writing_feedback
23
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, format_api_name, global_api_endpoints
24
+
25
+
26
  #
27
  ########################################################################################################################
28
  #
 
256
 
257
 
258
  def create_multiple_character_chat_tab():
259
+ try:
260
+ default_value = None
261
+ if default_api_endpoint:
262
+ if default_api_endpoint in global_api_endpoints:
263
+ default_value = format_api_name(default_api_endpoint)
264
+ else:
265
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
266
+ except Exception as e:
267
+ logging.error(f"Error setting default API endpoint: {str(e)}")
268
+ default_value = None
269
  with gr.TabItem("Multi-Character Chat", visible=True):
270
  characters, conversation, current_character, other_character = character_interaction_setup()
271
 
 
277
  character_selectors = [gr.Dropdown(label=f"Character {i + 1}", choices=list(characters.keys())) for i in
278
  range(4)]
279
 
280
+ # Refactored API selection dropdown
281
+ api_endpoint = gr.Dropdown(
282
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
283
+ value=default_value,
284
+ label="API for Interaction (Optional)"
285
+ )
 
286
  api_key = gr.Textbox(label="API Key (if required)", type="password")
287
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
288
  scenario = gr.Textbox(label="Scenario (optional)", lines=3)
 
405
 
406
  # From `Fuzzlewumper` on Reddit.
407
  def create_narrator_controlled_conversation_tab():
408
+ try:
409
+ default_value = None
410
+ if default_api_endpoint:
411
+ if default_api_endpoint in global_api_endpoints:
412
+ default_value = format_api_name(default_api_endpoint)
413
+ else:
414
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
415
+ except Exception as e:
416
+ logging.error(f"Error setting default API endpoint: {str(e)}")
417
+ default_value = None
418
  with gr.TabItem("Narrator-Controlled Conversation", visible=True):
419
  gr.Markdown("# Narrator-Controlled Conversation")
420
 
421
  with gr.Row():
422
  with gr.Column(scale=1):
423
+ # Refactored API selection dropdown
424
  api_endpoint = gr.Dropdown(
425
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
426
+ value=default_value,
427
+ label="API for Chat Interaction (Optional)"
 
 
428
  )
429
  api_key = gr.Textbox(label="API Key (if required)", type="password")
430
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
App_Function_Libraries/Gradio_UI/Chat_ui.py CHANGED
@@ -2,23 +2,25 @@
2
  # Description: Chat interface functions for Gradio
3
  #
4
  # Imports
5
- import html
6
- import json
7
  import logging
8
  import os
9
  import sqlite3
 
10
  from datetime import datetime
11
  #
12
  # External Imports
13
  import gradio as gr
14
  #
15
  # Local Imports
16
- from App_Function_Libraries.Chat import chat, save_chat_history, update_chat_content, save_chat_history_to_db_wrapper
17
- from App_Function_Libraries.DB.DB_Manager import add_chat_message, search_chat_conversations, create_chat_conversation, \
18
- get_chat_messages, update_chat_message, delete_chat_message, load_preset_prompts, db
 
 
 
19
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_user_prompt
20
-
21
-
22
  #
23
  #
24
  ########################################################################################################################
@@ -91,10 +93,9 @@ def chat_wrapper(message, history, media_content, selected_parts, api_endpoint,
91
  # Create a new conversation
92
  media_id = media_content.get('id', None)
93
  conversation_name = f"Chat about {media_content.get('title', 'Unknown Media')} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
94
- conversation_id = create_chat_conversation(media_id, conversation_name)
95
-
96
  # Add user message to the database
97
- user_message_id = add_chat_message(conversation_id, "user", message)
98
 
99
  # Include the selected parts and custom_prompt only for the first message
100
  if not history and selected_parts:
@@ -113,7 +114,7 @@ def chat_wrapper(message, history, media_content, selected_parts, api_endpoint,
113
 
114
  if save_conversation:
115
  # Add assistant message to the database
116
- add_chat_message(conversation_id, "assistant", bot_message)
117
 
118
  # Update history
119
  new_history = history + [(message, bot_message)]
@@ -123,51 +124,57 @@ def chat_wrapper(message, history, media_content, selected_parts, api_endpoint,
123
  logging.error(f"Error in chat wrapper: {str(e)}")
124
  return "An error occurred.", history, conversation_id
125
 
 
126
  def search_conversations(query):
 
127
  try:
128
- conversations = search_chat_conversations(query)
129
- if not conversations:
130
- print(f"Debug - Search Conversations - No results found for query: {query}")
 
 
131
  return gr.update(choices=[])
132
 
 
133
  conversation_options = [
134
- (f"{c['conversation_name']} (Media: {c['media_title']}, ID: {c['id']})", c['id'])
135
- for c in conversations
136
  ]
137
- print(f"Debug - Search Conversations - Options: {conversation_options}")
138
  return gr.update(choices=conversation_options)
139
  except Exception as e:
140
- print(f"Debug - Search Conversations - Error: {str(e)}")
141
  return gr.update(choices=[])
142
 
143
 
144
  def load_conversation(conversation_id):
 
145
  if not conversation_id:
146
  return [], None
147
 
148
- messages = get_chat_messages(conversation_id)
149
- history = [
150
- (msg['message'], None) if msg['sender'] == 'user' else (None, msg['message'])
151
- for msg in messages
152
- ]
153
- return history, conversation_id
154
-
155
-
156
- def update_message_in_chat(message_id, new_text, history):
157
- update_chat_message(message_id, new_text)
158
- updated_history = [(msg1, msg2) if msg1[1] != message_id and msg2[1] != message_id
159
- else ((new_text, msg1[1]) if msg1[1] == message_id else (new_text, msg2[1]))
160
- for msg1, msg2 in history]
161
- return updated_history
162
 
 
 
 
 
 
163
 
164
- def delete_message_from_chat(message_id, history):
165
- delete_chat_message(message_id)
166
- updated_history = [(msg1, msg2) for msg1, msg2 in history if msg1[1] != message_id and msg2[1] != message_id]
167
- return updated_history
168
 
169
 
170
- def regenerate_last_message(history, media_content, selected_parts, api_endpoint, api_key, custom_prompt, temperature, system_prompt):
 
171
  if not history:
172
  return history, "No messages to regenerate."
173
 
@@ -200,7 +207,56 @@ def regenerate_last_message(history, media_content, selected_parts, api_endpoint
200
 
201
  return new_history, "Last message regenerated successfully."
 
 
 
 
 
 
203
  def create_chat_interface():
 
 
 
204
  custom_css = """
205
  .chatbot-container .message-wrap .message {
206
  font-size: 14px !important;
@@ -215,9 +271,19 @@ def create_chat_interface():
215
 
216
  with gr.Row():
217
  with gr.Column(scale=1):
218
- search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
219
- search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title",
220
- label="Search By")
 
 
 
221
  search_button = gr.Button("Search")
222
  items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
223
  item_mapping = gr.State({})
@@ -237,53 +303,60 @@ def create_chat_interface():
237
  with gr.Row():
238
  load_conversations_btn = gr.Button("Load Selected Conversation")
239
 
240
- api_endpoint = gr.Dropdown(label="Select API Endpoint",
241
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek",
242
- "Mistral", "OpenRouter",
243
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama",
244
- "HuggingFace"])
 
245
  api_key = gr.Textbox(label="API Key (if required)", type="password")
 
 
 
 
 
246
  custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
247
  value=False,
248
  visible=True)
249
  preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
250
  value=False,
251
  visible=True)
252
- preset_prompt = gr.Dropdown(label="Select Preset Prompt",
253
- choices=load_preset_prompts(),
254
- visible=False)
255
- user_prompt = gr.Textbox(label="Custom Prompt",
256
- placeholder="Enter custom prompt here",
257
- lines=3,
258
- visible=False)
259
- system_prompt_input = gr.Textbox(label="System Prompt",
260
- value="You are a helpful AI assitant",
261
- lines=3,
262
- visible=False)
 
 
263
  with gr.Column(scale=2):
264
- chatbot = gr.Chatbot(height=600, elem_classes="chatbot-container")
265
  msg = gr.Textbox(label="Enter your message")
266
  submit = gr.Button("Submit")
267
  regenerate_button = gr.Button("Regenerate Last Message")
 
268
  clear_chat_button = gr.Button("Clear Chat")
269
 
270
- edit_message_id = gr.Number(label="Message ID to Edit", visible=False)
271
- edit_message_text = gr.Textbox(label="Edit Message", visible=False)
272
- update_message_button = gr.Button("Update Message", visible=False)
273
-
274
- delete_message_id = gr.Number(label="Message ID to Delete", visible=False)
275
- delete_message_button = gr.Button("Delete Message", visible=False)
276
-
277
  chat_media_name = gr.Textbox(label="Custom Chat Name(optional)")
278
  save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
 
279
  save_chat_history_as_file = gr.Button("Save Chat History as File")
280
  download_file = gr.File(label="Download Chat History")
281
- save_status = gr.Textbox(label="Save Status", interactive=False)
282
 
283
  # Restore original functionality
284
  search_button.click(
285
- fn=update_dropdown,
286
- inputs=[search_query_input, search_type_input],
287
  outputs=[items_output, item_mapping]
288
  )
289
 
@@ -314,21 +387,72 @@ def create_chat_interface():
314
  clear_chat,
315
  outputs=[chatbot, conversation_id]
316
  )
 
 
 
 
 
 
 
 
 
317
  preset_prompt.change(
318
  update_prompts,
319
- inputs=preset_prompt,
320
  outputs=[user_prompt, system_prompt_input]
321
  )
 
322
  custom_prompt_checkbox.change(
323
  fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
324
  inputs=[custom_prompt_checkbox],
325
  outputs=[user_prompt, system_prompt_input]
326
  )
327
- preset_prompt_checkbox.change(
328
- fn=lambda x: gr.update(visible=x),
329
- inputs=[preset_prompt_checkbox],
330
- outputs=[preset_prompt]
331
- )
332
  submit.click(
333
  chat_wrapper,
334
  inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, conversation_id,
@@ -341,6 +465,10 @@ def create_chat_interface():
341
  ).then( # Clear the user prompt after the first message
342
  lambda: (gr.update(value=""), gr.update(value="")),
343
  outputs=[user_prompt, system_prompt_input]
 
 
 
 
344
  )
345
 
346
  items_output.change(
@@ -348,6 +476,7 @@ def create_chat_interface():
348
  inputs=[items_output, use_content, use_summary, use_prompt, item_mapping],
349
  outputs=[media_content, selected_parts]
350
  )
 
351
  use_content.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
352
  outputs=[selected_parts])
353
  use_summary.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
@@ -377,18 +506,6 @@ def create_chat_interface():
377
  outputs=[chat_history]
378
  )
379
 
380
- update_message_button.click(
381
- update_message_in_chat,
382
- inputs=[edit_message_id, edit_message_text, chat_history],
383
- outputs=[chatbot]
384
- )
385
-
386
- delete_message_button.click(
387
- delete_message_from_chat,
388
- inputs=[delete_message_id, chat_history],
389
- outputs=[chatbot]
390
- )
391
-
392
  save_chat_history_as_file.click(
393
  save_chat_history,
394
  inputs=[chatbot, conversation_id],
@@ -403,15 +520,28 @@ def create_chat_interface():
403
 
404
  regenerate_button.click(
405
  regenerate_last_message,
406
- inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temperature, system_prompt_input],
 
407
  outputs=[chatbot, save_status]
 
 
 
 
408
  )
409
 
410
- chatbot.select(show_edit_message, None, [edit_message_text, edit_message_id, update_message_button])
411
- chatbot.select(show_delete_message, None, [delete_message_id, delete_message_button])
412
-
413
 
414
  def create_chat_interface_stacked():
 
 
 
 
 
 
 
 
 
 
 
415
  custom_css = """
416
  .chatbot-container .message-wrap .message {
417
  font-size: 14px !important;
@@ -426,9 +556,19 @@ def create_chat_interface_stacked():
426
 
427
  with gr.Row():
428
  with gr.Column():
429
- search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
430
- search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title",
431
- label="Search By")
 
 
 
 
 
 
 
 
 
 
432
  search_button = gr.Button("Search")
433
  items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
434
  item_mapping = gr.State({})
@@ -446,45 +586,165 @@ def create_chat_interface_stacked():
446
  search_conversations_btn = gr.Button("Search Conversations")
447
  load_conversations_btn = gr.Button("Load Selected Conversation")
448
  with gr.Column():
449
- api_endpoint = gr.Dropdown(label="Select API Endpoint",
450
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek",
451
- "OpenRouter", "Mistral", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi",
452
- "VLLM", "ollama", "HuggingFace"])
 
 
453
  api_key = gr.Textbox(label="API Key (if required)", type="password")
454
- preset_prompt = gr.Dropdown(label="Select Preset Prompt",
455
- choices=load_preset_prompts(),
456
- visible=True)
457
- system_prompt = gr.Textbox(label="System Prompt",
458
- value="You are a helpful AI assistant.",
459
- lines=3,
460
- visible=True)
461
- user_prompt = gr.Textbox(label="Custom User Prompt",
462
- placeholder="Enter custom prompt here",
463
- lines=3,
464
- visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
  gr.Markdown("Scroll down for the chat window...")
466
  with gr.Row():
467
  with gr.Column(scale=1):
468
- chatbot = gr.Chatbot(height=600, elem_classes="chatbot-container")
469
  msg = gr.Textbox(label="Enter your message")
470
  with gr.Row():
471
  with gr.Column():
472
  submit = gr.Button("Submit")
473
  regenerate_button = gr.Button("Regenerate Last Message")
 
474
  clear_chat_button = gr.Button("Clear Chat")
475
  chat_media_name = gr.Textbox(label="Custom Chat Name(optional)", visible=True)
476
  save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
 
477
  save_chat_history_as_file = gr.Button("Save Chat History as File")
478
  with gr.Column():
479
  download_file = gr.File(label="Download Chat History")
480
 
481
  # Restore original functionality
482
  search_button.click(
483
- fn=update_dropdown,
484
- inputs=[search_query_input, search_type_input],
485
  outputs=[items_output, item_mapping]
486
  )
487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
  def update_prompts(preset_name):
489
  prompts = update_user_prompt(preset_name)
490
  return (
@@ -492,13 +752,85 @@ def create_chat_interface_stacked():
492
  gr.update(value=prompts["system_prompt"], visible=True)
493
  )
494
 
 
 
 
495
  clear_chat_button.click(
496
  clear_chat,
497
- outputs=[chatbot, conversation_id]
498
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
  preset_prompt.change(
500
  update_prompts,
501
- inputs=preset_prompt,
502
  outputs=[user_prompt, system_prompt]
503
  )
504
 
@@ -507,13 +839,14 @@ def create_chat_interface_stacked():
507
  inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt,
508
  conversation_id, save_conversation, temp, system_prompt],
509
  outputs=[msg, chatbot, conversation_id]
510
- ).then( # Clear the message box after submission
511
  lambda x: gr.update(value=""),
512
  inputs=[chatbot],
513
  outputs=[msg]
514
- ).then( # Clear the user prompt after the first message
515
- lambda: gr.update(value=""),
516
- outputs=[user_prompt, system_prompt]
 
517
  )
518
 
519
  items_output.change(
@@ -559,18 +892,31 @@ def create_chat_interface_stacked():
559
  save_chat_history_to_db.click(
560
  save_chat_history_to_db_wrapper,
561
  inputs=[chatbot, conversation_id, media_content, chat_media_name],
562
- outputs=[conversation_id, gr.Textbox(label="Save Status")]
563
  )
564
 
565
  regenerate_button.click(
566
  regenerate_last_message,
567
  inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temp, system_prompt],
568
  outputs=[chatbot, gr.Textbox(label="Regenerate Status")]
 
 
 
 
569
  )
570
 
571
 
572
- # FIXME - System prompts
573
  def create_chat_interface_multi_api():
 
 
 
 
 
 
 
 
 
 
574
  custom_css = """
575
  .chatbot-container .message-wrap .message {
576
  font-size: 14px !important;
@@ -596,9 +942,31 @@ def create_chat_interface_multi_api():
596
  use_summary = gr.Checkbox(label="Use Summary")
597
  use_prompt = gr.Checkbox(label="Use Prompt")
598
  with gr.Column():
599
- preset_prompt = gr.Dropdown(label="Select Preset Prompt", choices=load_preset_prompts(), visible=True)
600
- system_prompt = gr.Textbox(label="System Prompt", value="You are a helpful AI assistant.", lines=5)
601
- user_prompt = gr.Textbox(label="Modify Prompt (Prefixed to your message every time)", lines=5, value="", visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
602
 
603
  with gr.Row():
604
  chatbots = []
@@ -606,17 +974,23 @@ def create_chat_interface_multi_api():
606
  api_keys = []
607
  temperatures = []
608
  regenerate_buttons = []
 
609
  for i in range(3):
610
  with gr.Column():
611
  gr.Markdown(f"### Chat Window {i + 1}")
612
- api_endpoint = gr.Dropdown(label=f"API Endpoint {i + 1}",
613
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq",
614
- "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp", "Kobold",
615
- "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"])
 
 
616
  api_key = gr.Textbox(label=f"API Key {i + 1} (if required)", type="password")
617
  temperature = gr.Slider(label=f"Temperature {i + 1}", minimum=0.0, maximum=1.0, step=0.05,
618
  value=0.7)
619
  chatbot = gr.Chatbot(height=800, elem_classes="chat-window")
 
 
 
620
  regenerate_button = gr.Button(f"Regenerate Last Message {i + 1}")
621
  chatbots.append(chatbot)
622
  api_endpoints.append(api_endpoint)
@@ -642,16 +1016,103 @@ def create_chat_interface_multi_api():
642
  outputs=[items_output, item_mapping]
643
  )
644
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645
  preset_prompt.change(update_user_prompt, inputs=preset_prompt, outputs=user_prompt)
646
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
647
 
648
  def clear_all_chats():
649
- return [[]] * 3 + [[]] * 3
650
 
651
  clear_chat_button.click(
652
  clear_all_chats,
653
- outputs=chatbots + chat_history
654
  )
 
655
  def chat_wrapper_multi(message, custom_prompt, system_prompt, *args):
656
  chat_histories = args[:3]
657
  chatbots = args[3:6]
@@ -681,6 +1142,11 @@ def create_chat_interface_multi_api():
681
 
682
  return [gr.update(value="")] + new_chatbots + new_chat_histories
683
 
 
 
 
 
 
684
 
685
  def regenerate_last_message(chat_history, chatbot, media_content, selected_parts, api_endpoint, api_key, custom_prompt, temperature, system_prompt):
686
  if not chat_history:
@@ -717,8 +1183,13 @@ def create_chat_interface_multi_api():
717
  for i in range(3):
718
  regenerate_buttons[i].click(
719
  regenerate_last_message,
720
- inputs=[chat_history[i], chatbots[i], media_content, selected_parts, api_endpoints[i], api_keys[i], user_prompt, temperatures[i], system_prompt],
 
721
  outputs=[chatbots[i], chat_history[i], gr.Textbox(label=f"Regenerate Status {i + 1}")]
 
 
 
 
722
  )
723
 
724
  # In the create_chat_interface_multi_api function:
@@ -731,6 +1202,10 @@ def create_chat_interface_multi_api():
731
  ).then(
732
  lambda: (gr.update(value=""), gr.update(value="")),
733
  outputs=[msg, user_prompt]
 
 
 
 
734
  )
735
 
736
  items_output.change(
@@ -747,8 +1222,17 @@ def create_chat_interface_multi_api():
747
  )
748
 
749
 
750
-
751
  def create_chat_interface_four():
 
 
 
 
 
 
 
 
 
 
752
  custom_css = """
753
  .chatbot-container .message-wrap .message {
754
  font-size: 14px !important;
@@ -762,17 +1246,32 @@ def create_chat_interface_four():
762
  with gr.TabItem("Four Independent API Chats", visible=True):
763
  gr.Markdown("# Four Independent API Chat Interfaces")
764
 
 
 
 
 
 
 
765
  with gr.Row():
766
  with gr.Column():
767
  preset_prompt = gr.Dropdown(
768
- label="Select Preset Prompt",
769
- choices=load_preset_prompts(),
770
  visible=True
771
  )
 
 
 
772
  user_prompt = gr.Textbox(
773
- label="Modify Prompt",
774
  lines=3
775
  )
 
 
 
 
 
 
776
  with gr.Column():
777
  gr.Markdown("Scroll down for the chat windows...")
778
 
@@ -781,13 +1280,11 @@ def create_chat_interface_four():
781
  def create_single_chat_interface(index, user_prompt_component):
782
  with gr.Column():
783
  gr.Markdown(f"### Chat Window {index + 1}")
 
784
  api_endpoint = gr.Dropdown(
785
- label=f"API Endpoint {index + 1}",
786
- choices=[
787
- "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq",
788
- "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp", "Kobold",
789
- "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"
790
- ]
791
  )
792
  api_key = gr.Textbox(
793
  label=f"API Key {index + 1} (if required)",
@@ -804,6 +1301,8 @@ def create_chat_interface_four():
804
  msg = gr.Textbox(label=f"Enter your message for Chat {index + 1}")
805
  submit = gr.Button(f"Submit to Chat {index + 1}")
806
  regenerate_button = gr.Button(f"Regenerate Last Message {index + 1}")
 
 
807
  clear_chat_button = gr.Button(f"Clear Chat {index + 1}")
808
 
809
  # State to maintain chat history
@@ -819,7 +1318,8 @@ def create_chat_interface_four():
819
  'submit': submit,
820
  'regenerate_button': regenerate_button,
821
  'clear_chat_button': clear_chat_button,
822
- 'chat_history': chat_history
 
823
  })
824
 
825
  # Create four chat interfaces arranged in a 2x2 grid
@@ -830,10 +1330,47 @@ def create_chat_interface_four():
830
  create_single_chat_interface(i * 2 + j, user_prompt)
831
 
832
  # Update user_prompt based on preset_prompt selection
 
 
 
 
833
  preset_prompt.change(
834
- fn=update_user_prompt,
835
- inputs=preset_prompt,
836
- outputs=user_prompt
 
 
 
 
 
 
 
837
  )
838
 
839
  def chat_wrapper_single(message, chat_history, api_endpoint, api_key, temperature, user_prompt):
@@ -913,6 +1450,10 @@ def create_chat_interface_four():
913
  interface['chatbot'],
914
  interface['chat_history']
915
  ]
 
 
 
 
916
  )
917
 
918
  interface['regenerate_button'].click(
@@ -929,12 +1470,18 @@ def create_chat_interface_four():
929
  interface['chat_history'],
930
  gr.Textbox(label="Regenerate Status")
931
  ]
 
 
 
 
932
  )
933
 
 
 
 
934
  interface['clear_chat_button'].click(
935
  clear_chat_single,
936
- inputs=[],
937
- outputs=[interface['chatbot'], interface['chat_history']]
938
  )
939
 
940
 
@@ -953,233 +1500,11 @@ def chat_wrapper_single(message, chat_history, chatbot, api_endpoint, api_key, t
953
 
954
  return new_msg, updated_chatbot, new_history, new_conv_id
955
 
956
-
957
- # FIXME - Finish implementing functions + testing/valdidation
958
- def create_chat_management_tab():
959
- with gr.TabItem("Chat Management", visible=True):
960
- gr.Markdown("# Chat Management")
961
-
962
- with gr.Row():
963
- search_query = gr.Textbox(label="Search Conversations")
964
- search_button = gr.Button("Search")
965
-
966
- conversation_list = gr.Dropdown(label="Select Conversation", choices=[])
967
- conversation_mapping = gr.State({})
968
-
969
- with gr.Tabs():
970
- with gr.TabItem("Edit", visible=True):
971
- chat_content = gr.TextArea(label="Chat Content (JSON)", lines=20, max_lines=50)
972
- save_button = gr.Button("Save Changes")
973
- delete_button = gr.Button("Delete Conversation", variant="stop")
974
-
975
- with gr.TabItem("Preview", visible=True):
976
- chat_preview = gr.HTML(label="Chat Preview")
977
- result_message = gr.Markdown("")
978
-
979
- def search_conversations(query):
980
- conversations = search_chat_conversations(query)
981
- choices = [f"{conv['conversation_name']} (Media: {conv['media_title']}, ID: {conv['id']})" for conv in
982
- conversations]
983
- mapping = {choice: conv['id'] for choice, conv in zip(choices, conversations)}
984
- return gr.update(choices=choices), mapping
985
-
986
- def load_conversations(selected, conversation_mapping):
987
- logging.info(f"Selected: {selected}")
988
- logging.info(f"Conversation mapping: {conversation_mapping}")
989
-
990
- try:
991
- if selected and selected in conversation_mapping:
992
- conversation_id = conversation_mapping[selected]
993
- messages = get_chat_messages(conversation_id)
994
- conversation_data = {
995
- "conversation_id": conversation_id,
996
- "messages": messages
997
- }
998
- json_content = json.dumps(conversation_data, indent=2)
999
-
1000
- # Create HTML preview
1001
- html_preview = "<div style='max-height: 500px; overflow-y: auto;'>"
1002
- for msg in messages:
1003
- sender_style = "background-color: #e6f3ff;" if msg[
1004
- 'sender'] == 'user' else "background-color: #f0f0f0;"
1005
- html_preview += f"<div style='margin-bottom: 10px; padding: 10px; border-radius: 5px; {sender_style}'>"
1006
- html_preview += f"<strong>{msg['sender']}:</strong> {html.escape(msg['message'])}<br>"
1007
- html_preview += f"<small>Timestamp: {msg['timestamp']}</small>"
1008
- html_preview += "</div>"
1009
- html_preview += "</div>"
1010
-
1011
- logging.info("Returning json_content and html_preview")
1012
- return json_content, html_preview
1013
- else:
1014
- logging.warning("No conversation selected or not in mapping")
1015
- return "", "<p>No conversation selected</p>"
1016
- except Exception as e:
1017
- logging.error(f"Error in load_conversations: {str(e)}")
1018
- return f"Error: {str(e)}", "<p>Error loading conversation</p>"
1019
-
1020
- def validate_conversation_json(content):
1021
- try:
1022
- data = json.loads(content)
1023
- if not isinstance(data, dict):
1024
- return False, "Invalid JSON structure: root should be an object"
1025
- if "conversation_id" not in data or not isinstance(data["conversation_id"], int):
1026
- return False, "Missing or invalid conversation_id"
1027
- if "messages" not in data or not isinstance(data["messages"], list):
1028
- return False, "Missing or invalid messages array"
1029
- for msg in data["messages"]:
1030
- if not all(key in msg for key in ["sender", "message"]):
1031
- return False, "Invalid message structure: missing required fields"
1032
- return True, data
1033
- except json.JSONDecodeError as e:
1034
- return False, f"Invalid JSON: {str(e)}"
1035
-
1036
- def save_conversation(selected, conversation_mapping, content):
1037
- if not selected or selected not in conversation_mapping:
1038
- return "Please select a conversation before saving.", "<p>No changes made</p>"
1039
-
1040
- conversation_id = conversation_mapping[selected]
1041
- is_valid, result = validate_conversation_json(content)
1042
-
1043
- if not is_valid:
1044
- return f"Error: {result}", "<p>No changes made due to error</p>"
1045
-
1046
- conversation_data = result
1047
- if conversation_data["conversation_id"] != conversation_id:
1048
- return "Error: Conversation ID mismatch.", "<p>No changes made due to ID mismatch</p>"
1049
-
1050
- try:
1051
- with db.get_connection() as conn:
1052
- conn.execute("BEGIN TRANSACTION")
1053
- cursor = conn.cursor()
1054
-
1055
- # Backup original conversation
1056
- cursor.execute("SELECT * FROM ChatMessages WHERE conversation_id = ?", (conversation_id,))
1057
- original_messages = cursor.fetchall()
1058
- backup_data = json.dumps({"conversation_id": conversation_id, "messages": original_messages})
1059
-
1060
- # You might want to save this backup_data somewhere
1061
-
1062
- # Delete existing messages
1063
- cursor.execute("DELETE FROM ChatMessages WHERE conversation_id = ?", (conversation_id,))
1064
-
1065
- # Insert updated messages
1066
- for message in conversation_data["messages"]:
1067
- cursor.execute('''
1068
- INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
1069
- VALUES (?, ?, ?, COALESCE(?, CURRENT_TIMESTAMP))
1070
- ''', (conversation_id, message["sender"], message["message"], message.get("timestamp")))
1071
-
1072
- conn.commit()
1073
-
1074
- # Create updated HTML preview
1075
- html_preview = "<div style='max-height: 500px; overflow-y: auto;'>"
1076
- for msg in conversation_data["messages"]:
1077
- sender_style = "background-color: #e6f3ff;" if msg[
1078
- 'sender'] == 'user' else "background-color: #f0f0f0;"
1079
- html_preview += f"<div style='margin-bottom: 10px; padding: 10px; border-radius: 5px; {sender_style}'>"
1080
- html_preview += f"<strong>{msg['sender']}:</strong> {html.escape(msg['message'])}<br>"
1081
- html_preview += f"<small>Timestamp: {msg.get('timestamp', 'N/A')}</small>"
1082
- html_preview += "</div>"
1083
- html_preview += "</div>"
1084
-
1085
- return "Conversation updated successfully.", html_preview
1086
- except sqlite3.Error as e:
1087
- conn.rollback()
1088
- logging.error(f"Database error in save_conversation: {e}")
1089
- return f"Error updating conversation: {str(e)}", "<p>Error occurred while saving</p>"
1090
- except Exception as e:
1091
- conn.rollback()
1092
- logging.error(f"Unexpected error in save_conversation: {e}")
1093
- return f"Unexpected error: {str(e)}", "<p>Unexpected error occurred</p>"
1094
-
1095
- def delete_conversation(selected, conversation_mapping):
1096
- if not selected or selected not in conversation_mapping:
1097
- return "Please select a conversation before deleting.", "<p>No changes made</p>", gr.update(choices=[])
1098
-
1099
- conversation_id = conversation_mapping[selected]
1100
-
1101
- try:
1102
- with db.get_connection() as conn:
1103
- cursor = conn.cursor()
1104
-
1105
- # Delete messages associated with the conversation
1106
- cursor.execute("DELETE FROM ChatMessages WHERE conversation_id = ?", (conversation_id,))
1107
-
1108
- # Delete the conversation itself
1109
- cursor.execute("DELETE FROM ChatConversations WHERE id = ?", (conversation_id,))
1110
-
1111
- conn.commit()
1112
-
1113
- # Update the conversation list
1114
- remaining_conversations = [choice for choice in conversation_mapping.keys() if choice != selected]
1115
- updated_mapping = {choice: conversation_mapping[choice] for choice in remaining_conversations}
1116
-
1117
- return "Conversation deleted successfully.", "<p>Conversation deleted</p>", gr.update(choices=remaining_conversations)
1118
- except sqlite3.Error as e:
1119
- conn.rollback()
1120
- logging.error(f"Database error in delete_conversation: {e}")
1121
- return f"Error deleting conversation: {str(e)}", "<p>Error occurred while deleting</p>", gr.update()
1122
- except Exception as e:
1123
- conn.rollback()
1124
- logging.error(f"Unexpected error in delete_conversation: {e}")
1125
- return f"Unexpected error: {str(e)}", "<p>Unexpected error occurred</p>", gr.update()
1126
-
1127
- def parse_formatted_content(formatted_content):
1128
- lines = formatted_content.split('\n')
1129
- conversation_id = int(lines[0].split(': ')[1])
1130
- timestamp = lines[1].split(': ')[1]
1131
- history = []
1132
- current_role = None
1133
- current_content = None
1134
- for line in lines[3:]:
1135
- if line.startswith("Role: "):
1136
- if current_role is not None:
1137
- history.append({"role": current_role, "content": ["", current_content]})
1138
- current_role = line.split(': ')[1]
1139
- elif line.startswith("Content: "):
1140
- current_content = line.split(': ', 1)[1]
1141
- if current_role is not None:
1142
- history.append({"role": current_role, "content": ["", current_content]})
1143
- return json.dumps({
1144
- "conversation_id": conversation_id,
1145
- "timestamp": timestamp,
1146
- "history": history
1147
- }, indent=2)
1148
-
1149
- search_button.click(
1150
- search_conversations,
1151
- inputs=[search_query],
1152
- outputs=[conversation_list, conversation_mapping]
1153
- )
1154
-
1155
- conversation_list.change(
1156
- load_conversations,
1157
- inputs=[conversation_list, conversation_mapping],
1158
- outputs=[chat_content, chat_preview]
1159
- )
1160
-
1161
- save_button.click(
1162
- save_conversation,
1163
- inputs=[conversation_list, conversation_mapping, chat_content],
1164
- outputs=[result_message, chat_preview]
1165
- )
1166
-
1167
- delete_button.click(
1168
- delete_conversation,
1169
- inputs=[conversation_list, conversation_mapping],
1170
- outputs=[result_message, chat_preview, conversation_list]
1171
- )
1172
-
1173
- return search_query, search_button, conversation_list, conversation_mapping, chat_content, save_button, delete_button, result_message, chat_preview
1174
-
1175
-
1176
-
1177
  # Mock function to simulate LLM processing
1178
  def process_with_llm(workflow, context, prompt, api_endpoint, api_key):
1179
  api_key_snippet = api_key[:5] + "..." if api_key else "Not provided"
1180
  return f"LLM output using {api_endpoint} (API Key: {api_key_snippet}) for {workflow} with context: {context[:30]}... and prompt: {prompt[:30]}..."
1181
 
1182
-
1183
  #
1184
  # End of Chat_ui.py
1185
  #######################################################################################################################
 
2
  # Description: Chat interface functions for Gradio
3
  #
4
  # Imports
 
 
5
  import logging
6
  import os
7
  import sqlite3
8
+ import time
9
  from datetime import datetime
10
  #
11
  # External Imports
12
  import gradio as gr
13
  #
14
  # Local Imports
15
+ from App_Function_Libraries.Chat.Chat_Functions import approximate_token_count, chat, save_chat_history, \
16
+ update_chat_content, save_chat_history_to_db_wrapper
17
+ from App_Function_Libraries.DB.DB_Manager import db, load_chat_history, start_new_conversation, \
18
+ save_message, search_conversations_by_keywords, \
19
+ get_all_conversations, delete_messages_in_conversation, search_media_db, list_prompts
20
+ from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_db_connection
21
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_user_prompt
22
+ from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
23
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, format_api_name, global_api_endpoints
24
  #
25
  #
26
  ########################################################################################################################
 
93
  # Create a new conversation
94
  media_id = media_content.get('id', None)
95
  conversation_name = f"Chat about {media_content.get('title', 'Unknown Media')} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
96
+ conversation_id = start_new_conversation(title=conversation_name, media_id=media_id)
 
97
  # Add user message to the database
98
+ user_message_id = save_message(conversation_id, role="user", content=message)
99
 
100
  # Include the selected parts and custom_prompt only for the first message
101
  if not history and selected_parts:
 
114
 
115
  if save_conversation:
116
  # Add assistant message to the database
117
+ save_message(conversation_id, role="assistant", content=bot_message)
118
 
119
  # Update history
120
  new_history = history + [(message, bot_message)]
 
124
  logging.error(f"Error in chat wrapper: {str(e)}")
125
  return "An error occurred.", history, conversation_id
126
 
127
+
128
  def search_conversations(query):
129
+ """Convert existing chat search to use RAG chat functions"""
130
  try:
131
+ # Use the RAG search function - search by title if given a query
132
+ if query and query.strip():
133
+ results, _, _ = search_conversations_by_keywords(
134
+ title_query=query.strip()
135
+ )
136
+ else:
137
+ # Get all conversations if no query
138
+ results, _, _ = get_all_conversations()
139
+
140
+ if not results:
141
  return gr.update(choices=[])
142
 
143
+ # Format choices to match existing UI format
144
  conversation_options = [
145
+ (f"{conv['title']} (ID: {conv['conversation_id'][:8]})", conv['conversation_id'])
146
+ for conv in results
147
  ]
148
+
149
  return gr.update(choices=conversation_options)
150
  except Exception as e:
151
+ logging.error(f"Error searching conversations: {str(e)}")
152
  return gr.update(choices=[])
153
 
154
 
155
  def load_conversation(conversation_id):
156
+ """Convert existing load to use RAG chat functions"""
157
  if not conversation_id:
158
  return [], None
159
 
160
+ try:
161
+ # Use RAG load function
162
+ messages, _, _ = load_chat_history(conversation_id)
 
 
 
 
163
 
164
+ # Convert to chatbot history format
165
+ history = [
166
+ (content, None) if role == 'user' else (None, content)
167
+ for role, content in messages
168
+ ]
169
 
170
+ return history, conversation_id
171
+ except Exception as e:
172
+ logging.error(f"Error loading conversation: {str(e)}")
173
+ return [], None
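For reference, a tiny illustration of the conversion used above, assuming load_chat_history returns (messages, total_pages, page) where messages is a list of (role, content) tuples:

# Illustrative only: how (role, content) tuples become Gradio chatbot pairs.
messages = [("user", "Hi"), ("assistant", "Hello!"), ("user", "Summarize the video")]
history = [
    (content, None) if role == 'user' else (None, content)
    for role, content in messages
]
# history == [("Hi", None), (None, "Hello!"), ("Summarize the video", None)]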
174
 
175
 
176
+ def regenerate_last_message(history, media_content, selected_parts, api_endpoint, api_key, custom_prompt, temperature,
177
+ system_prompt):
178
  if not history:
179
  return history, "No messages to regenerate."
180
 
 
207
 
208
  return new_history, "Last message regenerated successfully."
209
 
210
+
211
+ def update_dropdown_multiple(query, search_type, keywords=""):
212
+ """Updated function to handle multiple search results using search_media_db"""
213
+ try:
214
+ # Define search fields based on search type
215
+ search_fields = []
216
+ if search_type.lower() == "keyword":
217
+ # When searching by keyword, we'll search across multiple fields
218
+ search_fields = ["title", "content", "author"]
219
+ else:
220
+ # Otherwise use the specific field
221
+ search_fields = [search_type.lower()]
222
+
223
+ # Perform the search
224
+ results = search_media_db(
225
+ search_query=query,
226
+ search_fields=search_fields,
227
+ keywords=keywords,
228
+ page=1,
229
+ results_per_page=50 # Adjust as needed
230
+ )
231
+
232
+ # Process results
233
+ item_map = {}
234
+ formatted_results = []
235
+
236
+ for row in results:
237
+ id, url, title, type_, content, author, date, prompt, summary = row
238
+ # Create a display text that shows relevant info
239
+ display_text = f"{title} - {author or 'Unknown'} ({date})"
240
+ formatted_results.append(display_text)
241
+ item_map[display_text] = id
242
+
243
+ return gr.update(choices=formatted_results), item_map
244
+ except Exception as e:
245
+ logging.error(f"Error in update_dropdown_multiple: {str(e)}")
246
+ return gr.update(choices=[]), {}
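A hypothetical usage sketch of the helper above. The nine-column unpacking (id, url, title, type, content, author, date, prompt, summary) is inferred from the loop body, not from separate documentation of search_media_db:

# Hypothetical example call, mirroring how search_button.click() uses it below.
dropdown_update, item_map = update_dropdown_multiple(
    query="transformers",
    search_type="Keyword",
    keywords="ml, ai",
)
# item_map maps each dropdown display string back to its media ID, e.g.
# {"Attention Is All You Need - Vaswani (2017-06-12)": 42}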
247
+
248
+
249
  def create_chat_interface():
250
+ try:
251
+ default_value = None
252
+ if default_api_endpoint:
253
+ if default_api_endpoint in global_api_endpoints:
254
+ default_value = format_api_name(default_api_endpoint)
255
+ else:
256
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
257
+ except Exception as e:
258
+ logging.error(f"Error setting default API endpoint: {str(e)}")
259
+ default_value = None
260
  custom_css = """
261
  .chatbot-container .message-wrap .message {
262
  font-size: 14px !important;
 
271
 
272
  with gr.Row():
273
  with gr.Column(scale=1):
274
+ search_query_input = gr.Textbox(
275
+ label="Search Query",
276
+ placeholder="Enter your search query here..."
277
+ )
278
+ search_type_input = gr.Radio(
279
+ choices=["Title", "Content", "Author", "Keyword"],
280
+ value="Keyword",
281
+ label="Search By"
282
+ )
283
+ keyword_filter_input = gr.Textbox(
284
+ label="Filter by Keywords (comma-separated)",
285
+ placeholder="ml, ai, python, etc..."
286
+ )
287
  search_button = gr.Button("Search")
288
  items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
289
  item_mapping = gr.State({})
 
303
  with gr.Row():
304
  load_conversations_btn = gr.Button("Load Selected Conversation")
305
 
306
+ # Refactored API selection dropdown
307
+ api_endpoint = gr.Dropdown(
308
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
309
+ value=default_value,
310
+ label="API for Chat Interaction (Optional)"
311
+ )
312
  api_key = gr.Textbox(label="API Key (if required)", type="password")
313
+
314
+ # Initialize state variables for pagination
315
+ current_page_state = gr.State(value=1)
316
+ total_pages_state = gr.State(value=1)
317
+
318
  custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
319
  value=False,
320
  visible=True)
321
  preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
322
  value=False,
323
  visible=True)
324
+ with gr.Row():
325
+ # Add pagination controls
326
+ preset_prompt = gr.Dropdown(label="Select Preset Prompt",
327
+ choices=[],
328
+ visible=False)
329
+ with gr.Row():
330
+ prev_page_button = gr.Button("Previous Page", visible=False)
331
+ page_display = gr.Markdown("Page 1 of X", visible=False)
332
+ next_page_button = gr.Button("Next Page", visible=False)
333
+ system_prompt_input = gr.Textbox(label="System Prompt",
334
+ value="You are a helpful AI assistant",
335
+ lines=3,
336
+ visible=False)
337
+ with gr.Row():
338
+ user_prompt = gr.Textbox(label="Custom Prompt",
339
+ placeholder="Enter custom prompt here",
340
+ lines=3,
341
+ visible=False)
342
  with gr.Column(scale=2):
343
+ chatbot = gr.Chatbot(height=800, elem_classes="chatbot-container")
344
  msg = gr.Textbox(label="Enter your message")
345
  submit = gr.Button("Submit")
346
  regenerate_button = gr.Button("Regenerate Last Message")
347
+ token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False)
348
  clear_chat_button = gr.Button("Clear Chat")
349
 
 
 
 
 
 
 
 
350
  chat_media_name = gr.Textbox(label="Custom Chat Name(optional)")
351
  save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
352
+ save_status = gr.Textbox(label="Save Status", interactive=False)
353
  save_chat_history_as_file = gr.Button("Save Chat History as File")
354
  download_file = gr.File(label="Download Chat History")
 
355
 
356
  # Restore original functionality
357
  search_button.click(
358
+ fn=update_dropdown_multiple,
359
+ inputs=[search_query_input, search_type_input, keyword_filter_input],
360
  outputs=[items_output, item_mapping]
361
  )
362
 
 
387
  clear_chat,
388
  outputs=[chatbot, conversation_id]
389
  )
390
+
391
+ # Function to handle preset prompt checkbox change
392
+ def on_preset_prompt_checkbox_change(is_checked):
393
+ if is_checked:
394
+ prompts, total_pages, current_page = list_prompts(page=1, per_page=20)
395
+ page_display_text = f"Page {current_page} of {total_pages}"
396
+ return (
397
+ gr.update(visible=True, interactive=True, choices=prompts), # preset_prompt
398
+ gr.update(visible=True), # prev_page_button
399
+ gr.update(visible=True), # next_page_button
400
+ gr.update(value=page_display_text, visible=True), # page_display
401
+ current_page, # current_page_state
402
+ total_pages # total_pages_state
403
+ )
404
+ else:
405
+ return (
406
+ gr.update(visible=False, interactive=False), # preset_prompt
407
+ gr.update(visible=False), # prev_page_button
408
+ gr.update(visible=False), # next_page_button
409
+ gr.update(visible=False), # page_display
410
+ 1, # current_page_state
411
+ 1 # total_pages_state
412
+ )
413
+
414
+ preset_prompt_checkbox.change(
415
+ fn=on_preset_prompt_checkbox_change,
416
+ inputs=[preset_prompt_checkbox],
417
+ outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
418
+ )
419
+
420
+ def on_prev_page_click(current_page, total_pages):
421
+ new_page = max(current_page - 1, 1)
422
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
423
+ page_display_text = f"Page {current_page} of {total_pages}"
424
+ return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
425
+
426
+ prev_page_button.click(
427
+ fn=on_prev_page_click,
428
+ inputs=[current_page_state, total_pages_state],
429
+ outputs=[preset_prompt, page_display, current_page_state]
430
+ )
431
+
432
+ def on_next_page_click(current_page, total_pages):
433
+ new_page = min(current_page + 1, total_pages)
434
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
435
+ page_display_text = f"Page {current_page} of {total_pages}"
436
+ return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
437
+
438
+ next_page_button.click(
439
+ fn=on_next_page_click,
440
+ inputs=[current_page_state, total_pages_state],
441
+ outputs=[preset_prompt, page_display, current_page_state]
442
+ )
443
+
444
  preset_prompt.change(
445
  update_prompts,
446
+ inputs=[preset_prompt],
447
  outputs=[user_prompt, system_prompt_input]
448
  )
449
+
450
  custom_prompt_checkbox.change(
451
  fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
452
  inputs=[custom_prompt_checkbox],
453
  outputs=[user_prompt, system_prompt_input]
454
  )
455
+
 
 
 
 
456
  submit.click(
457
  chat_wrapper,
458
  inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, conversation_id,
 
465
  ).then( # Clear the user prompt after the first message
466
  lambda: (gr.update(value=""), gr.update(value="")),
467
  outputs=[user_prompt, system_prompt_input]
468
+ ).then(
469
+ lambda history: approximate_token_count(history),
470
+ inputs=[chatbot],
471
+ outputs=[token_count_display]
472
  )
473
 
474
  items_output.change(
 
476
  inputs=[items_output, use_content, use_summary, use_prompt, item_mapping],
477
  outputs=[media_content, selected_parts]
478
  )
479
+
480
  use_content.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
481
  outputs=[selected_parts])
482
  use_summary.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
 
506
  outputs=[chat_history]
507
  )
508
 
 
 
 
 
 
 
 
 
 
 
 
 
509
  save_chat_history_as_file.click(
510
  save_chat_history,
511
  inputs=[chatbot, conversation_id],
 
520
 
521
  regenerate_button.click(
522
  regenerate_last_message,
523
+ inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temperature,
524
+ system_prompt_input],
525
  outputs=[chatbot, save_status]
526
+ ).then(
527
+ lambda history: approximate_token_count(history),
528
+ inputs=[chatbot],
529
+ outputs=[token_count_display]
530
  )
531
 
 
 
 
532
 
533
  def create_chat_interface_stacked():
534
+ try:
535
+ default_value = None
536
+ if default_api_endpoint:
537
+ if default_api_endpoint in global_api_endpoints:
538
+ default_value = format_api_name(default_api_endpoint)
539
+ else:
540
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
541
+ except Exception as e:
542
+ logging.error(f"Error setting default API endpoint: {str(e)}")
543
+ default_value = None
544
+
545
  custom_css = """
546
  .chatbot-container .message-wrap .message {
547
  font-size: 14px !important;
 
556
 
557
  with gr.Row():
558
  with gr.Column():
559
+ search_query_input = gr.Textbox(
560
+ label="Search Query",
561
+ placeholder="Enter your search query here..."
562
+ )
563
+ search_type_input = gr.Radio(
564
+ choices=["Title", "Content", "Author", "Keyword"],
565
+ value="Keyword",
566
+ label="Search By"
567
+ )
568
+ keyword_filter_input = gr.Textbox(
569
+ label="Filter by Keywords (comma-separated)",
570
+ placeholder="ml, ai, python, etc..."
571
+ )
572
  search_button = gr.Button("Search")
573
  items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
574
  item_mapping = gr.State({})
 
586
  search_conversations_btn = gr.Button("Search Conversations")
587
  load_conversations_btn = gr.Button("Load Selected Conversation")
588
  with gr.Column():
589
+ # Refactored API selection dropdown
590
+ api_endpoint = gr.Dropdown(
591
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
592
+ value=default_value,
593
+ label="API for Chat Interaction (Optional)"
594
+ )
595
  api_key = gr.Textbox(label="API Key (if required)", type="password")
596
+
597
+ # Initialize state variables for pagination
598
+ current_page_state = gr.State(value=1)
599
+ total_pages_state = gr.State(value=1)
600
+
601
+ custom_prompt_checkbox = gr.Checkbox(
602
+ label="Use a Custom Prompt",
603
+ value=False,
604
+ visible=True
605
+ )
606
+ preset_prompt_checkbox = gr.Checkbox(
607
+ label="Use a pre-set Prompt",
608
+ value=False,
609
+ visible=True
610
+ )
611
+
612
+ with gr.Row():
613
+ preset_prompt = gr.Dropdown(
614
+ label="Select Preset Prompt",
615
+ choices=[],
616
+ visible=False
617
+ )
618
+ with gr.Row():
619
+ prev_page_button = gr.Button("Previous Page", visible=False)
620
+ page_display = gr.Markdown("Page 1 of X", visible=False)
621
+ next_page_button = gr.Button("Next Page", visible=False)
622
+
623
+ system_prompt = gr.Textbox(
624
+ label="System Prompt",
625
+ value="You are a helpful AI assistant.",
626
+ lines=4,
627
+ visible=False
628
+ )
629
+ user_prompt = gr.Textbox(
630
+ label="Custom User Prompt",
631
+ placeholder="Enter custom prompt here",
632
+ lines=4,
633
+ visible=False
634
+ )
635
  gr.Markdown("Scroll down for the chat window...")
636
  with gr.Row():
637
  with gr.Column(scale=1):
638
+ chatbot = gr.Chatbot(height=800, elem_classes="chatbot-container")
639
  msg = gr.Textbox(label="Enter your message")
640
  with gr.Row():
641
  with gr.Column():
642
  submit = gr.Button("Submit")
643
  regenerate_button = gr.Button("Regenerate Last Message")
644
+ token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False)
645
  clear_chat_button = gr.Button("Clear Chat")
646
  chat_media_name = gr.Textbox(label="Custom Chat Name(optional)", visible=True)
647
  save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
648
+ save_status = gr.Textbox(label="Save Status", interactive=False)
649
  save_chat_history_as_file = gr.Button("Save Chat History as File")
650
  with gr.Column():
651
  download_file = gr.File(label="Download Chat History")
652
 
653
  # Restore original functionality
654
  search_button.click(
655
+ fn=update_dropdown_multiple,
656
+ inputs=[search_query_input, search_type_input, keyword_filter_input],
657
  outputs=[items_output, item_mapping]
658
  )
659
 
660
+ def search_conversations(query):
661
+ try:
662
+ # Use RAG search with title search
663
+ if query and query.strip():
664
+ results, _, _ = search_conversations_by_keywords(title_query=query.strip())
665
+ else:
666
+ results, _, _ = get_all_conversations()
667
+
668
+ if not results:
669
+ return gr.update(choices=[])
670
+
671
+ # Format choices to match UI
672
+ conversation_options = [
673
+ (f"{conv['title']} (ID: {conv['conversation_id'][:8]})", conv['conversation_id'])
674
+ for conv in results
675
+ ]
676
+
677
+ return gr.update(choices=conversation_options)
678
+ except Exception as e:
679
+ logging.error(f"Error searching conversations: {str(e)}")
680
+ return gr.update(choices=[])
681
+
682
+ def load_conversation(conversation_id):
683
+ if not conversation_id:
684
+ return [], None
685
+
686
+ try:
687
+ # Use RAG load function
688
+ messages, _, _ = load_chat_history(conversation_id)
689
+
690
+ # Convert to chatbot history format
691
+ history = [
692
+ (content, None) if role == 'user' else (None, content)
693
+ for role, content in messages
694
+ ]
695
+
696
+ return history, conversation_id
697
+ except Exception as e:
698
+ logging.error(f"Error loading conversation: {str(e)}")
699
+ return [], None
700
+
701
+ def save_chat_history_to_db_wrapper(chatbot, conversation_id, media_content, chat_name=None):
702
+ log_counter("save_chat_history_to_db_attempt")
703
+ start_time = time.time()
704
+ logging.info(f"Attempting to save chat history. Media content type: {type(media_content)}")
705
+
706
+ try:
707
+ # First check if we can access the database
708
+ try:
709
+ with get_db_connection() as conn:
710
+ cursor = conn.cursor()
711
+ cursor.execute("SELECT 1")
712
+ except sqlite3.DatabaseError as db_error:
713
+ logging.error(f"Database is corrupted or inaccessible: {str(db_error)}")
714
+ return conversation_id, gr.update(
715
+ value="Database error: The database file appears to be corrupted. Please contact support.")
716
+
717
+ # For both new and existing conversations
718
+ try:
719
+ if not conversation_id:
720
+ title = chat_name if chat_name else "Untitled Conversation"
721
+ conversation_id = start_new_conversation(title=title)
722
+ logging.info(f"Created new conversation with ID: {conversation_id}")
723
+
724
+ # Update existing messages
725
+ delete_messages_in_conversation(conversation_id)
726
+ for user_msg, assistant_msg in chatbot:
727
+ if user_msg:
728
+ save_message(conversation_id, "user", user_msg)
729
+ if assistant_msg:
730
+ save_message(conversation_id, "assistant", assistant_msg)
731
+ except sqlite3.DatabaseError as db_error:
732
+ logging.error(f"Database error during message save: {str(db_error)}")
733
+ return conversation_id, gr.update(
734
+ value="Database error: Unable to save messages. Please try again or contact support.")
735
+
736
+ save_duration = time.time() - start_time
737
+ log_histogram("save_chat_history_to_db_duration", save_duration)
738
+ log_counter("save_chat_history_to_db_success")
739
+
740
+ return conversation_id, gr.update(value="Chat history saved successfully!")
741
+
742
+ except Exception as e:
743
+ log_counter("save_chat_history_to_db_error", labels={"error": str(e)})
744
+ error_message = f"Failed to save chat history: {str(e)}"
745
+ logging.error(error_message, exc_info=True)
746
+ return conversation_id, gr.update(value=error_message)
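The wrapper above uses a replace-all strategy: make sure a conversation row exists, delete its stored messages, then re-save every turn currently shown in the chatbot. A condensed sketch of that core loop, reusing the same DB helpers imported at the top of this file (illustrative only; wrapping the delete-and-insert in a single transaction would make the rewrite atomic, which the code above does not do explicitly):

def resave_conversation_sketch(chatbot_pairs, conversation_id=None, title="Untitled Conversation"):
    # Condensed, hypothetical restatement of the save path above.
    if not conversation_id:
        conversation_id = start_new_conversation(title=title)
    delete_messages_in_conversation(conversation_id)  # replace-all, not append
    for user_msg, assistant_msg in chatbot_pairs:
        if user_msg:
            save_message(conversation_id, "user", user_msg)
        if assistant_msg:
            save_message(conversation_id, "assistant", assistant_msg)
    return conversation_id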
747
+
748
  def update_prompts(preset_name):
749
  prompts = update_user_prompt(preset_name)
750
  return (
 
752
  gr.update(value=prompts["system_prompt"], visible=True)
753
  )
754
 
755
+ def clear_chat():
756
+ return [], None, 0 # Empty history, conversation_id, and token count
757
+
758
  clear_chat_button.click(
759
  clear_chat,
760
+ outputs=[chatbot, conversation_id, token_count_display]
761
  )
762
+
763
+ # Handle custom prompt checkbox change
764
+ def on_custom_prompt_checkbox_change(is_checked):
765
+ return (
766
+ gr.update(visible=is_checked),
767
+ gr.update(visible=is_checked)
768
+ )
769
+
770
+ custom_prompt_checkbox.change(
771
+ fn=on_custom_prompt_checkbox_change,
772
+ inputs=[custom_prompt_checkbox],
773
+ outputs=[user_prompt, system_prompt]
774
+ )
775
+
776
+ # Handle preset prompt checkbox change
777
+ def on_preset_prompt_checkbox_change(is_checked):
778
+ if is_checked:
779
+ prompts, total_pages, current_page = list_prompts(page=1, per_page=20)
780
+ page_display_text = f"Page {current_page} of {total_pages}"
781
+ return (
782
+ gr.update(visible=True, interactive=True, choices=prompts), # preset_prompt
783
+ gr.update(visible=True), # prev_page_button
784
+ gr.update(visible=True), # next_page_button
785
+ gr.update(value=page_display_text, visible=True), # page_display
786
+ current_page, # current_page_state
787
+ total_pages # total_pages_state
788
+ )
789
+ else:
790
+ return (
791
+ gr.update(visible=False, interactive=False), # preset_prompt
792
+ gr.update(visible=False), # prev_page_button
793
+ gr.update(visible=False), # next_page_button
794
+ gr.update(visible=False), # page_display
795
+ 1, # current_page_state
796
+ 1 # total_pages_state
797
+ )
798
+
799
+ preset_prompt_checkbox.change(
800
+ fn=on_preset_prompt_checkbox_change,
801
+ inputs=[preset_prompt_checkbox],
802
+ outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
803
+ )
804
+
805
+ # Pagination button functions
806
+ def on_prev_page_click(current_page, total_pages):
807
+ new_page = max(current_page - 1, 1)
808
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
809
+ page_display_text = f"Page {current_page} of {total_pages}"
810
+ return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
811
+
812
+ prev_page_button.click(
813
+ fn=on_prev_page_click,
814
+ inputs=[current_page_state, total_pages_state],
815
+ outputs=[preset_prompt, page_display, current_page_state]
816
+ )
817
+
818
+ def on_next_page_click(current_page, total_pages):
819
+ new_page = min(current_page + 1, total_pages)
820
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
821
+ page_display_text = f"Page {current_page} of {total_pages}"
822
+ return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
823
+
824
+ next_page_button.click(
825
+ fn=on_next_page_click,
826
+ inputs=[current_page_state, total_pages_state],
827
+ outputs=[preset_prompt, page_display, current_page_state]
828
+ )
829
+
830
+ # Update prompts when a preset is selected
831
  preset_prompt.change(
832
  update_prompts,
833
+ inputs=[preset_prompt],
834
  outputs=[user_prompt, system_prompt]
835
  )
836
 
 
839
  inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt,
840
  conversation_id, save_conversation, temp, system_prompt],
841
  outputs=[msg, chatbot, conversation_id]
842
+ ).then(
843
  lambda x: gr.update(value=""),
844
  inputs=[chatbot],
845
  outputs=[msg]
846
+ ).then(
847
+ lambda history: approximate_token_count(history),
848
+ inputs=[chatbot],
849
+ outputs=[token_count_display]
850
  )
851
 
852
  items_output.change(
 
892
  save_chat_history_to_db.click(
893
  save_chat_history_to_db_wrapper,
894
  inputs=[chatbot, conversation_id, media_content, chat_media_name],
895
+ outputs=[conversation_id, save_status]
896
  )
897
 
898
  regenerate_button.click(
899
  regenerate_last_message,
900
  inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temp, system_prompt],
901
  outputs=[chatbot, gr.Textbox(label="Regenerate Status")]
902
+ ).then(
903
+ lambda history: approximate_token_count(history),
904
+ inputs=[chatbot],
905
+ outputs=[token_count_display]
906
  )
907
 
908
 
 
909
  def create_chat_interface_multi_api():
910
+ try:
911
+ default_value = None
912
+ if default_api_endpoint:
913
+ if default_api_endpoint in global_api_endpoints:
914
+ default_value = format_api_name(default_api_endpoint)
915
+ else:
916
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
917
+ except Exception as e:
918
+ logging.error(f"Error setting default API endpoint: {str(e)}")
919
+ default_value = None
920
  custom_css = """
921
  .chatbot-container .message-wrap .message {
922
  font-size: 14px !important;
 
942
  use_summary = gr.Checkbox(label="Use Summary")
943
  use_prompt = gr.Checkbox(label="Use Prompt")
944
  with gr.Column():
945
+ # Initialize state variables for pagination
946
+ current_page_state = gr.State(value=1)
947
+ total_pages_state = gr.State(value=1)
948
+
949
+ custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
950
+ value=False,
951
+ visible=True)
952
+ preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
953
+ value=False,
954
+ visible=True)
955
+ with gr.Row():
956
+ # Add pagination controls
957
+ preset_prompt = gr.Dropdown(label="Select Preset Prompt",
958
+ choices=[],
959
+ visible=False)
960
+ with gr.Row():
961
+ prev_page_button = gr.Button("Previous Page", visible=False)
962
+ page_display = gr.Markdown("Page 1 of X", visible=False)
963
+ next_page_button = gr.Button("Next Page", visible=False)
964
+ system_prompt = gr.Textbox(label="System Prompt",
965
+ value="You are a helpful AI assistant.",
966
+ lines=5,
967
+ visible=True)
968
+ user_prompt = gr.Textbox(label="Modify Prompt (Prefixed to your message every time)", lines=5,
969
+ value="", visible=True)
970
 
971
  with gr.Row():
972
  chatbots = []
 
974
  api_keys = []
975
  temperatures = []
976
  regenerate_buttons = []
977
+ token_count_displays = []
978
  for i in range(3):
979
  with gr.Column():
980
  gr.Markdown(f"### Chat Window {i + 1}")
981
+ # Refactored API selection dropdown
982
+ api_endpoint = gr.Dropdown(
983
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
984
+ value=default_value,
985
+ label="API for Chat Interaction (Optional)"
986
+ )
987
  api_key = gr.Textbox(label=f"API Key {i + 1} (if required)", type="password")
988
  temperature = gr.Slider(label=f"Temperature {i + 1}", minimum=0.0, maximum=1.0, step=0.05,
989
  value=0.7)
990
  chatbot = gr.Chatbot(height=800, elem_classes="chat-window")
991
+ token_count_display = gr.Number(label=f"Approximate Token Count {i + 1}", value=0,
992
+ interactive=False)
993
+ token_count_displays.append(token_count_display)
994
  regenerate_button = gr.Button(f"Regenerate Last Message {i + 1}")
995
  chatbots.append(chatbot)
996
  api_endpoints.append(api_endpoint)
 
1016
  outputs=[items_output, item_mapping]
1017
  )
1018
 
1019
+ def update_prompts(preset_name):
1020
+ prompts = update_user_prompt(preset_name)
1021
+ return (
1022
+ gr.update(value=prompts["user_prompt"], visible=True),
1023
+ gr.update(value=prompts["system_prompt"], visible=True)
1024
+ )
1025
+
1026
+ def on_custom_prompt_checkbox_change(is_checked):
1027
+ return (
1028
+ gr.update(visible=is_checked),
1029
+ gr.update(visible=is_checked)
1030
+ )
1031
+
1032
+ custom_prompt_checkbox.change(
1033
+ fn=on_custom_prompt_checkbox_change,
1034
+ inputs=[custom_prompt_checkbox],
1035
+ outputs=[user_prompt, system_prompt]
1036
+ )
1037
+
1038
+ def clear_all_chats():
1039
+ return [[]] * 3 + [[]] * 3 + [0] * 3
1040
+
1041
+ clear_chat_button.click(
1042
+ clear_all_chats,
1043
+ outputs=chatbots + chat_history + token_count_displays
1044
+ )
1045
+
1046
+ def on_preset_prompt_checkbox_change(is_checked):
1047
+ if is_checked:
1048
+ prompts, total_pages, current_page = list_prompts(page=1, per_page=10)
1049
+ page_display_text = f"Page {current_page} of {total_pages}"
1050
+ return (
1051
+ gr.update(visible=True, interactive=True, choices=prompts), # preset_prompt
1052
+ gr.update(visible=True), # prev_page_button
1053
+ gr.update(visible=True), # next_page_button
1054
+ gr.update(value=page_display_text, visible=True), # page_display
1055
+ current_page, # current_page_state
1056
+ total_pages # total_pages_state
1057
+ )
1058
+ else:
1059
+ return (
1060
+ gr.update(visible=False, interactive=False), # preset_prompt
1061
+ gr.update(visible=False), # prev_page_button
1062
+ gr.update(visible=False), # next_page_button
1063
+ gr.update(visible=False), # page_display
1064
+ 1, # current_page_state
1065
+ 1 # total_pages_state
1066
+ )
1067
+
1068
  preset_prompt.change(update_user_prompt, inputs=preset_prompt, outputs=user_prompt)
1069
 
1070
+ preset_prompt_checkbox.change(
1071
+ fn=on_preset_prompt_checkbox_change,
1072
+ inputs=[preset_prompt_checkbox],
1073
+ outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state,
1074
+ total_pages_state]
1075
+ )
1076
+
1077
+ def on_prev_page_click(current_page, total_pages):
1078
+ new_page = max(current_page - 1, 1)
1079
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=10)
1080
+ page_display_text = f"Page {current_page} of {total_pages}"
1081
+ return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
1082
+
1083
+ prev_page_button.click(
1084
+ fn=on_prev_page_click,
1085
+ inputs=[current_page_state, total_pages_state],
1086
+ outputs=[preset_prompt, page_display, current_page_state]
1087
+ )
1088
+
1089
+ def on_next_page_click(current_page, total_pages):
1090
+ new_page = min(current_page + 1, total_pages)
1091
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=10)
1092
+ page_display_text = f"Page {current_page} of {total_pages}"
1093
+ return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
1094
+
1095
+ next_page_button.click(
1096
+ fn=on_next_page_click,
1097
+ inputs=[current_page_state, total_pages_state],
1098
+ outputs=[preset_prompt, page_display, current_page_state]
1099
+ )
1100
+
1101
+ # Update prompts when a preset is selected
1102
+ preset_prompt.change(
1103
+ update_prompts,
1104
+ inputs=[preset_prompt],
1105
+ outputs=[user_prompt, system_prompt]
1106
+ )
1107
 
1108
  def clear_all_chats():
1109
+ return [[]] * 3 + [[]] * 3 + [0] * 3
1110
 
1111
  clear_chat_button.click(
1112
  clear_all_chats,
1113
+ outputs=chatbots + chat_history + token_count_displays
1114
  )
1115
+
1116
  def chat_wrapper_multi(message, custom_prompt, system_prompt, *args):
1117
  chat_histories = args[:3]
1118
  chatbots = args[3:6]
 
1142
 
1143
  return [gr.update(value="")] + new_chatbots + new_chat_histories
1144
 
1145
+ def update_token_counts(*histories):
1146
+ token_counts = []
1147
+ for history in histories:
1148
+ token_counts.append(approximate_token_count(history))
1149
+ return token_counts
1150
 
1151
  def regenerate_last_message(chat_history, chatbot, media_content, selected_parts, api_endpoint, api_key, custom_prompt, temperature, system_prompt):
1152
  if not chat_history:
 
1183
  for i in range(3):
1184
  regenerate_buttons[i].click(
1185
  regenerate_last_message,
1186
+ inputs=[chat_history[i], chatbots[i], media_content, selected_parts, api_endpoints[i], api_keys[i],
1187
+ user_prompt, temperatures[i], system_prompt],
1188
  outputs=[chatbots[i], chat_history[i], gr.Textbox(label=f"Regenerate Status {i + 1}")]
1189
+ ).then(
1190
+ lambda history: approximate_token_count(history),
1191
+ inputs=[chat_history[i]],
1192
+ outputs=[token_count_displays[i]]
1193
  )
1194
 
1195
  # In the create_chat_interface_multi_api function:
 
1202
  ).then(
1203
  lambda: (gr.update(value=""), gr.update(value="")),
1204
  outputs=[msg, user_prompt]
1205
+ ).then(
1206
+ update_token_counts,
1207
+ inputs=chat_history,
1208
+ outputs=token_count_displays
1209
  )
1210
 
1211
  items_output.change(
 
1222
  )
1223
 
1224
 
 
1225
  def create_chat_interface_four():
1226
+ try:
1227
+ default_value = None
1228
+ if default_api_endpoint:
1229
+ if default_api_endpoint in global_api_endpoints:
1230
+ default_value = format_api_name(default_api_endpoint)
1231
+ else:
1232
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
1233
+ except Exception as e:
1234
+ logging.error(f"Error setting default API endpoint: {str(e)}")
1235
+ default_value = None
1236
  custom_css = """
1237
  .chatbot-container .message-wrap .message {
1238
  font-size: 14px !important;
 
1246
  with gr.TabItem("Four Independent API Chats", visible=True):
1247
  gr.Markdown("# Four Independent API Chat Interfaces")
1248
 
1249
+ # Initialize prompts during component creation
1250
+ prompts, total_pages, current_page = list_prompts(page=1, per_page=10)
1251
+ current_page_state = gr.State(value=current_page)
1252
+ total_pages_state = gr.State(value=total_pages)
1253
+ page_display_text = f"Page {current_page} of {total_pages}"
1254
+
1255
  with gr.Row():
1256
  with gr.Column():
1257
  preset_prompt = gr.Dropdown(
1258
+ label="Select Preset Prompt (This will be prefixed to your messages, recommend copy/pasting and then clearing the User Prompt box)",
1259
+ choices=prompts,
1260
  visible=True
1261
  )
1262
+ prev_page_button = gr.Button("Previous Page", visible=True)
1263
+ page_display = gr.Markdown(page_display_text, visible=True)
1264
+ next_page_button = gr.Button("Next Page", visible=True)
1265
  user_prompt = gr.Textbox(
1266
+ label="Modify User Prompt",
1267
  lines=3
1268
  )
1269
+ system_prompt = gr.Textbox(
1270
+ label="System Prompt",
1271
+ value="You are a helpful AI assistant.",
1272
+ lines=3
1273
+ )
1274
+
1275
  with gr.Column():
1276
  gr.Markdown("Scroll down for the chat windows...")
1277
 
 
1280
  def create_single_chat_interface(index, user_prompt_component):
1281
  with gr.Column():
1282
  gr.Markdown(f"### Chat Window {index + 1}")
1283
+ # Refactored API selection dropdown
1284
  api_endpoint = gr.Dropdown(
1285
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
1286
+ value=default_value,
1287
+ label="API for Chat Interaction (Optional)"
 
 
 
1288
  )
1289
  api_key = gr.Textbox(
1290
  label=f"API Key {index + 1} (if required)",
 
1301
  msg = gr.Textbox(label=f"Enter your message for Chat {index + 1}")
1302
  submit = gr.Button(f"Submit to Chat {index + 1}")
1303
  regenerate_button = gr.Button(f"Regenerate Last Message {index + 1}")
1304
+ token_count_display = gr.Number(label=f"Approximate Token Count {index + 1}", value=0,
1305
+ interactive=False)
1306
  clear_chat_button = gr.Button(f"Clear Chat {index + 1}")
1307
 
1308
  # State to maintain chat history
 
1318
  'submit': submit,
1319
  'regenerate_button': regenerate_button,
1320
  'clear_chat_button': clear_chat_button,
1321
+ 'chat_history': chat_history,
1322
+ 'token_count_display': token_count_display
1323
  })
1324
 
1325
  # Create four chat interfaces arranged in a 2x2 grid
 
1330
  create_single_chat_interface(i * 2 + j, user_prompt)
1331
 
1332
  # Update user_prompt based on preset_prompt selection
1333
+ def update_prompts(preset_name):
1334
+ prompts = update_user_prompt(preset_name)
1335
+ return gr.update(value=prompts["user_prompt"]), gr.update(value=prompts["system_prompt"])
1336
+
1337
  preset_prompt.change(
1338
+ fn=update_prompts,
1339
+ inputs=[preset_prompt],
1340
+ outputs=[user_prompt, system_prompt]
1341
+ )
1342
+
1343
+ # Pagination button functions
1344
+ def on_prev_page_click(current_page, total_pages):
1345
+ new_page = max(current_page - 1, 1)
1346
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=10)
1347
+ page_display_text = f"Page {current_page} of {total_pages}"
1348
+ return (
1349
+ gr.update(choices=prompts),
1350
+ gr.update(value=page_display_text),
1351
+ current_page
1352
+ )
1353
+
1354
+ prev_page_button.click(
1355
+ fn=on_prev_page_click,
1356
+ inputs=[current_page_state, total_pages_state],
1357
+ outputs=[preset_prompt, page_display, current_page_state]
1358
+ )
1359
+
1360
+ def on_next_page_click(current_page, total_pages):
1361
+ new_page = min(current_page + 1, total_pages)
1362
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=10)
1363
+ page_display_text = f"Page {current_page} of {total_pages}"
1364
+ return (
1365
+ gr.update(choices=prompts),
1366
+ gr.update(value=page_display_text),
1367
+ current_page
1368
+ )
1369
+
1370
+ next_page_button.click(
1371
+ fn=on_next_page_click,
1372
+ inputs=[current_page_state, total_pages_state],
1373
+ outputs=[preset_prompt, page_display, current_page_state]
1374
  )
1375
 
1376
  def chat_wrapper_single(message, chat_history, api_endpoint, api_key, temperature, user_prompt):
 
1450
  interface['chatbot'],
1451
  interface['chat_history']
1452
  ]
1453
+ ).then(
1454
+ lambda history: approximate_token_count(history),
1455
+ inputs=[interface['chat_history']],
1456
+ outputs=[interface['token_count_display']]
1457
  )
1458
 
1459
  interface['regenerate_button'].click(
 
1470
  interface['chat_history'],
1471
  gr.Textbox(label="Regenerate Status")
1472
  ]
1473
+ ).then(
1474
+ lambda history: approximate_token_count(history),
1475
+ inputs=[interface['chat_history']],
1476
+ outputs=[interface['token_count_display']]
1477
  )
1478
 
1479
+ def clear_chat_single():
1480
+ return [], [], 0
1481
+
1482
  interface['clear_chat_button'].click(
1483
  clear_chat_single,
1484
+ outputs=[interface['chatbot'], interface['chat_history'], interface['token_count_display']]
 
1485
  )
1486
 
1487
 
 
1500
 
1501
  return new_msg, updated_chatbot, new_history, new_conv_id
1502
 
 
 
1503
  # Mock function to simulate LLM processing
1504
  def process_with_llm(workflow, context, prompt, api_endpoint, api_key):
1505
  api_key_snippet = api_key[:5] + "..." if api_key else "Not provided"
1506
  return f"LLM output using {api_endpoint} (API Key: {api_key_snippet}) for {workflow} with context: {context[:30]}... and prompt: {prompt[:30]}..."
1507
 
 
1508
  #
1509
  # End of Chat_ui.py
1510
  #######################################################################################################################
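Since process_with_llm above is an explicit mock, here is a quick illustration of the string it produces (no API call is made; all values are made up):

# Example call against the mock defined above.
output = process_with_llm(
    workflow="summarize",
    context="Transcript of episode 12 covering retrieval-augmented generation",
    prompt="Summarize the key points in five bullets",
    api_endpoint="openai",
    api_key="sk-abcdef123456",
)
# output is roughly:
# "LLM output using openai (API Key: sk-ab...) for summarize with context: Transcript of episode 12 cover... and prompt: Summarize the key points in fi..."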
App_Function_Libraries/Gradio_UI/Embeddings_tab.py CHANGED
@@ -4,6 +4,7 @@
4
  # Imports
5
  import json
6
  import logging
 
7
  #
8
  # External Imports
9
  import gradio as gr
@@ -11,26 +12,58 @@ import numpy as np
11
  from tqdm import tqdm
12
  #
13
  # Local Imports
14
- from App_Function_Libraries.DB.DB_Manager import get_all_content_from_database
 
 
15
  from App_Function_Libraries.RAG.ChromaDB_Library import chroma_client, \
16
  store_in_chroma, situate_context
17
  from App_Function_Libraries.RAG.Embeddings_Create import create_embedding, create_embeddings_batch
18
  from App_Function_Libraries.Chunk_Lib import improved_chunking_process, chunk_for_embedding
 
 
 
19
  #
20
  ########################################################################################################################
21
  #
22
  # Functions:
23
 
24
  def create_embeddings_tab():
 
 
 
 
 
 
 
 
 
 
 
 
25
  with gr.TabItem("Create Embeddings", visible=True):
26
  gr.Markdown("# Create Embeddings for All Content")
27
 
28
  with gr.Row():
29
  with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  embedding_provider = gr.Radio(
31
  choices=["huggingface", "local", "openai"],
32
  label="Select Embedding Provider",
33
- value="huggingface"
34
  )
35
  gr.Markdown("Note: Local provider requires a running Llama.cpp/llamafile server.")
36
  gr.Markdown("OpenAI provider requires a valid API key.")
@@ -65,22 +98,24 @@ def create_embeddings_tab():
65
 
66
  embedding_api_url = gr.Textbox(
67
  label="API URL (for local provider)",
68
- value="http://localhost:8080/embedding",
69
  visible=False
70
  )
71
 
72
- # Add chunking options
73
  chunking_method = gr.Dropdown(
74
  choices=["words", "sentences", "paragraphs", "tokens", "semantic"],
75
  label="Chunking Method",
76
  value="words"
77
  )
78
  max_chunk_size = gr.Slider(
79
- minimum=1, maximum=8000, step=1, value=500,
 
80
  label="Max Chunk Size"
81
  )
82
  chunk_overlap = gr.Slider(
83
- minimum=0, maximum=4000, step=1, value=200,
 
84
  label="Chunk Overlap"
85
  )
86
  adaptive_chunking = gr.Checkbox(
@@ -92,6 +127,7 @@ def create_embeddings_tab():
92
 
93
  with gr.Column():
94
  status_output = gr.Textbox(label="Status", lines=10)
 
95
 
96
  def update_provider_options(provider):
97
  if provider == "huggingface":
@@ -107,23 +143,54 @@ def create_embeddings_tab():
107
  else:
108
  return gr.update(visible=False)
109
 
110
- embedding_provider.change(
111
- fn=update_provider_options,
112
- inputs=[embedding_provider],
113
- outputs=[huggingface_model, openai_model, custom_embedding_model, embedding_api_url]
114
- )
115
-
116
- huggingface_model.change(
117
- fn=update_huggingface_options,
118
- inputs=[huggingface_model],
119
- outputs=[custom_embedding_model]
120
- )
121
 
122
- def create_all_embeddings(provider, hf_model, openai_model, custom_model, api_url, method, max_size, overlap, adaptive):
 
123
  try:
124
- all_content = get_all_content_from_database()
 
 
 
 
125
  if not all_content:
126
- return "No content found in the database."
127
 
128
  chunk_options = {
129
  'method': method,
@@ -132,7 +199,7 @@ def create_embeddings_tab():
132
  'adaptive': adaptive
133
  }
134
 
135
- collection_name = "all_content_embeddings"
136
  collection = chroma_client.get_or_create_collection(name=collection_name)
137
 
138
  # Determine the model to use
@@ -141,55 +208,113 @@ def create_embeddings_tab():
141
  elif provider == "openai":
142
  model = openai_model
143
  else:
144
- model = custom_model
 
 
 
 
145
 
146
- for item in all_content:
147
- media_id = item['id']
148
  text = item['content']
149
 
150
  chunks = improved_chunking_process(text, chunk_options)
151
- for i, chunk in enumerate(chunks):
152
  chunk_text = chunk['text']
153
- chunk_id = f"doc_{media_id}_chunk_{i}"
154
-
155
- existing = collection.get(ids=[chunk_id])
156
- if existing['ids']:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  continue
158
 
159
- embedding = create_embedding(chunk_text, provider, model, api_url)
160
- metadata = {
161
- "media_id": str(media_id),
162
- "chunk_index": i,
163
- "total_chunks": len(chunks),
164
- "chunking_method": method,
165
- "max_chunk_size": max_size,
166
- "chunk_overlap": overlap,
167
- "adaptive_chunking": adaptive,
168
- "embedding_model": model,
169
- "embedding_provider": provider,
170
- **chunk['metadata']
171
- }
172
- store_in_chroma(collection_name, [chunk_text], [embedding], [chunk_id], [metadata])
173
-
174
- return "Embeddings created and stored successfully for all content."
175
  except Exception as e:
176
  logging.error(f"Error during embedding creation: {str(e)}")
177
  return f"Error: {str(e)}"
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  create_button.click(
180
  fn=create_all_embeddings,
181
- inputs=[embedding_provider, huggingface_model, openai_model, custom_embedding_model, embedding_api_url,
182
- chunking_method, max_chunk_size, chunk_overlap, adaptive_chunking],
 
 
 
183
  outputs=status_output
184
  )
185
 
186
 
187
  def create_view_embeddings_tab():
 
 
 
 
 
 
 
 
 
 
 
 
188
  with gr.TabItem("View/Update Embeddings", visible=True):
189
  gr.Markdown("# View and Update Embeddings")
190
- item_mapping = gr.State({})
 
 
191
  with gr.Row():
192
  with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  item_dropdown = gr.Dropdown(label="Select Item", choices=[], interactive=True)
194
  refresh_button = gr.Button("Refresh Item List")
195
  embedding_status = gr.Textbox(label="Embedding Status", interactive=False)
@@ -236,9 +361,10 @@ def create_view_embeddings_tab():
236
 
237
  embedding_api_url = gr.Textbox(
238
  label="API URL (for local provider)",
239
- value="http://localhost:8080/embedding",
240
  visible=False
241
  )
 
242
  chunking_method = gr.Dropdown(
243
  choices=["words", "sentences", "paragraphs", "tokens", "semantic"],
244
  label="Chunking Method",
@@ -267,15 +393,45 @@ def create_view_embeddings_tab():
267
  )
268
  contextual_api_key = gr.Textbox(label="API Key", lines=1)
269
 
270
- def get_items_with_embedding_status():
 
 
 
 
 
 
 
 
 
 
271
  try:
272
- items = get_all_content_from_database()
273
- collection = chroma_client.get_or_create_collection(name="all_content_embeddings")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  choices = []
275
  new_item_mapping = {}
276
  for item in items:
277
  try:
278
- result = collection.get(ids=[f"doc_{item['id']}_chunk_0"])
 
279
  embedding_exists = result is not None and result.get('ids') and len(result['ids']) > 0
280
  status = "Embedding exists" if embedding_exists else "No embedding"
281
  except Exception as e:
@@ -303,40 +459,62 @@ def create_view_embeddings_tab():
303
  else:
304
  return gr.update(visible=False)
305
 
306
- def check_embedding_status(selected_item, item_mapping):
307
  if not selected_item:
308
  return "Please select an item", "", ""
309
 
 
 
 
 
 
 
 
310
  try:
311
  item_id = item_mapping.get(selected_item)
312
  if item_id is None:
313
  return f"Invalid item selected: {selected_item}", "", ""
314
 
315
  item_title = selected_item.rsplit(' (', 1)[0]
316
- collection = chroma_client.get_or_create_collection(name="all_content_embeddings")
 
 
 
 
 
 
 
 
317
 
318
- result = collection.get(ids=[f"doc_{item_id}_chunk_0"], include=["embeddings", "metadatas"])
319
- logging.info(f"ChromaDB result for item '{item_title}' (ID: {item_id}): {result}")
 
320
 
321
- if not result['ids']:
 
322
  return f"No embedding found for item '{item_title}' (ID: {item_id})", "", ""
323
 
324
- if not result['embeddings'] or not result['embeddings'][0]:
 
325
  return f"Embedding data missing for item '{item_title}' (ID: {item_id})", "", ""
326
 
327
  embedding = result['embeddings'][0]
328
- metadata = result['metadatas'][0] if result['metadatas'] else {}
329
  embedding_preview = str(embedding[:50])
330
  status = f"Embedding exists for item '{item_title}' (ID: {item_id})"
331
  return status, f"First 50 elements of embedding:\n{embedding_preview}", json.dumps(metadata, indent=2)
332
 
333
  except Exception as e:
334
- logging.error(f"Error in check_embedding_status: {str(e)}")
335
  return f"Error processing item: {selected_item}. Details: {str(e)}", "", ""
336
 
337
- def create_new_embedding_for_item(selected_item, provider, hf_model, openai_model, custom_model, api_url,
338
- method, max_size, overlap, adaptive,
339
- item_mapping, use_contextual, contextual_api_choice=None):
 
 
 
 
340
  if not selected_item:
341
  return "Please select an item", "", ""
342
 
@@ -345,8 +523,26 @@ def create_view_embeddings_tab():
345
  if item_id is None:
346
  return f"Invalid item selected: {selected_item}", "", ""
347
 
348
- items = get_all_content_from_database()
349
- item = next((item for item in items if item['id'] == item_id), None)
 
 
 
350
  if not item:
351
  return f"Item not found: {item_id}", "", ""
352
 
@@ -359,11 +555,11 @@ def create_view_embeddings_tab():
359
 
360
  logging.info(f"Chunking content for item: {item['title']} (ID: {item_id})")
361
  chunks = chunk_for_embedding(item['content'], item['title'], chunk_options)
362
- collection_name = "all_content_embeddings"
363
  collection = chroma_client.get_or_create_collection(name=collection_name)
364
 
365
  # Delete existing embeddings for this item
366
- existing_ids = [f"doc_{item_id}_chunk_{i}" for i in range(len(chunks))]
367
  collection.delete(ids=existing_ids)
368
  logging.info(f"Deleted {len(existing_ids)} existing embeddings for item {item_id}")
369
 
@@ -381,7 +577,7 @@ def create_view_embeddings_tab():
381
  contextualized_text = chunk_text
382
  context = None
383
 
384
- chunk_id = f"doc_{item_id}_chunk_{i}"
385
 
386
  # Determine the model to use
387
  if provider == "huggingface":
@@ -392,7 +588,7 @@ def create_view_embeddings_tab():
392
  model = custom_model
393
 
394
  metadata = {
395
- "media_id": str(item_id),
396
  "chunk_index": i,
397
  "total_chunks": len(chunks),
398
  "chunking_method": method,
@@ -441,15 +637,25 @@ def create_view_embeddings_tab():
441
  logging.error(f"Error in create_new_embedding_for_item: {str(e)}", exc_info=True)
442
  return f"Error creating embedding: {str(e)}", "", ""
443
 
 
 
 
 
 
 
 
444
  refresh_button.click(
445
  get_items_with_embedding_status,
 
446
  outputs=[item_dropdown, item_mapping]
447
  )
 
448
  item_dropdown.change(
449
  check_embedding_status,
450
- inputs=[item_dropdown, item_mapping],
451
  outputs=[embedding_status, embedding_preview, embedding_metadata]
452
  )
 
453
  create_new_embedding_button.click(
454
  create_new_embedding_for_item,
455
  inputs=[item_dropdown, embedding_provider, huggingface_model, openai_model, custom_embedding_model, embedding_api_url,
@@ -469,9 +675,10 @@ def create_view_embeddings_tab():
469
  )
470
 
471
  return (item_dropdown, refresh_button, embedding_status, embedding_preview, embedding_metadata,
472
- create_new_embedding_button, embedding_provider, huggingface_model, openai_model, custom_embedding_model, embedding_api_url,
473
- chunking_method, max_chunk_size, chunk_overlap, adaptive_chunking,
474
- use_contextual_embeddings, contextual_api_choice, contextual_api_key)
 
475
 
476
 
477
  def create_purge_embeddings_tab():
 
4
  # Imports
5
  import json
6
  import logging
7
+ import os
8
  #
9
  # External Imports
10
  import gradio as gr
 
12
  from tqdm import tqdm
13
  #
14
  # Local Imports
15
+ from App_Function_Libraries.DB.DB_Manager import get_all_content_from_database, get_all_conversations, \
16
+ get_conversation_text, get_note_by_id
17
+ from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_all_notes
18
  from App_Function_Libraries.RAG.ChromaDB_Library import chroma_client, \
19
  store_in_chroma, situate_context
20
  from App_Function_Libraries.RAG.Embeddings_Create import create_embedding, create_embeddings_batch
21
  from App_Function_Libraries.Chunk_Lib import improved_chunking_process, chunk_for_embedding
22
+ from App_Function_Libraries.Utils.Utils import load_and_log_configs
23
+
24
+
25
  #
26
  ########################################################################################################################
27
  #
28
  # Functions:
29
 
30
  def create_embeddings_tab():
31
+ # Load configuration first
32
+ config = load_and_log_configs()
33
+ if not config:
34
+ raise ValueError("Could not load configuration")
35
+
36
+ # Get database paths from config
37
+ db_config = config['db_config']
38
+ media_db_path = db_config['sqlite_path']
39
+ rag_qa_db_path = os.path.join(os.path.dirname(media_db_path), "rag_qa.db")
40
+ character_chat_db_path = os.path.join(os.path.dirname(media_db_path), "chatDB.db")
41
+ chroma_db_path = db_config['chroma_db_path']
42
+
43
  with gr.TabItem("Create Embeddings", visible=True):
44
  gr.Markdown("# Create Embeddings for All Content")
45
 
46
  with gr.Row():
47
  with gr.Column():
48
+ # Database selection at the top
49
+ database_selection = gr.Radio(
50
+ choices=["Media DB", "RAG Chat", "Character Chat"],
51
+ label="Select Content Source",
52
+ value="Media DB",
53
+ info="Choose which database to create embeddings from"
54
+ )
55
+
56
+ # Add database path display
57
+ current_db_path = gr.Textbox(
58
+ label="Current Database Path",
59
+ value=media_db_path,
60
+ interactive=False
61
+ )
62
+
63
  embedding_provider = gr.Radio(
64
  choices=["huggingface", "local", "openai"],
65
  label="Select Embedding Provider",
66
+ value=config['embedding_config']['embedding_provider'] or "huggingface"
67
  )
68
  gr.Markdown("Note: Local provider requires a running Llama.cpp/llamafile server.")
69
  gr.Markdown("OpenAI provider requires a valid API key.")
 
98
 
99
  embedding_api_url = gr.Textbox(
100
  label="API URL (for local provider)",
101
+ value=config['embedding_config']['embedding_api_url'],
102
  visible=False
103
  )
104
 
105
+ # Add chunking options with config defaults
106
  chunking_method = gr.Dropdown(
107
  choices=["words", "sentences", "paragraphs", "tokens", "semantic"],
108
  label="Chunking Method",
109
  value="words"
110
  )
111
  max_chunk_size = gr.Slider(
112
+ minimum=1, maximum=8000, step=1,
113
+ value=config['embedding_config']['chunk_size'],
114
  label="Max Chunk Size"
115
  )
116
  chunk_overlap = gr.Slider(
117
+ minimum=0, maximum=4000, step=1,
118
+ value=config['embedding_config']['overlap'],
119
  label="Chunk Overlap"
120
  )
121
  adaptive_chunking = gr.Checkbox(
 
127
 
128
  with gr.Column():
129
  status_output = gr.Textbox(label="Status", lines=10)
130
+ progress = gr.Progress()
131
 
132
  def update_provider_options(provider):
133
  if provider == "huggingface":
 
143
  else:
144
  return gr.update(visible=False)
145
 
146
+ def update_database_path(database_type):
147
+ if database_type == "Media DB":
148
+ return media_db_path
149
+ elif database_type == "RAG Chat":
150
+ return rag_qa_db_path
151
+ else: # Character Chat
152
+ return character_chat_db_path
 
 
 
 
153
 
154
+ def create_all_embeddings(provider, hf_model, openai_model, custom_model, api_url, method,
155
+ max_size, overlap, adaptive, database_type, progress=gr.Progress()):
156
  try:
157
+ # Initialize content based on database selection
158
+ if database_type == "Media DB":
159
+ all_content = get_all_content_from_database()
160
+ content_type = "media"
161
+ elif database_type == "RAG Chat":
162
+ all_content = []
163
+ page = 1
164
+ while True:
165
+ conversations, total_pages, _ = get_all_conversations(page=page)
166
+ if not conversations:
167
+ break
168
+ all_content.extend([{
169
+ 'id': conv['conversation_id'],
170
+ 'content': get_conversation_text(conv['conversation_id']),
171
+ 'title': conv['title'],
172
+ 'type': 'conversation'
173
+ } for conv in conversations])
174
+ progress(page / total_pages, desc=f"Loading conversations... Page {page}/{total_pages}")
175
+ page += 1
176
+ else: # Character Chat
177
+ all_content = []
178
+ page = 1
179
+ while True:
180
+ notes, total_pages, _ = get_all_notes(page=page)
181
+ if not notes:
182
+ break
183
+ all_content.extend([{
184
+ 'id': note['id'],
185
+ 'content': f"{note['title']}\n\n{note['content']}",
186
+ 'conversation_id': note['conversation_id'],
187
+ 'type': 'note'
188
+ } for note in notes])
189
+ progress(page / total_pages, desc=f"Loading notes... Page {page}/{total_pages}")
190
+ page += 1
191
+
192
  if not all_content:
193
+ return "No content found in the selected database."
194
 
195
  chunk_options = {
196
  'method': method,
 
199
  'adaptive': adaptive
200
  }
201
 
202
+ collection_name = f"{database_type.lower().replace(' ', '_')}_embeddings"
203
  collection = chroma_client.get_or_create_collection(name=collection_name)
204
 
205
  # Determine the model to use
 
208
  elif provider == "openai":
209
  model = openai_model
210
  else:
211
+ model = custom_model  # the API URL is passed to create_embedding separately below
212
+
213
+ total_items = len(all_content)
214
+ for idx, item in enumerate(all_content):
215
+ progress((idx + 1) / total_items, desc=f"Processing item {idx + 1} of {total_items}")
216
 
217
+ content_id = item['id']
 
218
  text = item['content']
219
 
220
  chunks = improved_chunking_process(text, chunk_options)
221
+ for chunk_idx, chunk in enumerate(chunks):
222
  chunk_text = chunk['text']
223
+ chunk_id = f"{database_type.lower()}_{content_id}_chunk_{chunk_idx}"
224
+
225
+ try:
226
+ embedding = create_embedding(chunk_text, provider, model, api_url)
227
+ metadata = {
228
+ 'content_id': str(content_id),
229
+ 'chunk_index': int(chunk_idx),
230
+ 'total_chunks': int(len(chunks)),
231
+ 'chunking_method': method,
232
+ 'max_chunk_size': int(max_size),
233
+ 'chunk_overlap': int(overlap),
234
+ 'adaptive_chunking': bool(adaptive),
235
+ 'embedding_model': model,
236
+ 'embedding_provider': provider,
237
+ 'content_type': item.get('type', 'media'),
238
+ 'conversation_id': item.get('conversation_id'),
239
+ **{k: (int(v) if isinstance(v, str) and v.isdigit() else v)
240
+ for k, v in chunk['metadata'].items()}
241
+ }
242
+ store_in_chroma(collection_name, [chunk_text], [embedding], [chunk_id], [metadata])
243
+
244
+ except Exception as e:
245
+ logging.error(f"Error processing chunk {chunk_id}: {str(e)}")
246
  continue
247
 
248
+ return f"Embeddings created and stored successfully for all {database_type} content."
 
 
 
 
249
  except Exception as e:
250
  logging.error(f"Error during embedding creation: {str(e)}")
251
  return f"Error: {str(e)}"
252
 
253
+ # Event handlers
254
+ embedding_provider.change(
255
+ fn=update_provider_options,
256
+ inputs=[embedding_provider],
257
+ outputs=[huggingface_model, openai_model, custom_embedding_model, embedding_api_url]
258
+ )
259
+
260
+ huggingface_model.change(
261
+ fn=update_huggingface_options,
262
+ inputs=[huggingface_model],
263
+ outputs=[custom_embedding_model]
264
+ )
265
+
266
+ database_selection.change(
267
+ fn=update_database_path,
268
+ inputs=[database_selection],
269
+ outputs=[current_db_path]
270
+ )
271
+
272
  create_button.click(
273
  fn=create_all_embeddings,
274
+ inputs=[
275
+ embedding_provider, huggingface_model, openai_model, custom_embedding_model,
276
+ embedding_api_url, chunking_method, max_chunk_size, chunk_overlap,
277
+ adaptive_chunking, database_selection
278
+ ],
279
  outputs=status_output
280
  )
281
 
282
 
283
  def create_view_embeddings_tab():
284
+ # Load configuration first
285
+ config = load_and_log_configs()
286
+ if not config:
287
+ raise ValueError("Could not load configuration")
288
+
289
+ # Get database paths from config
290
+ db_config = config['db_config']
291
+ media_db_path = db_config['sqlite_path']
292
+ rag_qa_db_path = os.path.join(os.path.dirname(media_db_path), "rag_chat.db")
293
+ character_chat_db_path = os.path.join(os.path.dirname(media_db_path), "character_chat.db")
294
+ chroma_db_path = db_config['chroma_db_path']
295
+
296
  with gr.TabItem("View/Update Embeddings", visible=True):
297
  gr.Markdown("# View and Update Embeddings")
298
+ # Initialize item_mapping as a Gradio State
299
+
300
+
301
  with gr.Row():
302
  with gr.Column():
303
+ # Add database selection
304
+ database_selection = gr.Radio(
305
+ choices=["Media DB", "RAG Chat", "Character Chat"],
306
+ label="Select Content Source",
307
+ value="Media DB",
308
+ info="Choose which database to view embeddings from"
309
+ )
310
+
311
+ # Add database path display
312
+ current_db_path = gr.Textbox(
313
+ label="Current Database Path",
314
+ value=media_db_path,
315
+ interactive=False
316
+ )
317
+
318
  item_dropdown = gr.Dropdown(label="Select Item", choices=[], interactive=True)
319
  refresh_button = gr.Button("Refresh Item List")
320
  embedding_status = gr.Textbox(label="Embedding Status", interactive=False)
 
361
 
362
  embedding_api_url = gr.Textbox(
363
  label="API URL (for local provider)",
364
+ value=config['embedding_config']['embedding_api_url'],
365
  visible=False
366
  )
367
+
368
  chunking_method = gr.Dropdown(
369
  choices=["words", "sentences", "paragraphs", "tokens", "semantic"],
370
  label="Chunking Method",
 
393
  )
394
  contextual_api_key = gr.Textbox(label="API Key", lines=1)
395
 
396
+ item_mapping = gr.State(value={})
397
+
398
+ def update_database_path(database_type):
399
+ if database_type == "Media DB":
400
+ return media_db_path
401
+ elif database_type == "RAG Chat":
402
+ return rag_qa_db_path
403
+ else: # Character Chat
404
+ return character_chat_db_path
405
+
406
+ def get_items_with_embedding_status(database_type):
407
  try:
408
+ # Get items based on database selection
409
+ if database_type == "Media DB":
410
+ items = get_all_content_from_database()
411
+ elif database_type == "RAG Chat":
412
+ conversations, _, _ = get_all_conversations(page=1)
413
+ items = [{
414
+ 'id': conv['conversation_id'],
415
+ 'title': conv['title'],
416
+ 'type': 'conversation'
417
+ } for conv in conversations]
418
+ else: # Character Chat
419
+ notes, _, _ = get_all_notes(page=1)
420
+ items = [{
421
+ 'id': note['id'],
422
+ 'title': note['title'],
423
+ 'type': 'note'
424
+ } for note in notes]
425
+
426
+ collection_name = f"{database_type.lower().replace(' ', '_')}_embeddings"
427
+ collection = chroma_client.get_or_create_collection(name=collection_name)
428
+
429
  choices = []
430
  new_item_mapping = {}
431
  for item in items:
432
  try:
433
+ chunk_id = f"{database_type.lower()}_{item['id']}_chunk_0"
434
+ result = collection.get(ids=[chunk_id])
435
  embedding_exists = result is not None and result.get('ids') and len(result['ids']) > 0
436
  status = "Embedding exists" if embedding_exists else "No embedding"
437
  except Exception as e:
 
459
  else:
460
  return gr.update(visible=False)
461
 
462
+ def check_embedding_status(selected_item, database_type, item_mapping):
463
  if not selected_item:
464
  return "Please select an item", "", ""
465
 
466
+ if item_mapping is None:
467
+ # If mapping is None, try to refresh it
468
+ try:
469
+ _, item_mapping = get_items_with_embedding_status(database_type)
470
+ except Exception as e:
471
+ return f"Error initializing item mapping: {str(e)}", "", ""
472
+
473
  try:
474
  item_id = item_mapping.get(selected_item)
475
  if item_id is None:
476
  return f"Invalid item selected: {selected_item}", "", ""
477
 
478
  item_title = selected_item.rsplit(' (', 1)[0]
479
+ collection_name = f"{database_type.lower().replace(' ', '_')}_embeddings"
480
+ collection = chroma_client.get_or_create_collection(name=collection_name)
481
+ chunk_id = f"{database_type.lower()}_{item_id}_chunk_0"
482
+
483
+ try:
484
+ result = collection.get(ids=[chunk_id], include=["embeddings", "metadatas"])
485
+ except Exception as e:
486
+ logging.error(f"ChromaDB get error: {str(e)}")
487
+ return f"Error retrieving embedding for '{item_title}': {str(e)}", "", ""
488
 
489
+ # Check if result exists and has the expected structure
490
+ if not result or not isinstance(result, dict):
491
+ return f"No embedding found for item '{item_title}' (ID: {item_id})", "", ""
492
 
493
+ # Check if we have any results
494
+ if not result.get('ids') or len(result['ids']) == 0:
495
  return f"No embedding found for item '{item_title}' (ID: {item_id})", "", ""
496
 
497
+ # Check if embeddings exist
498
+ if not result.get('embeddings') or not result['embeddings'][0]:
499
  return f"Embedding data missing for item '{item_title}' (ID: {item_id})", "", ""
500
 
501
  embedding = result['embeddings'][0]
502
+ metadata = result.get('metadatas', [{}])[0] if result.get('metadatas') else {}
503
  embedding_preview = str(embedding[:50])
504
  status = f"Embedding exists for item '{item_title}' (ID: {item_id})"
505
  return status, f"First 50 elements of embedding:\n{embedding_preview}", json.dumps(metadata, indent=2)
506
 
507
  except Exception as e:
508
+ logging.error(f"Error in check_embedding_status: {str(e)}", exc_info=True)
509
  return f"Error processing item: {selected_item}. Details: {str(e)}", "", ""
510
 
511
+ def refresh_and_update(database_type):
512
+ choices_update, new_mapping = get_items_with_embedding_status(database_type)
513
+ return choices_update, new_mapping
514
+
515
+ def create_new_embedding_for_item(selected_item, database_type, provider, hf_model, openai_model,
516
+ custom_model, api_url, method, max_size, overlap, adaptive,
517
+ item_mapping, use_contextual, contextual_api_choice=None):
518
  if not selected_item:
519
  return "Please select an item", "", ""
520
 
 
523
  if item_id is None:
524
  return f"Invalid item selected: {selected_item}", "", ""
525
 
526
+ # Get item content based on database type
527
+ if database_type == "Media DB":
528
+ items = get_all_content_from_database()
529
+ item = next((item for item in items if item['id'] == item_id), None)
530
+ elif database_type == "RAG Chat":
531
+ item = {
532
+ 'id': item_id,
533
+ 'content': get_conversation_text(item_id),
534
+ 'title': selected_item.rsplit(' (', 1)[0],
535
+ 'type': 'conversation'
536
+ }
537
+ else: # Character Chat
538
+ note = get_note_by_id(item_id)
539
+ item = {
540
+ 'id': item_id,
541
+ 'content': f"{note['title']}\n\n{note['content']}",
542
+ 'title': note['title'],
543
+ 'type': 'note'
544
+ }
545
+
546
  if not item:
547
  return f"Item not found: {item_id}", "", ""
548
 
 
555
 
556
  logging.info(f"Chunking content for item: {item['title']} (ID: {item_id})")
557
  chunks = chunk_for_embedding(item['content'], item['title'], chunk_options)
558
+ collection_name = f"{database_type.lower().replace(' ', '_')}_embeddings"
559
  collection = chroma_client.get_or_create_collection(name=collection_name)
560
 
561
  # Delete existing embeddings for this item
562
+ existing_ids = [f"{database_type.lower()}_{item_id}_chunk_{i}" for i in range(len(chunks))]
563
  collection.delete(ids=existing_ids)
564
  logging.info(f"Deleted {len(existing_ids)} existing embeddings for item {item_id}")
565
 
 
577
  contextualized_text = chunk_text
578
  context = None
579
 
580
+ chunk_id = f"{database_type.lower()}_{item_id}_chunk_{i}"
581
 
582
  # Determine the model to use
583
  if provider == "huggingface":
 
588
  model = custom_model
589
 
590
  metadata = {
591
+ "content_id": str(item_id),
592
  "chunk_index": i,
593
  "total_chunks": len(chunks),
594
  "chunking_method": method,
 
637
  logging.error(f"Error in create_new_embedding_for_item: {str(e)}", exc_info=True)
638
  return f"Error creating embedding: {str(e)}", "", ""
639
 
640
+ # Wire up all the event handlers
641
+ database_selection.change(
642
+ update_database_path,
643
+ inputs=[database_selection],
644
+ outputs=[current_db_path]
645
+ )
646
+
647
  refresh_button.click(
648
  get_items_with_embedding_status,
649
+ inputs=[database_selection],
650
  outputs=[item_dropdown, item_mapping]
651
  )
652
+
653
  item_dropdown.change(
654
  check_embedding_status,
655
+ inputs=[item_dropdown, database_selection, item_mapping],
656
  outputs=[embedding_status, embedding_preview, embedding_metadata]
657
  )
658
+
659
  create_new_embedding_button.click(
660
  create_new_embedding_for_item,
661
  inputs=[item_dropdown, embedding_provider, huggingface_model, openai_model, custom_embedding_model, embedding_api_url,
 
675
  )
676
 
677
  return (item_dropdown, refresh_button, embedding_status, embedding_preview, embedding_metadata,
678
+ create_new_embedding_button, embedding_provider, huggingface_model, openai_model,
679
+ custom_embedding_model, embedding_api_url, chunking_method, max_chunk_size,
680
+ chunk_overlap, adaptive_chunking, use_contextual_embeddings,
681
+ contextual_api_choice, contextual_api_key)
682
 
683
 
684
  def create_purge_embeddings_tab():
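
Note on the storage convention used above: collections are named from the lowercased database type with spaces replaced by underscores plus "_embeddings", while chunk IDs are built from the lowercased database type without the space replacement (e.g. "media db_<id>_chunk_<n>"). A minimal sketch of reading one stored chunk back under that convention, assuming a local chromadb client pointed at the configured chroma_db_path; the path and IDs here are illustrative:

    # Illustrative only -- assumes chromadb is installed and the collection was
    # populated by the create_all_embeddings() handler above.
    import chromadb

    client = chromadb.PersistentClient(path="User_Databases/chroma_db")  # assumed path
    collection = client.get_or_create_collection(name="media_db_embeddings")

    # Chunk IDs follow f"{database_type.lower()}_{content_id}_chunk_{index}"
    result = collection.get(ids=["media db_42_chunk_0"], include=["embeddings", "metadatas"])
    if result and result.get("ids"):
        print(result["metadatas"][0]["embedding_model"], len(result["embeddings"][0]))
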
App_Function_Libraries/Gradio_UI/Evaluations_Benchmarks_tab.py CHANGED
@@ -1,9 +1,12 @@
1
  ###################################################################################################
2
  # Evaluations_Benchmarks_tab.py - Gradio code for G-Eval testing
3
  # We will use the G-Eval API to evaluate the quality of the generated summaries.
 
4
 
5
  import gradio as gr
6
  from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
 
 
7
 
8
  def create_geval_tab():
9
  with gr.Tab("G-Eval", visible=True):
@@ -31,13 +34,25 @@ def create_geval_tab():
31
 
32
 
33
  def create_infinite_bench_tab():
34
  with gr.Tab("Infinite Bench", visible=True):
35
  gr.Markdown("# Infinite Bench Evaluation (Coming Soon)")
36
  with gr.Row():
37
  with gr.Column():
 
38
  api_name_input = gr.Dropdown(
39
- choices=["OpenAI", "Anthropic", "Cohere", "Groq", "OpenRouter", "DeepSeek", "HuggingFace", "Mistral", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "Local-LLM", "Ollama"],
40
- label="Select API"
 
41
  )
42
  api_key_input = gr.Textbox(label="API Key (if required)", type="password")
43
  evaluate_button = gr.Button("Evaluate Summary")
 
1
  ###################################################################################################
2
  # Evaluations_Benchmarks_tab.py - Gradio code for G-Eval testing
3
  # We will use the G-Eval API to evaluate the quality of the generated summaries.
4
+ import logging
5
 
6
  import gradio as gr
7
  from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
8
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
9
+
10
 
11
  def create_geval_tab():
12
  with gr.Tab("G-Eval", visible=True):
 
34
 
35
 
36
  def create_infinite_bench_tab():
37
+ try:
38
+ default_value = None
39
+ if default_api_endpoint:
40
+ if default_api_endpoint in global_api_endpoints:
41
+ default_value = format_api_name(default_api_endpoint)
42
+ else:
43
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
44
+ except Exception as e:
45
+ logging.error(f"Error setting default API endpoint: {str(e)}")
46
+ default_value = None
47
  with gr.Tab("Infinite Bench", visible=True):
48
  gr.Markdown("# Infinite Bench Evaluation (Coming Soon)")
49
  with gr.Row():
50
  with gr.Column():
51
+ # Refactored API selection dropdown
52
  api_name_input = gr.Dropdown(
53
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
54
+ value=default_value,
55
+ label="API for Summarization (Optional)"
56
  )
57
  api_key_input = gr.Textbox(label="API Key (if required)", type="password")
58
  evaluate_button = gr.Button("Evaluate Summary")
App_Function_Libraries/Gradio_UI/Explain_summarize_tab.py CHANGED
@@ -7,7 +7,7 @@ import logging
7
  # External Imports
8
  import gradio as gr
9
 
10
- from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
11
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_user_prompt
12
  #
13
  # Local Imports
@@ -17,6 +17,9 @@ from App_Function_Libraries.Summarization.Local_Summarization_Lib import summari
17
  from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, \
18
  summarize_with_cohere, summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, \
19
  summarize_with_huggingface
 
  #
21
  #
22
  ############################################################################################################
@@ -24,32 +27,62 @@ from App_Function_Libraries.Summarization.Summarization_General_Lib import summa
24
  # Functions:
25
 
26
  def create_summarize_explain_tab():
27
  with gr.TabItem("Analyze Text", visible=True):
28
  gr.Markdown("# Analyze / Explain / Summarize Text without ingesting it into the DB")
29
  with gr.Row():
30
  with gr.Column():
31
  with gr.Row():
32
- text_to_work_input = gr.Textbox(label="Text to be Explained or Summarized",
33
- placeholder="Enter the text you want explained or summarized here",
34
- lines=20)
 
 
35
  with gr.Row():
36
  explanation_checkbox = gr.Checkbox(label="Explain Text", value=True)
37
  summarization_checkbox = gr.Checkbox(label="Summarize Text", value=True)
38
- custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
39
- value=False,
40
- visible=True)
41
- preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
42
- value=False,
43
- visible=True)
44
  with gr.Row():
45
- preset_prompt = gr.Dropdown(label="Select Preset Prompt",
46
- choices=load_preset_prompts(),
47
- visible=False)
48
  with gr.Row():
49
- custom_prompt_input = gr.Textbox(label="Custom Prompt",
50
- placeholder="Enter custom prompt here",
51
- lines=3,
52
- visible=False)
 
 
53
  with gr.Row():
54
  system_prompt_input = gr.Textbox(label="System Prompt",
55
  value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
@@ -69,19 +102,21 @@ def create_summarize_explain_tab():
69
  - Ensure adherence to specified format
70
  - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
71
  """,
72
- lines=3,
73
  visible=False,
74
  interactive=True)
 
75
  api_endpoint = gr.Dropdown(
76
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
77
- "OpenRouter",
78
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace", "Custom-OpenAI-API"],
79
- value=None,
80
- label="API to be used for request (Mandatory)"
81
  )
82
  with gr.Row():
83
- api_key_input = gr.Textbox(label="API Key (if required)", placeholder="Enter your API key here",
84
- type="password")
85
  with gr.Row():
86
  explain_summarize_button = gr.Button("Explain/Summarize")
87
 
@@ -90,17 +125,83 @@ def create_summarize_explain_tab():
90
  explanation_output = gr.Textbox(label="Explanation:", lines=20)
91
  custom_prompt_output = gr.Textbox(label="Custom Prompt:", lines=20, visible=True)
92
 
 
93
  custom_prompt_checkbox.change(
94
  fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
95
  inputs=[custom_prompt_checkbox],
96
  outputs=[custom_prompt_input, system_prompt_input]
97
  )
98
  preset_prompt_checkbox.change(
99
- fn=lambda x: gr.update(visible=x),
100
  inputs=[preset_prompt_checkbox],
101
- outputs=[preset_prompt]
102
  )
103
 
104
  def update_prompts(preset_name):
105
  prompts = update_user_prompt(preset_name)
106
  return (
@@ -109,18 +210,27 @@ def create_summarize_explain_tab():
109
  )
110
 
111
  preset_prompt.change(
112
- update_prompts,
113
- inputs=preset_prompt,
114
  outputs=[custom_prompt_input, system_prompt_input]
115
  )
116
 
117
  explain_summarize_button.click(
118
  fn=summarize_explain_text,
119
- inputs=[text_to_work_input, api_endpoint, api_key_input, summarization_checkbox, explanation_checkbox, custom_prompt_input, system_prompt_input],
120
  outputs=[summarization_output, explanation_output, custom_prompt_output]
121
  )
122
 
123
 
 
124
  def summarize_explain_text(message, api_endpoint, api_key, summarization, explanation, custom_prompt, custom_system_prompt,):
125
  global custom_prompt_output
126
  summarization_response = None
 
7
  # External Imports
8
  import gradio as gr
9
 
10
+ from App_Function_Libraries.DB.DB_Manager import list_prompts
11
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_user_prompt
12
  #
13
  # Local Imports
 
17
  from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, \
18
  summarize_with_cohere, summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, \
19
  summarize_with_huggingface
20
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
21
+
22
+
23
  #
24
  #
25
  ############################################################################################################
 
27
  # Functions:
28
 
29
  def create_summarize_explain_tab():
30
+ try:
31
+ default_value = None
32
+ if default_api_endpoint:
33
+ if default_api_endpoint in global_api_endpoints:
34
+ default_value = format_api_name(default_api_endpoint)
35
+ else:
36
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
37
+ except Exception as e:
38
+ logging.error(f"Error setting default API endpoint: {str(e)}")
39
+ default_value = None
40
+
41
  with gr.TabItem("Analyze Text", visible=True):
42
  gr.Markdown("# Analyze / Explain / Summarize Text without ingesting it into the DB")
43
+
44
+ # Initialize state variables for pagination
45
+ current_page_state = gr.State(value=1)
46
+ total_pages_state = gr.State(value=1)
47
+
48
  with gr.Row():
49
  with gr.Column():
50
  with gr.Row():
51
+ text_to_work_input = gr.Textbox(
52
+ label="Text to be Explained or Summarized",
53
+ placeholder="Enter the text you want explained or summarized here",
54
+ lines=20
55
+ )
56
  with gr.Row():
57
  explanation_checkbox = gr.Checkbox(label="Explain Text", value=True)
58
  summarization_checkbox = gr.Checkbox(label="Summarize Text", value=True)
59
+ custom_prompt_checkbox = gr.Checkbox(
60
+ label="Use a Custom Prompt",
61
+ value=False,
62
+ visible=True
63
+ )
64
+ preset_prompt_checkbox = gr.Checkbox(
65
+ label="Use a pre-set Prompt",
66
+ value=False,
67
+ visible=True
68
+ )
69
  with gr.Row():
70
+ # Add pagination controls
71
+ preset_prompt = gr.Dropdown(
72
+ label="Select Preset Prompt",
73
+ choices=[],
74
+ visible=False
75
+ )
76
+ prev_page_button = gr.Button("Previous Page", visible=False)
77
+ page_display = gr.Markdown("Page 1 of X", visible=False)
78
+ next_page_button = gr.Button("Next Page", visible=False)
79
  with gr.Row():
80
+ custom_prompt_input = gr.Textbox(
81
+ label="Custom Prompt",
82
+ placeholder="Enter custom prompt here",
83
+ lines=10,
84
+ visible=False
85
+ )
86
  with gr.Row():
87
  system_prompt_input = gr.Textbox(label="System Prompt",
88
  value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
 
102
  - Ensure adherence to specified format
103
  - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
104
  """,
105
+ lines=10,
106
  visible=False,
107
  interactive=True)
108
+ # Refactored API selection dropdown
109
  api_endpoint = gr.Dropdown(
110
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
111
+ value=default_value,
112
+ label="API for Summarization/Analysis (Optional)"
 
 
113
  )
114
  with gr.Row():
115
+ api_key_input = gr.Textbox(
116
+ label="API Key (if required)",
117
+ placeholder="Enter your API key here",
118
+ type="password"
119
+ )
120
  with gr.Row():
121
  explain_summarize_button = gr.Button("Explain/Summarize")
122
 
 
125
  explanation_output = gr.Textbox(label="Explanation:", lines=20)
126
  custom_prompt_output = gr.Textbox(label="Custom Prompt:", lines=20, visible=True)
127
 
128
+ # Handle custom prompt checkbox change
129
  custom_prompt_checkbox.change(
130
  fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
131
  inputs=[custom_prompt_checkbox],
132
  outputs=[custom_prompt_input, system_prompt_input]
133
  )
134
+
135
+ # Handle preset prompt checkbox change
136
+ def on_preset_prompt_checkbox_change(is_checked):
137
+ if is_checked:
138
+ prompts, total_pages, current_page = list_prompts(page=1, per_page=20)
139
+ page_display_text = f"Page {current_page} of {total_pages}"
140
+ return (
141
+ gr.update(visible=True, interactive=True, choices=prompts), # preset_prompt
142
+ gr.update(visible=True), # prev_page_button
143
+ gr.update(visible=True), # next_page_button
144
+ gr.update(value=page_display_text, visible=True), # page_display
145
+ current_page, # current_page_state
146
+ total_pages # total_pages_state
147
+ )
148
+ else:
149
+ return (
150
+ gr.update(visible=False, interactive=False), # preset_prompt
151
+ gr.update(visible=False), # prev_page_button
152
+ gr.update(visible=False), # next_page_button
153
+ gr.update(visible=False), # page_display
154
+ 1, # current_page_state
155
+ 1 # total_pages_state
156
+ )
157
+
158
  preset_prompt_checkbox.change(
159
+ fn=on_preset_prompt_checkbox_change,
160
  inputs=[preset_prompt_checkbox],
161
+ outputs=[
162
+ preset_prompt,
163
+ prev_page_button,
164
+ next_page_button,
165
+ page_display,
166
+ current_page_state,
167
+ total_pages_state
168
+ ]
169
+ )
170
+
171
+ # Pagination button functions
172
+ def on_prev_page_click(current_page, total_pages):
173
+ new_page = max(current_page - 1, 1)
174
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
175
+ page_display_text = f"Page {current_page} of {total_pages}"
176
+ return (
177
+ gr.update(choices=prompts),
178
+ gr.update(value=page_display_text),
179
+ current_page
180
+ )
181
+
182
+ prev_page_button.click(
183
+ fn=on_prev_page_click,
184
+ inputs=[current_page_state, total_pages_state],
185
+ outputs=[preset_prompt, page_display, current_page_state]
186
  )
187
 
188
+ def on_next_page_click(current_page, total_pages):
189
+ new_page = min(current_page + 1, total_pages)
190
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
191
+ page_display_text = f"Page {current_page} of {total_pages}"
192
+ return (
193
+ gr.update(choices=prompts),
194
+ gr.update(value=page_display_text),
195
+ current_page
196
+ )
197
+
198
+ next_page_button.click(
199
+ fn=on_next_page_click,
200
+ inputs=[current_page_state, total_pages_state],
201
+ outputs=[preset_prompt, page_display, current_page_state]
202
+ )
203
+
204
+ # Update prompts when a preset is selected
205
  def update_prompts(preset_name):
206
  prompts = update_user_prompt(preset_name)
207
  return (
 
210
  )
211
 
212
  preset_prompt.change(
213
+ fn=update_prompts,
214
+ inputs=[preset_prompt],
215
  outputs=[custom_prompt_input, system_prompt_input]
216
  )
217
 
218
  explain_summarize_button.click(
219
  fn=summarize_explain_text,
220
+ inputs=[
221
+ text_to_work_input,
222
+ api_endpoint,
223
+ api_key_input,
224
+ summarization_checkbox,
225
+ explanation_checkbox,
226
+ custom_prompt_input,
227
+ system_prompt_input
228
+ ],
229
  outputs=[summarization_output, explanation_output, custom_prompt_output]
230
  )
231
 
232
 
233
+
234
  def summarize_explain_text(message, api_endpoint, api_key, summarization, explanation, custom_prompt, custom_system_prompt,):
235
  global custom_prompt_output
236
  summarization_response = None
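
The preset-prompt picker above leans on list_prompts(page, per_page) returning (prompts, total_pages, current_page). A condensed sketch of the prev/next paging logic, written standalone for readability; it assumes that return signature and the fixed page size of 20 used by the tab:

    # Illustrative paging helper mirroring the prev/next handlers above.
    from App_Function_Libraries.DB.DB_Manager import list_prompts

    def turn_page(current_page, total_pages, direction, per_page=20):
        # direction is -1 for "Previous Page" and +1 for "Next Page"
        new_page = min(max(current_page + direction, 1), max(total_pages, 1))
        prompts, total_pages, current_page = list_prompts(page=new_page, per_page=per_page)
        return prompts, f"Page {current_page} of {total_pages}", current_page
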
App_Function_Libraries/Gradio_UI/Export_Functionality.py CHANGED
@@ -6,9 +6,11 @@ import math
6
  import logging
7
  import shutil
8
  import tempfile
9
- from typing import List, Dict, Optional, Tuple
10
  import gradio as gr
11
- from App_Function_Libraries.DB.DB_Manager import DatabaseError
 
 
12
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import fetch_item_details, fetch_items_by_keyword, browse_items
13
 
14
  logger = logging.getLogger(__name__)
@@ -36,7 +38,7 @@ def export_items_by_keyword(keyword: str) -> str:
36
  items = fetch_items_by_keyword(keyword)
37
  if not items:
38
  logger.warning(f"No items found for keyword: {keyword}")
39
- return None
40
 
41
  # Create a temporary directory to store individual markdown files
42
  with tempfile.TemporaryDirectory() as temp_dir:
@@ -66,7 +68,7 @@ def export_items_by_keyword(keyword: str) -> str:
66
  return final_zip_path
67
  except Exception as e:
68
  logger.error(f"Error exporting items for keyword '{keyword}': {str(e)}")
69
- return None
70
 
71
 
72
  def export_selected_items(selected_items: List[Dict]) -> Tuple[Optional[str], str]:
@@ -146,121 +148,747 @@ def display_search_results_export_tab(search_query: str, search_type: str, page:
146
  logger.error(error_message)
147
  return [], error_message, 1, 1
148
 
 
 
 
149
 
150
- def create_export_tab():
151
- with gr.Tab("Search and Export"):
152
- with gr.Row():
153
- with gr.Column():
154
- gr.Markdown("# Search and Export Items")
155
- gr.Markdown("Search for items and export them as markdown files")
156
- gr.Markdown("You can also export items by keyword")
157
- search_query = gr.Textbox(label="Search Query")
158
- search_type = gr.Radio(["Title", "URL", "Keyword", "Content"], label="Search By")
159
- search_button = gr.Button("Search")
160
-
161
- with gr.Column():
162
- prev_button = gr.Button("Previous Page")
163
- next_button = gr.Button("Next Page")
164
-
165
- current_page = gr.State(1)
166
- total_pages = gr.State(1)
167
-
168
- search_results = gr.CheckboxGroup(label="Search Results", choices=[])
169
- export_selected_button = gr.Button("Export Selected Items")
170
-
171
- keyword_input = gr.Textbox(label="Enter keyword for export")
172
- export_by_keyword_button = gr.Button("Export items by keyword")
173
-
174
- export_output = gr.File(label="Download Exported File")
175
- error_output = gr.Textbox(label="Status/Error Messages", interactive=False)
176
-
177
- def search_and_update(query, search_type, page):
178
- results, message, current, total = display_search_results_export_tab(query, search_type, page)
179
- logger.debug(f"search_and_update results: {results}")
180
- return results, message, current, total, gr.update(choices=results)
181
-
182
- search_button.click(
183
- fn=search_and_update,
184
- inputs=[search_query, search_type, current_page],
185
- outputs=[search_results, error_output, current_page, total_pages, search_results],
186
- show_progress="full"
187
- )
188
-
189
-
190
- def update_page(current, total, direction):
191
- new_page = max(1, min(total, current + direction))
192
- return new_page
193
-
194
- prev_button.click(
195
- fn=update_page,
196
- inputs=[current_page, total_pages, gr.State(-1)],
197
- outputs=[current_page]
198
- ).then(
199
- fn=search_and_update,
200
- inputs=[search_query, search_type, current_page],
201
- outputs=[search_results, error_output, current_page, total_pages],
202
- show_progress=True
203
- )
204
-
205
- next_button.click(
206
- fn=update_page,
207
- inputs=[current_page, total_pages, gr.State(1)],
208
- outputs=[current_page]
209
- ).then(
210
- fn=search_and_update,
211
- inputs=[search_query, search_type, current_page],
212
- outputs=[search_results, error_output, current_page, total_pages],
213
- show_progress=True
214
- )
215
-
216
- def handle_export_selected(selected_items):
217
- logger.debug(f"Exporting selected items: {selected_items}")
218
- return export_selected_items(selected_items)
219
-
220
- export_selected_button.click(
221
- fn=handle_export_selected,
222
- inputs=[search_results],
223
- outputs=[export_output, error_output],
224
- show_progress="full"
225
- )
226
-
227
- export_by_keyword_button.click(
228
- fn=export_items_by_keyword,
229
- inputs=[keyword_input],
230
- outputs=[export_output, error_output],
231
- show_progress="full"
232
- )
233
-
234
- def handle_item_selection(selected_items):
235
- logger.debug(f"Selected items: {selected_items}")
236
- if not selected_items:
237
- return None, "No item selected"
238
-
239
- try:
240
- # Assuming selected_items is a list of dictionaries
241
- selected_item = selected_items[0]
242
- logger.debug(f"First selected item: {selected_item}")
243
-
244
- # Check if 'value' is a string (JSON) or already a dictionary
245
- if isinstance(selected_item['value'], str):
246
- item_data = json.loads(selected_item['value'])
247
- else:
248
- item_data = selected_item['value']
249
-
250
- logger.debug(f"Item data: {item_data}")
251
-
252
- item_id = item_data['id']
253
- return export_item_as_markdown(item_id)
254
- except Exception as e:
255
- error_message = f"Error processing selected item: {str(e)}"
256
- logger.error(error_message)
257
- return None, error_message
258
-
259
- search_results.select(
260
- fn=handle_item_selection,
261
- inputs=[search_results],
262
- outputs=[export_output, error_output],
263
- show_progress="full"
264
- )
265
 
266
 
 
6
  import logging
7
  import shutil
8
  import tempfile
9
+ from typing import List, Dict, Optional, Tuple, Any
10
  import gradio as gr
11
+ from App_Function_Libraries.DB.DB_Manager import DatabaseError, fetch_all_notes, fetch_all_conversations, \
12
+ get_keywords_for_note, fetch_notes_by_ids, fetch_conversations_by_ids
13
+ from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_keywords_for_conversation
14
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import fetch_item_details, fetch_items_by_keyword, browse_items
15
 
16
  logger = logging.getLogger(__name__)
 
38
  items = fetch_items_by_keyword(keyword)
39
  if not items:
40
  logger.warning(f"No items found for keyword: {keyword}")
41
+ return f"No items found for keyword: {keyword}"
42
 
43
  # Create a temporary directory to store individual markdown files
44
  with tempfile.TemporaryDirectory() as temp_dir:
 
68
  return final_zip_path
69
  except Exception as e:
70
  logger.error(f"Error exporting items for keyword '{keyword}': {str(e)}")
71
+ return f"Error exporting items for keyword '{keyword}': {str(e)}"
72
 
73
 
74
  def export_selected_items(selected_items: List[Dict]) -> Tuple[Optional[str], str]:
 
148
  logger.error(error_message)
149
  return [], error_message, 1, 1
150
 
151
+ #
152
+ # End of Media DB Export functionality
153
+ ################################################################
154
 
 
 
155
 
156
+ ################################################################
157
+ #
158
+ # Functions for RAG Chat DB Export functionality
159
+
160
+
161
+ def export_rag_conversations_as_json(
162
+ selected_conversations: Optional[List[Dict[str, Any]]] = None
163
+ ) -> Tuple[Optional[str], str]:
164
+ """
165
+ Export conversations to a JSON file.
166
+
167
+ Args:
168
+ selected_conversations: Optional list of conversation dictionaries
169
+
170
+ Returns:
171
+ Tuple of (filename or None, status message)
172
+ """
173
+ try:
174
+ if selected_conversations:
175
+ # Extract conversation IDs from selected items
176
+ conversation_ids = []
177
+ for item in selected_conversations:
178
+ if isinstance(item, str):
179
+ item_data = json.loads(item)
180
+ elif isinstance(item, dict) and 'value' in item:
181
+ item_data = item['value'] if isinstance(item['value'], dict) else json.loads(item['value'])
182
+ else:
183
+ item_data = item
184
+ conversation_ids.append(item_data['conversation_id'])
185
+
186
+ conversations = fetch_conversations_by_ids(conversation_ids)
187
+ else:
188
+ conversations = fetch_all_conversations()
189
+
190
+ export_data = []
191
+ for conversation_id, title, messages in conversations:
192
+ # Get keywords for the conversation
193
+ keywords = get_keywords_for_conversation(conversation_id)
194
+
195
+ conversation_data = {
196
+ "conversation_id": conversation_id,
197
+ "title": title,
198
+ "keywords": keywords,
199
+ "messages": [
200
+ {"role": role, "content": content}
201
+ for role, content in messages
202
+ ]
203
+ }
204
+ export_data.append(conversation_data)
205
+
206
+ filename = "rag_conversations_export.json"
207
+ with open(filename, "w", encoding='utf-8') as f:
208
+ json.dump(export_data, f, indent=2, ensure_ascii=False)
209
+
210
+ logger.info(f"Successfully exported {len(export_data)} conversations to {filename}")
211
+ return filename, f"Successfully exported {len(export_data)} conversations to {filename}"
212
+ except Exception as e:
213
+ error_message = f"Error exporting conversations: {str(e)}"
214
+ logger.error(error_message)
215
+ return None, error_message
216
+
217
+
218
+ def export_rag_notes_as_json(
219
+ selected_notes: Optional[List[Dict[str, Any]]] = None
220
+ ) -> Tuple[Optional[str], str]:
221
+ """
222
+ Export notes to a JSON file.
223
+
224
+ Args:
225
+ selected_notes: Optional list of note dictionaries
226
+
227
+ Returns:
228
+ Tuple of (filename or None, status message)
229
+ """
230
+ try:
231
+ if selected_notes:
232
+ # Extract note IDs from selected items
233
+ note_ids = []
234
+ for item in selected_notes:
235
+ if isinstance(item, str):
236
+ item_data = json.loads(item)
237
+ elif isinstance(item, dict) and 'value' in item:
238
+ item_data = item['value'] if isinstance(item['value'], dict) else json.loads(item['value'])
239
+ else:
240
+ item_data = item
241
+ note_ids.append(item_data['id'])
242
+
243
+ notes = fetch_notes_by_ids(note_ids)
244
+ else:
245
+ notes = fetch_all_notes()
246
+
247
+ export_data = []
248
+ for note_id, title, content in notes:
249
+ # Get keywords for the note
250
+ keywords = get_keywords_for_note(note_id)
251
+
252
+ note_data = {
253
+ "note_id": note_id,
254
+ "title": title,
255
+ "content": content,
256
+ "keywords": keywords
257
+ }
258
+ export_data.append(note_data)
259
+
260
+ filename = "rag_notes_export.json"
261
+ with open(filename, "w", encoding='utf-8') as f:
262
+ json.dump(export_data, f, indent=2, ensure_ascii=False)
263
+
264
+ logger.info(f"Successfully exported {len(export_data)} notes to {filename}")
265
+ return filename, f"Successfully exported {len(export_data)} notes to {filename}"
266
+ except Exception as e:
267
+ error_message = f"Error exporting notes: {str(e)}"
268
+ logger.error(error_message)
269
+ return None, error_message
270
+
271
+
272
+ def display_rag_conversations(search_query: str = "", page: int = 1, items_per_page: int = 10):
273
+ """Display conversations for selection in the export tab."""
274
+ try:
275
+ conversations = fetch_all_conversations()
276
+
277
+ if search_query:
278
+ # Simple search implementation - can be enhanced based on needs
279
+ conversations = [
280
+ conv for conv in conversations
281
+ if search_query.lower() in conv[1].lower() # Search in title
282
+ ]
283
+
284
+ # Implement pagination
285
+ start_idx = (page - 1) * items_per_page
286
+ end_idx = start_idx + items_per_page
287
+ paginated_conversations = conversations[start_idx:end_idx]
288
+ total_pages = (len(conversations) + items_per_page - 1) // items_per_page
289
+
290
+ # Format for checkbox group
291
+ checkbox_data = [
292
+ {
293
+ "name": f"Title: {title}\nMessages: {len(messages)}",
294
+ "value": {"conversation_id": conv_id, "title": title}
295
+ }
296
+ for conv_id, title, messages in paginated_conversations
297
+ ]
298
+
299
+ return (
300
+ checkbox_data,
301
+ f"Found {len(conversations)} conversations (showing page {page} of {total_pages})",
302
+ page,
303
+ total_pages
304
+ )
305
+ except Exception as e:
306
+ error_message = f"Error displaying conversations: {str(e)}"
307
+ logger.error(error_message)
308
+ return [], error_message, 1, 1
309
+
310
+
311
+ def display_rag_notes(search_query: str = "", page: int = 1, items_per_page: int = 10):
312
+ """Display notes for selection in the export tab."""
313
+ try:
314
+ notes = fetch_all_notes()
315
+
316
+ if search_query:
317
+ # Simple search implementation - can be enhanced based on needs
318
+ notes = [
319
+ note for note in notes
320
+ if search_query.lower() in note[1].lower() # Search in title
321
+ or search_query.lower() in note[2].lower() # Search in content
322
+ ]
323
+
324
+ # Implement pagination
325
+ start_idx = (page - 1) * items_per_page
326
+ end_idx = start_idx + items_per_page
327
+ paginated_notes = notes[start_idx:end_idx]
328
+ total_pages = (len(notes) + items_per_page - 1) // items_per_page
329
+
330
+ # Format for checkbox group
331
+ checkbox_data = [
332
+ {
333
+ "name": f"Title: {title}\nContent preview: {content[:100]}...",
334
+ "value": {"id": note_id, "title": title}
335
+ }
336
+ for note_id, title, content in paginated_notes
337
+ ]
338
+
339
+ return (
340
+ checkbox_data,
341
+ f"Found {len(notes)} notes (showing page {page} of {total_pages})",
342
+ page,
343
+ total_pages
344
+ )
345
+ except Exception as e:
346
+ error_message = f"Error displaying notes: {str(e)}"
347
+ logger.error(error_message)
348
+ return [], error_message, 1, 1
349
+
350
+
351
+ def create_rag_export_tab():
352
+ """Create the RAG QA Chat export tab interface."""
353
+ with gr.Tab("RAG QA Chat Export"):
354
+ with gr.Tabs():
355
+ # Conversations Export Tab
356
+ with gr.Tab("Export Conversations"):
357
+ with gr.Row():
358
+ with gr.Column():
359
+ gr.Markdown("## Export RAG QA Chat Conversations")
360
+ conversation_search = gr.Textbox(label="Search Conversations")
361
+ conversation_search_button = gr.Button("Search")
362
+
363
+ with gr.Column():
364
+ conversation_prev_button = gr.Button("Previous Page")
365
+ conversation_next_button = gr.Button("Next Page")
366
+
367
+ conversation_current_page = gr.State(1)
368
+ conversation_total_pages = gr.State(1)
369
+
370
+ conversation_results = gr.CheckboxGroup(label="Select Conversations to Export")
371
+ export_selected_conversations_button = gr.Button("Export Selected Conversations")
372
+ export_all_conversations_button = gr.Button("Export All Conversations")
373
+
374
+ conversation_export_output = gr.File(label="Download Exported Conversations")
375
+ conversation_status = gr.Textbox(label="Status", interactive=False)
376
+
377
+ # Notes Export Tab
378
+ with gr.Tab("Export Notes"):
379
+ with gr.Row():
380
+ with gr.Column():
381
+ gr.Markdown("## Export RAG QA Chat Notes")
382
+ notes_search = gr.Textbox(label="Search Notes")
383
+ notes_search_button = gr.Button("Search")
384
+
385
+ with gr.Column():
386
+ notes_prev_button = gr.Button("Previous Page")
387
+ notes_next_button = gr.Button("Next Page")
388
+
389
+ notes_current_page = gr.State(1)
390
+ notes_total_pages = gr.State(1)
391
+
392
+ notes_results = gr.CheckboxGroup(label="Select Notes to Export")
393
+ export_selected_notes_button = gr.Button("Export Selected Notes")
394
+ export_all_notes_button = gr.Button("Export All Notes")
395
+
396
+ notes_export_output = gr.File(label="Download Exported Notes")
397
+ notes_status = gr.Textbox(label="Status", interactive=False)
398
+
399
+ # Event handlers for conversations
400
+ def search_conversations(query, page):
401
+ return display_rag_conversations(query, page)
402
+
403
+ conversation_search_button.click(
404
+ fn=search_conversations,
405
+ inputs=[conversation_search, conversation_current_page],
406
+ outputs=[conversation_results, conversation_status, conversation_current_page, conversation_total_pages]
407
+ )
408
+
409
+ def update_conversation_page(current, total, direction):
410
+ new_page = max(1, min(total, current + direction))
411
+ return new_page
412
+
413
+ conversation_prev_button.click(
414
+ fn=update_conversation_page,
415
+ inputs=[conversation_current_page, conversation_total_pages, gr.State(-1)],
416
+ outputs=[conversation_current_page]
417
+ ).then(
418
+ fn=search_conversations,
419
+ inputs=[conversation_search, conversation_current_page],
420
+ outputs=[conversation_results, conversation_status, conversation_current_page, conversation_total_pages]
421
+ )
422
+
423
+ conversation_next_button.click(
424
+ fn=update_conversation_page,
425
+ inputs=[conversation_current_page, conversation_total_pages, gr.State(1)],
426
+ outputs=[conversation_current_page]
427
+ ).then(
428
+ fn=search_conversations,
429
+ inputs=[conversation_search, conversation_current_page],
430
+ outputs=[conversation_results, conversation_status, conversation_current_page, conversation_total_pages]
431
+ )
432
+
433
+ export_selected_conversations_button.click(
434
+ fn=export_rag_conversations_as_json,
435
+ inputs=[conversation_results],
436
+ outputs=[conversation_export_output, conversation_status]
437
+ )
438
+
439
+ export_all_conversations_button.click(
440
+ fn=lambda: export_rag_conversations_as_json(),
441
+ outputs=[conversation_export_output, conversation_status]
442
+ )
443
+
444
+ # Event handlers for notes
445
+ def search_notes(query, page):
446
+ return display_rag_notes(query, page)
447
+
448
+ notes_search_button.click(
449
+ fn=search_notes,
450
+ inputs=[notes_search, notes_current_page],
451
+ outputs=[notes_results, notes_status, notes_current_page, notes_total_pages]
452
+ )
453
+
454
+ def update_notes_page(current, total, direction):
455
+ new_page = max(1, min(total, current + direction))
456
+ return new_page
457
+
458
+ notes_prev_button.click(
459
+ fn=update_notes_page,
460
+ inputs=[notes_current_page, notes_total_pages, gr.State(-1)],
461
+ outputs=[notes_current_page]
462
+ ).then(
463
+ fn=search_notes,
464
+ inputs=[notes_search, notes_current_page],
465
+ outputs=[notes_results, notes_status, notes_current_page, notes_total_pages]
466
+ )
467
+
468
+ notes_next_button.click(
469
+ fn=update_notes_page,
470
+ inputs=[notes_current_page, notes_total_pages, gr.State(1)],
471
+ outputs=[notes_current_page]
472
+ ).then(
473
+ fn=search_notes,
474
+ inputs=[notes_search, notes_current_page],
475
+ outputs=[notes_results, notes_status, notes_current_page, notes_total_pages]
476
+ )
477
+
478
+ export_selected_notes_button.click(
479
+ fn=export_rag_notes_as_json,
480
+ inputs=[notes_results],
481
+ outputs=[notes_export_output, notes_status]
482
+ )
483
+
484
+ export_all_notes_button.click(
485
+ fn=lambda: export_rag_notes_as_json(),
486
+ outputs=[notes_export_output, notes_status]
487
+ )
488
+
489
+ #
490
+ # End of RAG Chat DB Export functionality
491
+ #####################################################
492
+
493
+ def create_export_tabs():
494
+ """Create the unified export interface with all export tabs."""
495
+ with gr.Tabs():
496
+ # Media DB Export Tab
497
+ with gr.Tab("Media DB Export"):
498
+ with gr.Row():
499
+ with gr.Column():
500
+ gr.Markdown("# Search and Export Items")
501
+ gr.Markdown("Search for items and export them as markdown files")
502
+ gr.Markdown("You can also export items by keyword")
503
+ search_query = gr.Textbox(label="Search Query")
504
+ search_type = gr.Radio(["Title", "URL", "Keyword", "Content"], label="Search By")
505
+ search_button = gr.Button("Search")
506
+
507
+ with gr.Column():
508
+ prev_button = gr.Button("Previous Page")
509
+ next_button = gr.Button("Next Page")
510
+
511
+ current_page = gr.State(1)
512
+ total_pages = gr.State(1)
513
+
514
+ search_results = gr.CheckboxGroup(label="Search Results", choices=[])
515
+ export_selected_button = gr.Button("Export Selected Items")
516
+
517
+ keyword_input = gr.Textbox(label="Enter keyword for export")
518
+ export_by_keyword_button = gr.Button("Export items by keyword")
519
+
520
+ export_output = gr.File(label="Download Exported File")
521
+ error_output = gr.Textbox(label="Status/Error Messages", interactive=False)
522
+
523
+ # Conversations Export Tab
524
+ with gr.Tab("RAG Conversations Export"):
525
+ with gr.Row():
526
+ with gr.Column():
527
+ gr.Markdown("## Export RAG QA Chat Conversations")
528
+ conversation_search = gr.Textbox(label="Search Conversations")
529
+ conversation_search_button = gr.Button("Search")
530
+
531
+ with gr.Column():
532
+ conversation_prev_button = gr.Button("Previous Page")
533
+ conversation_next_button = gr.Button("Next Page")
534
+
535
+ conversation_current_page = gr.State(1)
536
+ conversation_total_pages = gr.State(1)
537
+
538
+ conversation_results = gr.CheckboxGroup(label="Select Conversations to Export")
539
+ export_selected_conversations_button = gr.Button("Export Selected Conversations")
540
+ export_all_conversations_button = gr.Button("Export All Conversations")
541
+
542
+ conversation_export_output = gr.File(label="Download Exported Conversations")
543
+ conversation_status = gr.Textbox(label="Status", interactive=False)
544
+
545
+ # Notes Export Tab
546
+ with gr.Tab("RAG Notes Export"):
547
+ with gr.Row():
548
+ with gr.Column():
549
+ gr.Markdown("## Export RAG QA Chat Notes")
550
+ notes_search = gr.Textbox(label="Search Notes")
551
+ notes_search_button = gr.Button("Search")
552
+
553
+ with gr.Column():
554
+ notes_prev_button = gr.Button("Previous Page")
555
+ notes_next_button = gr.Button("Next Page")
556
+
557
+ notes_current_page = gr.State(1)
558
+ notes_total_pages = gr.State(1)
559
+
560
+ notes_results = gr.CheckboxGroup(label="Select Notes to Export")
561
+ export_selected_notes_button = gr.Button("Export Selected Notes")
562
+ export_all_notes_button = gr.Button("Export All Notes")
563
+
564
+ notes_export_output = gr.File(label="Download Exported Notes")
565
+ notes_status = gr.Textbox(label="Status", interactive=False)
566
+
567
+ # Event handlers for media DB
568
+ def search_and_update(query, search_type, page):
569
+ results, message, current, total = display_search_results_export_tab(query, search_type, page)
570
+ logger.debug(f"search_and_update results: {results}")
571
+ return results, message, current, total, gr.update(choices=results)
572
+
573
+ def update_page(current, total, direction):
574
+ new_page = max(1, min(total, current + direction))
575
+ return new_page
576
+
577
+ def handle_export_selected(selected_items):
578
+ logger.debug(f"Exporting selected items: {selected_items}")
579
+ return export_selected_items(selected_items)
580
+
581
+ def handle_item_selection(selected_items):
582
+ logger.debug(f"Selected items: {selected_items}")
583
+ if not selected_items:
584
+ return None, "No item selected"
585
+
586
+ try:
587
+ selected_item = selected_items[0]
588
+ logger.debug(f"First selected item: {selected_item}")
589
+
590
+ if isinstance(selected_item['value'], str):
591
+ item_data = json.loads(selected_item['value'])
592
+ else:
593
+ item_data = selected_item['value']
594
+
595
+ logger.debug(f"Item data: {item_data}")
596
+ item_id = item_data['id']
597
+ return export_item_as_markdown(item_id)
598
+ except Exception as e:
599
+ error_message = f"Error processing selected item: {str(e)}"
600
+ logger.error(error_message)
601
+ return None, error_message
602
+
603
+ search_button.click(
604
+ fn=search_and_update,
605
+ inputs=[search_query, search_type, current_page],
606
+ outputs=[search_results, error_output, current_page, total_pages, search_results],
607
+ show_progress="full"
608
+ )
609
+
610
+ prev_button.click(
611
+ fn=update_page,
612
+ inputs=[current_page, total_pages, gr.State(-1)],
613
+ outputs=[current_page]
614
+ ).then(
615
+ fn=search_and_update,
616
+ inputs=[search_query, search_type, current_page],
617
+ outputs=[search_results, error_output, current_page, total_pages],
618
+ show_progress=True
619
+ )
620
+
621
+ next_button.click(
622
+ fn=update_page,
623
+ inputs=[current_page, total_pages, gr.State(1)],
624
+ outputs=[current_page]
625
+ ).then(
626
+ fn=search_and_update,
627
+ inputs=[search_query, search_type, current_page],
628
+ outputs=[search_results, error_output, current_page, total_pages],
629
+ show_progress=True
630
+ )
631
+
632
+ export_selected_button.click(
633
+ fn=handle_export_selected,
634
+ inputs=[search_results],
635
+ outputs=[export_output, error_output],
636
+ show_progress="full"
637
+ )
638
+
639
+ export_by_keyword_button.click(
640
+ fn=export_items_by_keyword,
641
+ inputs=[keyword_input],
642
+ outputs=[export_output, error_output],
643
+ show_progress="full"
644
+ )
645
+
646
+ search_results.select(
647
+ fn=handle_item_selection,
648
+ inputs=[search_results],
649
+ outputs=[export_output, error_output],
650
+ show_progress="full"
651
+ )
652
+
653
+ # Event handlers for conversations
654
+ def search_conversations(query, page):
655
+ return display_rag_conversations(query, page)
656
+
657
+ def update_conversation_page(current, total, direction):
658
+ new_page = max(1, min(total, current + direction))
659
+ return new_page
660
+
661
+ conversation_search_button.click(
662
+ fn=search_conversations,
663
+ inputs=[conversation_search, conversation_current_page],
664
+ outputs=[conversation_results, conversation_status, conversation_current_page, conversation_total_pages]
665
+ )
666
+
667
+ conversation_prev_button.click(
668
+ fn=update_conversation_page,
669
+ inputs=[conversation_current_page, conversation_total_pages, gr.State(-1)],
670
+ outputs=[conversation_current_page]
671
+ ).then(
672
+ fn=search_conversations,
673
+ inputs=[conversation_search, conversation_current_page],
674
+ outputs=[conversation_results, conversation_status, conversation_current_page, conversation_total_pages]
675
+ )
676
+
677
+ conversation_next_button.click(
678
+ fn=update_conversation_page,
679
+ inputs=[conversation_current_page, conversation_total_pages, gr.State(1)],
680
+ outputs=[conversation_current_page]
681
+ ).then(
682
+ fn=search_conversations,
683
+ inputs=[conversation_search, conversation_current_page],
684
+ outputs=[conversation_results, conversation_status, conversation_current_page, conversation_total_pages]
685
+ )
686
+
687
+ export_selected_conversations_button.click(
688
+ fn=export_rag_conversations_as_json,
689
+ inputs=[conversation_results],
690
+ outputs=[conversation_export_output, conversation_status]
691
+ )
692
+
693
+ export_all_conversations_button.click(
694
+ fn=lambda: export_rag_conversations_as_json(),
695
+ outputs=[conversation_export_output, conversation_status]
696
+ )
697
+
698
+ # Event handlers for notes
699
+ def search_notes(query, page):
700
+ return display_rag_notes(query, page)
701
+
702
+ def update_notes_page(current, total, direction):
703
+ new_page = max(1, min(total, current + direction))
704
+ return new_page
705
+
706
+ notes_search_button.click(
707
+ fn=search_notes,
708
+ inputs=[notes_search, notes_current_page],
709
+ outputs=[notes_results, notes_status, notes_current_page, notes_total_pages]
710
+ )
711
+
712
+ notes_prev_button.click(
713
+ fn=update_notes_page,
714
+ inputs=[notes_current_page, notes_total_pages, gr.State(-1)],
715
+ outputs=[notes_current_page]
716
+ ).then(
717
+ fn=search_notes,
718
+ inputs=[notes_search, notes_current_page],
719
+ outputs=[notes_results, notes_status, notes_current_page, notes_total_pages]
720
+ )
721
+
722
+ notes_next_button.click(
723
+ fn=update_notes_page,
724
+ inputs=[notes_current_page, notes_total_pages, gr.State(1)],
725
+ outputs=[notes_current_page]
726
+ ).then(
727
+ fn=search_notes,
728
+ inputs=[notes_search, notes_current_page],
729
+ outputs=[notes_results, notes_status, notes_current_page, notes_total_pages]
730
+ )
731
+
732
+ export_selected_notes_button.click(
733
+ fn=export_rag_notes_as_json,
734
+ inputs=[notes_results],
735
+ outputs=[notes_export_output, notes_status]
736
+ )
737
+
738
+ export_all_notes_button.click(
739
+ fn=lambda: export_rag_notes_as_json(),
740
+ outputs=[notes_export_output, notes_status]
741
+ )
742
+
743
+ with gr.TabItem("Export Prompts", visible=True):
744
+ gr.Markdown("# Export Prompts Database Content")
745
+
746
+ with gr.Row():
747
+ with gr.Column():
748
+ export_type = gr.Radio(
749
+ choices=["All Prompts", "Prompts by Keyword"],
750
+ label="Export Type",
751
+ value="All Prompts"
752
+ )
753
+
754
+ # Keyword selection for filtered export
755
+ with gr.Column(visible=False) as keyword_col:
756
+ keyword_input = gr.Textbox(
757
+ label="Enter Keywords (comma-separated)",
758
+ placeholder="Enter keywords to filter prompts..."
759
+ )
760
+
761
+ # Export format selection
762
+ export_format = gr.Radio(
763
+ choices=["CSV", "Markdown (ZIP)"],
764
+ label="Export Format",
765
+ value="CSV"
766
+ )
767
+
768
+ # Export options
769
+ include_options = gr.CheckboxGroup(
770
+ choices=[
771
+ "Include System Prompts",
772
+ "Include User Prompts",
773
+ "Include Details",
774
+ "Include Author",
775
+ "Include Keywords"
776
+ ],
777
+ label="Export Options",
778
+ value=["Include Keywords", "Include Author"]
779
+ )
780
+
781
+ # Markdown-specific options (only visible when Markdown is selected)
782
+ with gr.Column(visible=False) as markdown_options_col:
783
+ markdown_template = gr.Radio(
784
+ choices=[
785
+ "Basic Template",
786
+ "Detailed Template",
787
+ "Custom Template"
788
+ ],
789
+ label="Markdown Template",
790
+ value="Basic Template"
791
+ )
792
+ custom_template = gr.Textbox(
793
+ label="Custom Template",
794
+ placeholder="Use {title}, {author}, {details}, {system}, {user}, {keywords} as placeholders",
795
+ visible=False
796
+ )
797
+
798
+ export_button = gr.Button("Export Prompts")
799
+
800
+ with gr.Column():
801
+ export_status = gr.Textbox(label="Export Status", interactive=False)
802
+ export_file = gr.File(label="Download Export")
803
+
804
+ def update_ui_visibility(export_type, format_choice, template_choice):
805
+ """Update UI elements visibility based on selections"""
806
+ show_keywords = export_type == "Prompts by Keyword"
807
+ show_markdown_options = format_choice == "Markdown (ZIP)"
808
+ show_custom_template = template_choice == "Custom Template" and show_markdown_options
809
+
810
+ return [
811
+ gr.update(visible=show_keywords), # keyword_col
812
+ gr.update(visible=show_markdown_options), # markdown_options_col
813
+ gr.update(visible=show_custom_template) # custom_template
814
+ ]
815
+
816
+ def handle_export(export_type, keywords, export_format, options, markdown_template, custom_template):
817
+ """Handle the export process based on selected options"""
818
+ try:
819
+ # Parse options
820
+ include_system = "Include System Prompts" in options
821
+ include_user = "Include User Prompts" in options
822
+ include_details = "Include Details" in options
823
+ include_author = "Include Author" in options
824
+ include_keywords = "Include Keywords" in options
825
+
826
+ # Handle keyword filtering
827
+ keyword_list = None
828
+ if export_type == "Prompts by Keyword" and keywords:
829
+ keyword_list = [k.strip() for k in keywords.split(",") if k.strip()]
830
+
831
+ # Get the appropriate template
832
+ template = None
833
+ if export_format == "Markdown (ZIP)":
834
+ if markdown_template == "Custom Template":
835
+ template = custom_template
836
+ else:
837
+ template = markdown_template
838
+
839
+ # Perform export
840
+ from App_Function_Libraries.DB.Prompts_DB import export_prompts
841
+ status, file_path = export_prompts(
842
+ export_format=export_format.split()[0].lower(), # 'csv' or 'markdown'
843
+ filter_keywords=keyword_list,
844
+ include_system=include_system,
845
+ include_user=include_user,
846
+ include_details=include_details,
847
+ include_author=include_author,
848
+ include_keywords=include_keywords,
849
+ markdown_template=template
850
+ )
851
+
852
+ return status, file_path
853
+
854
+ except Exception as e:
855
+ error_msg = f"Export failed: {str(e)}"
856
+ logging.error(error_msg)
857
+ return error_msg, None
858
+
859
+ # Event handlers
860
+ export_type.change(
861
+ fn=lambda t, f, m: update_ui_visibility(t, f, m),
862
+ inputs=[export_type, export_format, markdown_template],
863
+ outputs=[keyword_col, markdown_options_col, custom_template]
864
+ )
865
+
866
+ export_format.change(
867
+ fn=lambda t, f, m: update_ui_visibility(t, f, m),
868
+ inputs=[export_type, export_format, markdown_template],
869
+ outputs=[keyword_col, markdown_options_col, custom_template]
870
+ )
871
+
872
+ markdown_template.change(
873
+ fn=lambda t, f, m: update_ui_visibility(t, f, m),
874
+ inputs=[export_type, export_format, markdown_template],
875
+ outputs=[keyword_col, markdown_options_col, custom_template]
876
+ )
877
+
878
+ export_button.click(
879
+ fn=handle_export,
880
+ inputs=[
881
+ export_type,
882
+ keyword_input,
883
+ export_format,
884
+ include_options,
885
+ markdown_template,
886
+ custom_template
887
+ ],
888
+ outputs=[export_status, export_file]
889
+ )
890
+
891
+ #
892
+ # End of Export_Functionality.py
893
+ ######################################################################################################################
894
 
App_Function_Libraries/Gradio_UI/Gradio_Shared.py CHANGED
@@ -216,11 +216,6 @@ def format_content(content):
216
  return formatted_content
217
 
218
 
219
- def update_prompt_dropdown():
220
- prompt_names = list_prompts()
221
- return gr.update(choices=prompt_names)
222
-
223
-
224
  def display_prompt_details(selected_prompt):
225
  if selected_prompt:
226
  prompts = update_user_prompt(selected_prompt)
 
216
  return formatted_content
217
 
218
 
219
  def display_prompt_details(selected_prompt):
220
  if selected_prompt:
221
  prompts = update_user_prompt(selected_prompt)
App_Function_Libraries/Gradio_UI/Import_Functionality.py CHANGED
@@ -2,24 +2,31 @@
2
  # Functionality to import content into the DB
3
  #
4
  # Imports
 
5
  from time import sleep
6
  import logging
7
  import re
8
  import shutil
9
  import tempfile
10
  import os
 
 
11
  import traceback
 
 
12
  import zipfile
13
  #
14
  # External Imports
15
  import gradio as gr
 
 
16
  #
17
  # Local Imports
18
- from App_Function_Libraries.DB.DB_Manager import insert_prompt_to_db, load_preset_prompts, import_obsidian_note_to_db, \
19
- add_media_to_database
20
  from App_Function_Libraries.Prompt_Handling import import_prompt_from_file, import_prompts_from_zip#
21
  from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
22
-
23
  ###################################################################################################################
24
  #
25
  # Functions:
@@ -203,15 +210,6 @@ def create_import_single_prompt_tab():
203
  outputs=save_output
204
  )
205
 
206
- def update_prompt_dropdown():
207
- return gr.update(choices=load_preset_prompts())
208
-
209
- save_button.click(
210
- fn=update_prompt_dropdown,
211
- inputs=[],
212
- outputs=[gr.Dropdown(label="Select Preset Prompt")]
213
- )
214
-
215
  def create_import_item_tab():
216
  with gr.TabItem("Import Markdown/Text Files", visible=True):
217
  gr.Markdown("# Import a markdown file or text file into the database")
@@ -250,11 +248,18 @@ def create_import_multiple_prompts_tab():
250
  gr.Markdown("# Import multiple prompts into the database")
251
  gr.Markdown("Upload a zip file containing multiple prompt files (txt or md)")
252
 
253
  with gr.Row():
254
  with gr.Column():
255
  zip_file = gr.File(label="Upload zip file for import", file_types=["zip"])
256
  import_button = gr.Button("Import Prompts")
257
  prompts_dropdown = gr.Dropdown(label="Select Prompt to Edit", choices=[])
258
  title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content")
259
  author_input = gr.Textbox(label="Author", placeholder="Enter the author's name")
260
  system_input = gr.Textbox(label="System", placeholder="Enter the system message for the prompt",
@@ -268,6 +273,10 @@ def create_import_multiple_prompts_tab():
268
  save_output = gr.Textbox(label="Save Status")
269
  prompts_display = gr.Textbox(label="Identified Prompts")
270
 
271
  def handle_zip_import(zip_file):
272
  result = import_prompts_from_zip(zip_file)
273
  if isinstance(result, list):
@@ -278,6 +287,13 @@ def create_import_multiple_prompts_tab():
278
  else:
279
  return gr.update(value=result), [], gr.update(value=""), []
280
 
 
  def handle_prompt_selection(selected_title, prompts):
282
  selected_prompt = next((prompt for prompt in prompts if prompt['title'] == selected_title), None)
283
  if selected_prompt:
@@ -305,23 +321,68 @@ def create_import_multiple_prompts_tab():
305
  outputs=[title_input, author_input, system_input, user_input, keywords_input]
306
  )
307
 
 
308
  def save_prompt_to_db(title, author, system, user, keywords):
309
  keyword_list = [k.strip() for k in keywords.split(',') if k.strip()]
310
- return insert_prompt_to_db(title, author, system, user, keyword_list)
 
311
 
312
  save_button.click(
313
  fn=save_prompt_to_db,
314
  inputs=[title_input, author_input, system_input, user_input, keywords_input],
315
- outputs=save_output
316
  )
317
 
 
318
  def update_prompt_dropdown():
319
- return gr.update(choices=load_preset_prompts())
 
 
 
321
  save_button.click(
322
  fn=update_prompt_dropdown,
323
  inputs=[],
324
- outputs=[gr.Dropdown(label="Select Preset Prompt")]
325
  )
326
 
327
 
@@ -385,4 +446,392 @@ def import_obsidian_vault(vault_path, progress=gr.Progress()):
385
  except Exception as e:
386
  error_msg = f"Error scanning vault: {str(e)}\n{traceback.format_exc()}"
387
  logger.error(error_msg)
388
- return 0, 0, [error_msg]
2
  # Functionality to import content into the DB
3
  #
4
  # Imports
5
+ from datetime import datetime
6
  from time import sleep
7
  import logging
8
  import re
9
  import shutil
10
  import tempfile
11
  import os
12
+ from pathlib import Path
13
+ import sqlite3
14
  import traceback
15
+ from typing import Optional, List, Dict, Tuple
16
+ import uuid
17
  import zipfile
18
  #
19
  # External Imports
20
  import gradio as gr
21
+ from chardet import detect
22
+
23
  #
24
  # Local Imports
25
+ from App_Function_Libraries.DB.DB_Manager import insert_prompt_to_db, import_obsidian_note_to_db, \
26
+ add_media_to_database, list_prompts
27
  from App_Function_Libraries.Prompt_Handling import import_prompt_from_file, import_prompts_from_zip#
28
  from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
29
+ #
30
  ###################################################################################################################
31
  #
32
  # Functions:
 
210
  outputs=save_output
211
  )
212
 
  def create_import_item_tab():
214
  with gr.TabItem("Import Markdown/Text Files", visible=True):
215
  gr.Markdown("# Import a markdown file or text file into the database")
 
248
  gr.Markdown("# Import multiple prompts into the database")
249
  gr.Markdown("Upload a zip file containing multiple prompt files (txt or md)")
250
 
251
+ # Initialize state variables for pagination
252
+ current_page_state = gr.State(value=1)
253
+ total_pages_state = gr.State(value=1)
254
+
255
  with gr.Row():
256
  with gr.Column():
257
  zip_file = gr.File(label="Upload zip file for import", file_types=["zip"])
258
  import_button = gr.Button("Import Prompts")
259
  prompts_dropdown = gr.Dropdown(label="Select Prompt to Edit", choices=[])
260
+ prev_page_button = gr.Button("Previous Page", visible=False)
261
+ page_display = gr.Markdown("Page 1 of X", visible=False)
262
+ next_page_button = gr.Button("Next Page", visible=False)
263
  title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content")
264
  author_input = gr.Textbox(label="Author", placeholder="Enter the author's name")
265
  system_input = gr.Textbox(label="System", placeholder="Enter the system message for the prompt",
 
273
  save_output = gr.Textbox(label="Save Status")
274
  prompts_display = gr.Textbox(label="Identified Prompts")
275
 
276
+ # State to store imported prompts
277
+ zip_import_state = gr.State([])
278
+
279
+ # Function to handle zip import
280
  def handle_zip_import(zip_file):
281
  result = import_prompts_from_zip(zip_file)
282
  if isinstance(result, list):
 
287
  else:
288
  return gr.update(value=result), [], gr.update(value=""), []
289
 
290
+ import_button.click(
291
+ fn=handle_zip_import,
292
+ inputs=[zip_file],
293
+ outputs=[import_output, prompts_dropdown, prompts_display, zip_import_state]
294
+ )
295
+
296
+ # Function to handle prompt selection from imported prompts
297
  def handle_prompt_selection(selected_title, prompts):
298
  selected_prompt = next((prompt for prompt in prompts if prompt['title'] == selected_title), None)
299
  if selected_prompt:
 
321
  outputs=[title_input, author_input, system_input, user_input, keywords_input]
322
  )
323
 
324
+ # Function to save prompt to the database
325
  def save_prompt_to_db(title, author, system, user, keywords):
326
  keyword_list = [k.strip() for k in keywords.split(',') if k.strip()]
327
+ result = insert_prompt_to_db(title, author, system, user, keyword_list)
328
+ return result
329
 
330
  save_button.click(
331
  fn=save_prompt_to_db,
332
  inputs=[title_input, author_input, system_input, user_input, keywords_input],
333
+ outputs=[save_output]
334
+ )
335
+
336
+ # Adding pagination controls to navigate prompts in the database
337
+ def on_prev_page_click(current_page, total_pages):
338
+ new_page = max(current_page - 1, 1)
339
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=10)
340
+ page_display_text = f"Page {current_page} of {total_pages}"
341
+ return (
342
+ gr.update(choices=prompts),
343
+ gr.update(value=page_display_text),
344
+ current_page
345
+ )
346
+
347
+ def on_next_page_click(current_page, total_pages):
348
+ new_page = min(current_page + 1, total_pages)
349
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=10)
350
+ page_display_text = f"Page {current_page} of {total_pages}"
351
+ return (
352
+ gr.update(choices=prompts),
353
+ gr.update(value=page_display_text),
354
+ current_page
355
+ )
356
+
357
+ prev_page_button.click(
358
+ fn=on_prev_page_click,
359
+ inputs=[current_page_state, total_pages_state],
360
+ outputs=[prompts_dropdown, page_display, current_page_state]
361
+ )
362
+
363
+ next_page_button.click(
364
+ fn=on_next_page_click,
365
+ inputs=[current_page_state, total_pages_state],
366
+ outputs=[prompts_dropdown, page_display, current_page_state]
367
  )
368
 
369
+ # Function to update prompts dropdown after saving to the database
370
  def update_prompt_dropdown():
371
+ prompts, total_pages, current_page = list_prompts(page=1, per_page=10)
372
+ page_display_text = f"Page {current_page} of {total_pages}"
373
+ return (
374
+ gr.update(choices=prompts),
375
+ gr.update(visible=True),
376
+ gr.update(value=page_display_text, visible=True),
377
+ current_page,
378
+ total_pages
379
+ )
380
 
381
+ # Update the dropdown after saving
382
  save_button.click(
383
  fn=update_prompt_dropdown,
384
  inputs=[],
385
+ outputs=[prompts_dropdown, prev_page_button, page_display, current_page_state, total_pages_state]
386
  )
387
 
388
 
 
446
  except Exception as e:
447
  error_msg = f"Error scanning vault: {str(e)}\n{traceback.format_exc()}"
448
  logger.error(error_msg)
449
+ return 0, 0, [error_msg]
450
+
451
+
452
+ class RAGQABatchImporter:
453
+ def __init__(self, db_path: str):
454
+ self.db_path = Path(db_path)
455
+ self.setup_logging()
456
+ self.file_processor = FileProcessor()
457
+ self.zip_validator = ZipValidator()
458
+
459
+ def setup_logging(self):
460
+ logging.basicConfig(
461
+ level=logging.INFO,
462
+ format='%(asctime)s - %(levelname)s - %(message)s',
463
+ handlers=[
464
+ logging.FileHandler('rag_qa_import.log'),
465
+ logging.StreamHandler()
466
+ ]
467
+ )
468
+
469
+ def process_markdown_content(self, content: str) -> List[Dict[str, str]]:
470
+ """Process markdown content into a conversation format."""
471
+ messages = []
472
+ sections = content.split('\n\n')
473
+
474
+ for section in sections:
475
+ if section.strip():
476
+ messages.append({
477
+ 'role': 'user',
478
+ 'content': section.strip()
479
+ })
480
+
481
+ return messages
482
+
483
+ def process_keywords(self, db: sqlite3.Connection, conversation_id: str, keywords: str):
484
+ """Process and link keywords to a conversation."""
485
+ if not keywords:
486
+ return
487
+
488
+ keyword_list = [k.strip() for k in keywords.split(',')]
489
+ for keyword in keyword_list:
490
+ # Insert keyword if it doesn't exist
491
+ db.execute("""
492
+ INSERT OR IGNORE INTO rag_qa_keywords (keyword)
493
+ VALUES (?)
494
+ """, (keyword,))
495
+
496
+ # Get keyword ID
497
+ keyword_id = db.execute("""
498
+ SELECT id FROM rag_qa_keywords WHERE keyword = ?
499
+ """, (keyword,)).fetchone()[0]
500
+
501
+ # Link keyword to conversation
502
+ db.execute("""
503
+ INSERT INTO rag_qa_conversation_keywords
504
+ (conversation_id, keyword_id)
505
+ VALUES (?, ?)
506
+ """, (conversation_id, keyword_id))
507
+
508
+ def import_single_file(
509
+ self,
510
+ db: sqlite3.Connection,
511
+ content: str,
512
+ filename: str,
513
+ keywords: str,
514
+ custom_prompt: Optional[str] = None,
515
+ rating: Optional[int] = None
516
+ ) -> str:
517
+ """Import a single file's content into the database"""
518
+ conversation_id = str(uuid.uuid4())
519
+ current_time = datetime.now().isoformat()
520
+
521
+ # Process filename into title
522
+ title = FileProcessor.process_filename_to_title(filename)
523
+ if title.lower().endswith(('.md', '.txt')):
524
+ title = title[:-3] if title.lower().endswith('.md') else title[:-4]
525
+
526
+ # Insert conversation metadata
527
+ db.execute("""
528
+ INSERT INTO conversation_metadata
529
+ (conversation_id, created_at, last_updated, title, rating)
530
+ VALUES (?, ?, ?, ?, ?)
531
+ """, (conversation_id, current_time, current_time, title, rating))
532
+
533
+ # Process content and insert messages
534
+ messages = self.process_markdown_content(content)
535
+ for msg in messages:
536
+ db.execute("""
537
+ INSERT INTO rag_qa_chats
538
+ (conversation_id, timestamp, role, content)
539
+ VALUES (?, ?, ?, ?)
540
+ """, (conversation_id, current_time, msg['role'], msg['content']))
541
+
542
+ # Process keywords
543
+ self.process_keywords(db, conversation_id, keywords)
544
+
545
+ return conversation_id
546
+
547
+ def extract_zip(self, zip_path: str) -> List[Tuple[str, str]]:
548
+ """Extract and validate files from zip"""
549
+ is_valid, error_msg, valid_files = self.zip_validator.validate_zip_file(zip_path)
550
+ if not is_valid:
551
+ raise ValueError(error_msg)
552
+
553
+ files = []
554
+ with zipfile.ZipFile(zip_path, 'r') as zip_ref:
555
+ for filename in valid_files:
556
+ with zip_ref.open(filename) as f:
557
+ content = f.read()
558
+ # Try to decode with detected encoding
559
+ try:
560
+ detected_encoding = detect(content)['encoding'] or 'utf-8'
561
+ content = content.decode(detected_encoding)
562
+ except UnicodeDecodeError:
563
+ content = content.decode('utf-8', errors='replace')
564
+
565
+ filename = os.path.basename(filename)
566
+ files.append((filename, content))
567
+ return files
568
+
569
+ def import_files(
570
+ self,
571
+ files: List[str],
572
+ keywords: str = "",
573
+ custom_prompt: Optional[str] = None,
574
+ rating: Optional[int] = None,
575
+ progress=gr.Progress()
576
+ ) -> Tuple[bool, str]:
577
+ """Import multiple files or zip files into the RAG QA database."""
578
+ try:
579
+ imported_files = []
580
+
581
+ with sqlite3.connect(self.db_path) as db:
582
+ # Process each file
583
+ for file_path in progress.tqdm(files, desc="Processing files"):
584
+ filename = os.path.basename(file_path)
585
+
586
+ # Handle zip files
587
+ if filename.lower().endswith('.zip'):
588
+ zip_files = self.extract_zip(file_path)
589
+ for zip_filename, content in progress.tqdm(zip_files, desc=f"Processing files from {filename}"):
590
+ conv_id = self.import_single_file(
591
+ db=db,
592
+ content=content,
593
+ filename=zip_filename,
594
+ keywords=keywords,
595
+ custom_prompt=custom_prompt,
596
+ rating=rating
597
+ )
598
+ imported_files.append(zip_filename)
599
+
600
+ # Handle individual markdown/text files
601
+ elif filename.lower().endswith(('.md', '.txt')):
602
+ with open(file_path, 'r', encoding='utf-8') as f:
603
+ content = f.read()
604
+
605
+ conv_id = self.import_single_file(
606
+ db=db,
607
+ content=content,
608
+ filename=filename,
609
+ keywords=keywords,
610
+ custom_prompt=custom_prompt,
611
+ rating=rating
612
+ )
613
+ imported_files.append(filename)
614
+
615
+ db.commit()
616
+
617
+ return True, f"Successfully imported {len(imported_files)} files:\n" + "\n".join(imported_files)
618
+
619
+ except Exception as e:
620
+ logging.error(f"Import failed: {str(e)}")
621
+ return False, f"Import failed: {str(e)}"
622
+
623
+
624
+ class FileProcessor:
625
+ """Handles file reading and name processing"""
626
+
627
+ VALID_EXTENSIONS = {'.md', '.txt', '.zip'}
628
+ ENCODINGS_TO_TRY = [
629
+ 'utf-8',
630
+ 'utf-16',
631
+ 'windows-1252',
632
+ 'iso-8859-1',
633
+ 'ascii'
634
+ ]
635
+
636
+ @staticmethod
637
+ def detect_encoding(file_path: str) -> str:
638
+ """Detect the file encoding using chardet"""
639
+ with open(file_path, 'rb') as file:
640
+ raw_data = file.read()
641
+ result = detect(raw_data)
642
+ return result['encoding'] or 'utf-8'
643
+
644
+ @staticmethod
645
+ def read_file_content(file_path: str) -> str:
646
+ """Read file content with automatic encoding detection"""
647
+ detected_encoding = FileProcessor.detect_encoding(file_path)
648
+
649
+ # Try detected encoding first
650
+ try:
651
+ with open(file_path, 'r', encoding=detected_encoding) as f:
652
+ return f.read()
653
+ except UnicodeDecodeError:
654
+ # If detected encoding fails, try others
655
+ for encoding in FileProcessor.ENCODINGS_TO_TRY:
656
+ try:
657
+ with open(file_path, 'r', encoding=encoding) as f:
658
+ return f.read()
659
+ except UnicodeDecodeError:
660
+ continue
661
+
662
+ # If all encodings fail, use utf-8 with error handling
663
+ with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
664
+ return f.read()
665
+
666
+ @staticmethod
667
+ def process_filename_to_title(filename: str) -> str:
668
+ """Convert filename to a readable title"""
669
+ # Remove extension
670
+ name = os.path.splitext(filename)[0]
671
+
672
+ # Look for date patterns
673
+ date_pattern = r'(\d{4}[-_]?\d{2}[-_]?\d{2})'
674
+ date_match = re.search(date_pattern, name)
675
+ date_str = ""
676
+ if date_match:
677
+ try:
678
+ date = datetime.strptime(date_match.group(1).replace('_', '-'), '%Y-%m-%d')
679
+ date_str = date.strftime("%b %d, %Y")
680
+ name = name.replace(date_match.group(1), '').strip('-_')
681
+ except ValueError:
682
+ pass
683
+
684
+ # Replace separators with spaces
685
+ name = re.sub(r'[-_]+', ' ', name)
686
+
687
+ # Remove redundant spaces
688
+ name = re.sub(r'\s+', ' ', name).strip()
689
+
690
+ # Capitalize words, excluding certain words
691
+ exclude_words = {'a', 'an', 'the', 'in', 'on', 'at', 'to', 'for', 'of', 'with'}
692
+ words = name.split()
693
+ capitalized = []
694
+ for i, word in enumerate(words):
695
+ if i == 0 or word not in exclude_words:
696
+ capitalized.append(word.capitalize())
697
+ else:
698
+ capitalized.append(word.lower())
699
+ name = ' '.join(capitalized)
700
+
701
+ # Add date if found
702
+ if date_str:
703
+ name = f"{name} - {date_str}"
704
+
705
+ return name
706
+
707
+
708
+ class ZipValidator:
709
+ """Validates zip file contents and structure"""
710
+
711
+ MAX_ZIP_SIZE = 100 * 1024 * 1024 # 100MB
712
+ MAX_FILES = 100
713
+ VALID_EXTENSIONS = {'.md', '.txt'}
714
+
715
+ @staticmethod
716
+ def validate_zip_file(zip_path: str) -> Tuple[bool, str, List[str]]:
717
+ """
718
+ Validate zip file and its contents
719
+ Returns: (is_valid, error_message, valid_files)
720
+ """
721
+ try:
722
+ # Check zip file size
723
+ if os.path.getsize(zip_path) > ZipValidator.MAX_ZIP_SIZE:
724
+ return False, "Zip file too large (max 100MB)", []
725
+
726
+ valid_files = []
727
+ with zipfile.ZipFile(zip_path, 'r') as zip_ref:
728
+ # Check number of files
729
+ if len(zip_ref.filelist) > ZipValidator.MAX_FILES:
730
+ return False, f"Too many files in zip (max {ZipValidator.MAX_FILES})", []
731
+
732
+ # Check for directory traversal attempts
733
+ for file_info in zip_ref.filelist:
734
+ if '..' in file_info.filename or file_info.filename.startswith('/'):
735
+ return False, "Invalid file paths detected", []
736
+
737
+ # Validate each file
738
+ total_size = 0
739
+ for file_info in zip_ref.filelist:
740
+ # Skip directories
741
+ if file_info.filename.endswith('/'):
742
+ continue
743
+
744
+ # Check file size
745
+ if file_info.file_size > ZipValidator.MAX_ZIP_SIZE:
746
+ return False, f"File {file_info.filename} too large", []
747
+
748
+ total_size += file_info.file_size
749
+ if total_size > ZipValidator.MAX_ZIP_SIZE:
750
+ return False, "Total uncompressed size too large", []
751
+
752
+ # Check file extension
753
+ ext = os.path.splitext(file_info.filename)[1].lower()
754
+ if ext in ZipValidator.VALID_EXTENSIONS:
755
+ valid_files.append(file_info.filename)
756
+
757
+ if not valid_files:
758
+ return False, "No valid markdown or text files found in zip", []
759
+
760
+ return True, "", valid_files
761
+
762
+ except zipfile.BadZipFile:
763
+ return False, "Invalid or corrupted zip file", []
764
+ except Exception as e:
765
+ return False, f"Error processing zip file: {str(e)}", []
766
+
767
+
768
+ def create_conversation_import_tab() -> gr.Tab:
769
+ """Create the import tab for the Gradio interface"""
770
+ with gr.Tab("Import RAG Chats") as tab:
771
+ gr.Markdown("# Import RAG Chats into the Database")
772
+ gr.Markdown("""
773
+ Import your RAG Chat markdown/text files individually or as a zip archive
774
+
775
+ Supported file types:
776
+ - Markdown (.md)
777
+ - Text (.txt)
778
+ - Zip archives containing .md or .txt files
779
+
780
+ Maximum zip file size: 100MB
781
+ Maximum files per zip: 100
782
+ """)
783
+ with gr.Row():
784
+ with gr.Column():
785
+ import_files = gr.File(
786
+ label="Upload Files",
787
+ file_types=["txt", "md", "zip"],
788
+ file_count="multiple"
789
+ )
790
+
791
+ keywords_input = gr.Textbox(
792
+ label="Keywords",
793
+ placeholder="Enter keywords to apply to all imported files (comma-separated)"
794
+ )
795
+
796
+ custom_prompt_input = gr.Textbox(
797
+ label="Custom Prompt",
798
+ placeholder="Enter a custom prompt for processing (optional)"
799
+ )
800
+
801
+ rating_input = gr.Slider(
802
+ minimum=1,
803
+ maximum=3,
804
+ step=1,
805
+ label="Rating (1-3)",
806
+ value=None
807
+ )
808
+
809
+ with gr.Column():
810
+ import_button = gr.Button("Import Files")
811
+ import_output = gr.Textbox(
812
+ label="Import Status",
813
+ lines=10
814
+ )
815
+
816
+ def handle_import(files, keywords, custom_prompt, rating):
817
+ importer = RAGQABatchImporter("rag_qa.db") # Update with your DB path
818
+ success, message = importer.import_files(
819
+ files=[f.name for f in files],
820
+ keywords=keywords,
821
+ custom_prompt=custom_prompt,
822
+ rating=rating
823
+ )
824
+ return message
825
+
826
+ import_button.click(
827
+ fn=handle_import,
828
+ inputs=[
829
+ import_files,
830
+ keywords_input,
831
+ custom_prompt_input,
832
+ rating_input
833
+ ],
834
+ outputs=import_output
835
+ )
836
+
837
+ return tab
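Note (not part of the commit): a short sketch of using the RAGQABatchImporter class defined above outside of Gradio, assuming the rag_qa.db schema referenced by import_files already exists and that the default gr.Progress() no-ops outside an event handler; the file paths and keywords are hypothetical.

    importer = RAGQABatchImporter("rag_qa.db")
    ok, message = importer.import_files(
        files=["notes/retrieval_tests.md", "exports/chat_logs.zip"],  # illustrative paths
        keywords="rag, evaluation",
        custom_prompt=None,
        rating=2,
    )
    print(ok, message)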
App_Function_Libraries/Gradio_UI/Keywords.py CHANGED
@@ -4,22 +4,29 @@
4
  # The Keywords tab allows the user to add, delete, view, and export keywords from the database.
5
  #
6
  # Imports:
7
-
8
  #
9
  # External Imports
10
  import gradio as gr
11
  #
12
  # Internal Imports
13
  from App_Function_Libraries.DB.DB_Manager import add_keyword, delete_keyword, keywords_browser_interface, export_keywords_to_csv
14
- #
15
  #
16
  ######################################################################################################################
17
  #
18
  # Functions:
19
 
20
-
21
  def create_export_keywords_tab():
22
- with gr.TabItem("Export Keywords", visible=True):
23
  with gr.Row():
24
  with gr.Column():
25
  export_keywords_button = gr.Button("Export Keywords")
@@ -33,8 +40,8 @@ def create_export_keywords_tab():
33
  )
34
 
35
  def create_view_keywords_tab():
36
- with gr.TabItem("View Keywords", visible=True):
37
- gr.Markdown("# Browse Keywords")
38
  with gr.Column():
39
  browse_output = gr.Markdown()
40
  browse_button = gr.Button("View Existing Keywords")
@@ -42,7 +49,7 @@ def create_view_keywords_tab():
42
 
43
 
44
  def create_add_keyword_tab():
45
- with gr.TabItem("Add Keywords", visible=True):
46
  with gr.Row():
47
  with gr.Column():
48
  gr.Markdown("# Add Keywords to the Database")
@@ -54,7 +61,7 @@ def create_add_keyword_tab():
54
 
55
 
56
  def create_delete_keyword_tab():
57
- with gr.Tab("Delete Keywords", visible=True):
58
  with gr.Row():
59
  with gr.Column():
60
  gr.Markdown("# Delete Keywords from the Database")
@@ -63,3 +70,289 @@ def create_delete_keyword_tab():
63
  with gr.Row():
64
  delete_output = gr.Textbox(label="Result")
65
  delete_button.click(fn=delete_keyword, inputs=delete_input, outputs=delete_output)
 
4
  # The Keywords tab allows the user to add, delete, view, and export keywords from the database.
5
  #
6
  # Imports:
 
7
  #
8
  # External Imports
9
  import gradio as gr
10
+
11
+ from App_Function_Libraries.DB.Character_Chat_DB import view_char_keywords, add_char_keywords, delete_char_keyword, \
12
+ export_char_keywords_to_csv
13
  #
14
  # Internal Imports
15
  from App_Function_Libraries.DB.DB_Manager import add_keyword, delete_keyword, keywords_browser_interface, export_keywords_to_csv
16
+ from App_Function_Libraries.DB.Prompts_DB import view_prompt_keywords, delete_prompt_keyword, \
17
+ export_prompt_keywords_to_csv
18
+ from App_Function_Libraries.DB.RAG_QA_Chat_DB import view_rag_keywords, get_all_collections, \
19
+ get_keywords_for_collection, create_keyword_collection, add_keyword_to_collection, delete_rag_keyword, \
20
+ export_rag_keywords_to_csv
21
+
22
+
23
  #
24
  ######################################################################################################################
25
  #
26
  # Functions:
27
 
 
28
  def create_export_keywords_tab():
29
+ with gr.TabItem("Export MediaDB Keywords", visible=True):
30
  with gr.Row():
31
  with gr.Column():
32
  export_keywords_button = gr.Button("Export Keywords")
 
40
  )
41
 
42
  def create_view_keywords_tab():
43
+ with gr.TabItem("View MediaDB Keywords", visible=True):
44
+ gr.Markdown("# Browse MediaDB Keywords")
45
  with gr.Column():
46
  browse_output = gr.Markdown()
47
  browse_button = gr.Button("View Existing Keywords")
 
49
 
50
 
51
  def create_add_keyword_tab():
52
+ with gr.TabItem("Add MediaDB Keywords", visible=True):
53
  with gr.Row():
54
  with gr.Column():
55
  gr.Markdown("# Add Keywords to the Database")
 
61
 
62
 
63
  def create_delete_keyword_tab():
64
+ with gr.Tab("Delete MediaDB Keywords", visible=True):
65
  with gr.Row():
66
  with gr.Column():
67
  gr.Markdown("# Delete Keywords from the Database")
 
70
  with gr.Row():
71
  delete_output = gr.Textbox(label="Result")
72
  delete_button.click(fn=delete_keyword, inputs=delete_input, outputs=delete_output)
73
+
74
+ #
75
+ # End of Media DB Keyword tabs
76
+ ##########################################################
77
+
78
+
79
+ ############################################################
80
+ #
81
+ # Character DB Keyword functions
82
+
83
+ def create_character_keywords_tab():
84
+ """Creates the Character Keywords management tab"""
85
+ with gr.Tab("Character Keywords"):
86
+ gr.Markdown("# Character Keywords Management")
87
+
88
+ with gr.Tabs():
89
+ # View Character Keywords Tab
90
+ with gr.TabItem("View Keywords"):
91
+ with gr.Column():
92
+ refresh_char_keywords = gr.Button("Refresh Character Keywords")
93
+ char_keywords_output = gr.Markdown()
94
+ view_char_keywords()
95
+ refresh_char_keywords.click(
96
+ fn=view_char_keywords,
97
+ outputs=char_keywords_output
98
+ )
99
+
100
+ # Add Character Keywords Tab
101
+ with gr.TabItem("Add Keywords"):
102
+ with gr.Column():
103
+ char_name = gr.Textbox(label="Character Name")
104
+ new_keywords = gr.Textbox(label="New Keywords (comma-separated)")
105
+ add_char_keyword_btn = gr.Button("Add Keywords")
106
+ add_char_result = gr.Markdown()
107
+
108
+ add_char_keyword_btn.click(
109
+ fn=add_char_keywords,
110
+ inputs=[char_name, new_keywords],
111
+ outputs=add_char_result
112
+ )
113
+
114
+ # Delete Character Keywords Tab (New)
115
+ with gr.TabItem("Delete Keywords"):
116
+ with gr.Column():
117
+ delete_char_name = gr.Textbox(label="Character Name")
118
+ delete_char_keyword_input = gr.Textbox(label="Keyword to Delete")
119
+ delete_char_keyword_btn = gr.Button("Delete Keyword")
120
+ delete_char_result = gr.Markdown()
121
+
122
+ delete_char_keyword_btn.click(
123
+ fn=delete_char_keyword,
124
+ inputs=[delete_char_name, delete_char_keyword_input],
125
+ outputs=delete_char_result
126
+ )
127
+
128
+ # Export Character Keywords Tab (New)
129
+ with gr.TabItem("Export Keywords"):
130
+ with gr.Column():
131
+ export_char_keywords_btn = gr.Button("Export Character Keywords")
132
+ export_char_file = gr.File(label="Download Exported Keywords")
133
+ export_char_status = gr.Textbox(label="Export Status")
134
+
135
+ export_char_keywords_btn.click(
136
+ fn=export_char_keywords_to_csv,
137
+ outputs=[export_char_status, export_char_file]
138
+ )
139
+
140
+ #
141
+ # End of Character Keywords tab
142
+ ##########################################################
143
+
144
+ ############################################################
145
+ #
146
+ # RAG QA Keywords functions
147
+
148
+ def create_rag_qa_keywords_tab():
149
+ """Creates the RAG QA Keywords management tab"""
150
+ with gr.Tab("RAG QA Keywords"):
151
+ gr.Markdown("# RAG QA Keywords Management")
152
+
153
+ with gr.Tabs():
154
+ # View RAG QA Keywords Tab
155
+ with gr.TabItem("View Keywords"):
156
+ with gr.Column():
157
+ refresh_rag_keywords = gr.Button("Refresh RAG QA Keywords")
158
+ rag_keywords_output = gr.Markdown()
159
+
160
+ view_rag_keywords()
161
+
162
+ refresh_rag_keywords.click(
163
+ fn=view_rag_keywords,
164
+ outputs=rag_keywords_output
165
+ )
166
+
167
+ # Add RAG QA Keywords Tab
168
+ with gr.TabItem("Add Keywords"):
169
+ with gr.Column():
170
+ new_rag_keywords = gr.Textbox(label="New Keywords (comma-separated)")
171
+ add_rag_keyword_btn = gr.Button("Add Keywords")
172
+ add_rag_result = gr.Markdown()
173
+
174
+ add_rag_keyword_btn.click(
175
+ fn=add_keyword,
176
+ inputs=new_rag_keywords,
177
+ outputs=add_rag_result
178
+ )
179
+
180
+ # Delete RAG QA Keywords Tab (New)
181
+ with gr.TabItem("Delete Keywords"):
182
+ with gr.Column():
183
+ delete_rag_keyword_input = gr.Textbox(label="Keyword to Delete")
184
+ delete_rag_keyword_btn = gr.Button("Delete Keyword")
185
+ delete_rag_result = gr.Markdown()
186
+
187
+ delete_rag_keyword_btn.click(
188
+ fn=delete_rag_keyword,
189
+ inputs=delete_rag_keyword_input,
190
+ outputs=delete_rag_result
191
+ )
192
+
193
+ # Export RAG QA Keywords Tab (New)
194
+ with gr.TabItem("Export Keywords"):
195
+ with gr.Column():
196
+ export_rag_keywords_btn = gr.Button("Export RAG QA Keywords")
197
+ export_rag_file = gr.File(label="Download Exported Keywords")
198
+ export_rag_status = gr.Textbox(label="Export Status")
199
+
200
+ export_rag_keywords_btn.click(
201
+ fn=export_rag_keywords_to_csv,
202
+ outputs=[export_rag_status, export_rag_file]
203
+ )
204
+
205
+ #
206
+ # End of RAG QA Keywords tab
207
+ ##########################################################
208
+
209
+
210
+ ############################################################
211
+ #
212
+ # Prompt Keywords functions
213
+
214
+ def create_prompt_keywords_tab():
215
+ """Creates the Prompt Keywords management tab"""
216
+ with gr.Tab("Prompt Keywords"):
217
+ gr.Markdown("# Prompt Keywords Management")
218
+
219
+ with gr.Tabs():
220
+ # View Keywords Tab
221
+ with gr.TabItem("View Keywords"):
222
+ with gr.Column():
223
+ refresh_prompt_keywords = gr.Button("Refresh Prompt Keywords")
224
+ prompt_keywords_output = gr.Markdown()
225
+
226
+ refresh_prompt_keywords.click(
227
+ fn=view_prompt_keywords,
228
+ outputs=prompt_keywords_output
229
+ )
230
+
231
+ # Add Keywords Tab (using existing prompt management functions)
232
+ with gr.TabItem("Add Keywords"):
233
+ gr.Markdown("""
234
+ To add keywords to prompts, please use the Prompt Management interface.
235
+ Keywords can be added when creating or editing a prompt.
236
+ """)
237
+
238
+ # Delete Keywords Tab
239
+ with gr.TabItem("Delete Keywords"):
240
+ with gr.Column():
241
+ delete_prompt_keyword_input = gr.Textbox(label="Keyword to Delete")
242
+ delete_prompt_keyword_btn = gr.Button("Delete Keyword")
243
+ delete_prompt_result = gr.Markdown()
244
+
245
+ delete_prompt_keyword_btn.click(
246
+ fn=delete_prompt_keyword,
247
+ inputs=delete_prompt_keyword_input,
248
+ outputs=delete_prompt_result
249
+ )
250
+
251
+ # Export Keywords Tab
252
+ with gr.TabItem("Export Keywords"):
253
+ with gr.Column():
254
+ export_prompt_keywords_btn = gr.Button("Export Prompt Keywords")
255
+ export_prompt_status = gr.Textbox(label="Export Status", interactive=False)
256
+ export_prompt_file = gr.File(label="Download Exported Keywords", interactive=False)
257
+
258
+ def handle_export():
259
+ status, file_path = export_prompt_keywords_to_csv()
260
+ if file_path:
261
+ return status, file_path
262
+ return status, None
263
+
264
+ export_prompt_keywords_btn.click(
265
+ fn=handle_export,
266
+ outputs=[export_prompt_status, export_prompt_file]
267
+ )
268
+ #
269
+ # End of Prompt Keywords tab
270
+ ############################################################
271
+
272
+
273
+ ############################################################
274
+ #
275
+ # Meta-Keywords functions
276
+
277
+ def create_meta_keywords_tab():
278
+ """Creates the Meta-Keywords management tab"""
279
+ with gr.Tab("Meta-Keywords"):
280
+ gr.Markdown("# Meta-Keywords Management")
281
+
282
+ with gr.Tabs():
283
+ # View Meta-Keywords Tab
284
+ with gr.TabItem("View Collections"):
285
+ with gr.Column():
286
+ refresh_collections = gr.Button("Refresh Collections")
287
+ collections_output = gr.Markdown()
288
+
289
+ def view_collections():
290
+ try:
291
+ collections, _, _ = get_all_collections()
292
+ if collections:
293
+ result = "### Keyword Collections:\n"
294
+ for collection in collections:
295
+ keywords = get_keywords_for_collection(collection)
296
+ result += f"\n**{collection}**:\n"
297
+ result += "\n".join([f"- {k}" for k in keywords])
298
+ result += "\n"
299
+ return result
300
+ return "No collections found."
301
+ except Exception as e:
302
+ return f"Error retrieving collections: {str(e)}"
303
+
304
+ refresh_collections.click(
305
+ fn=view_collections,
306
+ outputs=collections_output
307
+ )
308
+
309
+ # Create Collection Tab
310
+ with gr.TabItem("Create Collection"):
311
+ with gr.Column():
312
+ collection_name = gr.Textbox(label="Collection Name")
313
+ create_collection_btn = gr.Button("Create Collection")
314
+ create_result = gr.Markdown()
315
+
316
+ def create_collection(name: str):
317
+ try:
318
+ create_keyword_collection(name)
319
+ return f"Successfully created collection: {name}"
320
+ except Exception as e:
321
+ return f"Error creating collection: {str(e)}"
322
+
323
+ create_collection_btn.click(
324
+ fn=create_collection,
325
+ inputs=collection_name,
326
+ outputs=create_result
327
+ )
328
+
329
+ # Add Keywords to Collection Tab
330
+ with gr.TabItem("Add to Collection"):
331
+ with gr.Column():
332
+ collection_select = gr.Textbox(label="Collection Name")
333
+ keywords_to_add = gr.Textbox(label="Keywords to Add (comma-separated)")
334
+ add_to_collection_btn = gr.Button("Add Keywords to Collection")
335
+ add_to_collection_result = gr.Markdown()
336
+
337
+ def add_keywords_to_collection(collection: str, keywords: str):
338
+ try:
339
+ keywords_list = [k.strip() for k in keywords.split(",") if k.strip()]
340
+ for keyword in keywords_list:
341
+ add_keyword_to_collection(collection, keyword)
342
+ return f"Successfully added {len(keywords_list)} keywords to collection {collection}"
343
+ except Exception as e:
344
+ return f"Error adding keywords to collection: {str(e)}"
345
+
346
+ add_to_collection_btn.click(
347
+ fn=add_keywords_to_collection,
348
+ inputs=[collection_select, keywords_to_add],
349
+ outputs=add_to_collection_result
350
+ )
351
+
352
+ #
353
+ # End of Meta-Keywords tab
354
+ ##########################################################
355
+
356
+ #
357
+ # End of Keywords.py
358
+ ######################################################################################################################
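Note (not part of the commit): a sketch of mounting the new keyword management tabs in a standalone Gradio app, assuming each create_*_tab function registers its own components when called inside a Blocks context, as the definitions above suggest.

    import gradio as gr
    from App_Function_Libraries.Gradio_UI.Keywords import (
        create_character_keywords_tab,
        create_rag_qa_keywords_tab,
        create_prompt_keywords_tab,
        create_meta_keywords_tab,
    )

    with gr.Blocks() as demo:
        with gr.Tabs():
            # Each call builds its own gr.Tab with view/add/delete/export sub-tabs.
            create_character_keywords_tab()
            create_rag_qa_keywords_tab()
            create_prompt_keywords_tab()
            create_meta_keywords_tab()

    demo.launch()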
App_Function_Libraries/Gradio_UI/Live_Recording.py CHANGED
@@ -13,6 +13,8 @@ from App_Function_Libraries.Audio.Audio_Transcription_Lib import (record_audio,
13
  stop_recording)
14
  from App_Function_Libraries.DB.DB_Manager import add_media_to_database
15
  from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
 
 
16
  #
17
  #######################################################################################################################
18
  #
@@ -22,6 +24,16 @@ whisper_models = ["small", "medium", "small.en", "medium.en", "medium", "large",
22
  "distil-large-v2", "distil-medium.en", "distil-small.en"]
23
 
24
  def create_live_recording_tab():
 
  with gr.Tab("Live Recording and Transcription", visible=True):
26
  gr.Markdown("# Live Audio Recording and Transcription")
27
  with gr.Row():
@@ -34,6 +46,13 @@ def create_live_recording_tab():
34
  custom_title = gr.Textbox(label="Custom Title (for database)", visible=False)
35
  record_button = gr.Button("Start Recording")
36
  stop_button = gr.Button("Stop Recording")
37
  with gr.Column():
38
  output = gr.Textbox(label="Transcription", lines=10)
39
  audio_output = gr.Audio(label="Recorded Audio", visible=False)
 
13
  stop_recording)
14
  from App_Function_Libraries.DB.DB_Manager import add_media_to_database
15
  from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
16
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
17
+
18
  #
19
  #######################################################################################################################
20
  #
 
24
  "distil-large-v2", "distil-medium.en", "distil-small.en"]
25
 
26
  def create_live_recording_tab():
27
+ try:
28
+ default_value = None
29
+ if default_api_endpoint:
30
+ if default_api_endpoint in global_api_endpoints:
31
+ default_value = format_api_name(default_api_endpoint)
32
+ else:
33
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
34
+ except Exception as e:
35
+ logging.error(f"Error setting default API endpoint: {str(e)}")
36
+ default_value = None
37
  with gr.Tab("Live Recording and Transcription", visible=True):
38
  gr.Markdown("# Live Audio Recording and Transcription")
39
  with gr.Row():
 
46
  custom_title = gr.Textbox(label="Custom Title (for database)", visible=False)
47
  record_button = gr.Button("Start Recording")
48
  stop_button = gr.Button("Stop Recording")
49
+ # FIXME - Add a button to perform analysis/summarization on the transcription
50
+ # Refactored API selection dropdown
51
+ # api_name_input = gr.Dropdown(
52
+ # choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
53
+ # value=default_value,
54
+ # label="API for Summarization (Optional)"
55
+ # )
56
  with gr.Column():
57
  output = gr.Textbox(label="Transcription", lines=10)
58
  audio_output = gr.Audio(label="Recorded Audio", visible=False)
App_Function_Libraries/Gradio_UI/Llamafile_tab.py ADDED
@@ -0,0 +1,327 @@
1
+ # Llamafile_tab.py
2
+ # Description: Gradio interface for configuring and launching Llamafile with Local LLMs
3
+
4
+ # Imports
5
+ import os
6
+ import logging
7
+ from typing import Tuple, Optional
8
+ import gradio as gr
9
+
10
+
11
+ from App_Function_Libraries.Local_LLM.Local_LLM_Inference_Engine_Lib import (
12
+ download_llm_model,
13
+ llm_models,
14
+ start_llamafile,
15
+ get_gguf_llamafile_files
16
+ )
17
+ #
18
+ #######################################################################################################################
19
+ #
20
+ # Functions:
21
+
22
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
23
+ MODELS_DIR = os.path.join(BASE_DIR, "Models")
24
+
25
+ def create_chat_with_llamafile_tab():
26
+ # Function to update model path based on selection
27
+ def on_local_model_change(selected_model: str, search_directory: str) -> str:
28
+ if selected_model and isinstance(search_directory, str):
29
+ model_path = os.path.abspath(os.path.join(search_directory, selected_model))
30
+ logging.debug(f"Selected model path: {model_path}") # Debug print for selected model path
31
+ return model_path
32
+ return "Invalid selection or directory."
33
+
34
+ # Function to update the dropdown with available models
35
+ def update_dropdowns(search_directory: str) -> Tuple[dict, str]:
36
+ logging.debug(f"User-entered directory: {search_directory}") # Debug print for directory
37
+ if not os.path.isdir(search_directory):
38
+ logging.debug(f"Directory does not exist: {search_directory}") # Debug print for non-existing directory
39
+ return gr.update(choices=[], value=None), "Directory does not exist."
40
+
41
+ try:
42
+ logging.debug(f"Directory exists: {search_directory}, scanning for files...") # Confirm directory exists
43
+ model_files = get_gguf_llamafile_files(search_directory)
44
+ logging.debug("Completed scanning for model files.")
45
+ except Exception as e:
46
+ logging.error(f"Error scanning directory: {e}")
47
+ return gr.update(choices=[], value=None), f"Error scanning directory: {e}"
48
+
49
+ if not model_files:
50
+ logging.debug(f"No model files found in {search_directory}") # Debug print for no files found
51
+ return gr.update(choices=[], value=None), "No model files found in the specified directory."
52
+
53
+ # Update the dropdown choices with the model files found
54
+ logging.debug(f"Models loaded from {search_directory}: {model_files}") # Debug: Print model files loaded
55
+ return gr.update(choices=model_files, value=None), f"Models loaded from {search_directory}."
56
+
57
+
58
+
59
+ def download_preset_model(selected_model: str) -> Tuple[str, str]:
60
+ """
61
+ Downloads the selected preset model.
62
+
63
+ Args:
64
+ selected_model (str): The key of the selected preset model.
65
+
66
+ Returns:
67
+ Tuple[str, str]: Status message and the path to the downloaded model.
68
+ """
69
+ model_info = llm_models.get(selected_model)
70
+ if not model_info:
71
+ return "Invalid model selection.", ""
72
+
73
+ try:
74
+ model_path = download_llm_model(
75
+ model_name=model_info["name"],
76
+ model_url=model_info["url"],
77
+ model_filename=model_info["filename"],
78
+ model_hash=model_info["hash"]
79
+ )
80
+ return f"Model '{model_info['name']}' downloaded successfully.", model_path
81
+ except Exception as e:
82
+ logging.error(f"Error downloading model: {e}")
83
+ return f"Failed to download model: {e}", ""
84
+
85
+ with gr.TabItem("Local LLM with Llamafile", visible=True):
86
+ gr.Markdown("# Settings for Llamafile")
87
+
88
+ with gr.Row():
89
+ with gr.Column():
90
+ am_noob = gr.Checkbox(label="Enable Sane Defaults", value=False, visible=True)
91
+ advanced_mode_toggle = gr.Checkbox(label="Advanced Mode - Show All Settings", value=False)
92
+ # Advanced Inputs
93
+ verbose_checked = gr.Checkbox(label="Enable Verbose Output", value=False, visible=False)
94
+ threads_checked = gr.Checkbox(label="Set CPU Threads", value=False, visible=False)
95
+ threads_value = gr.Number(label="Number of CPU Threads", value=None, precision=0, visible=False)
96
+ threads_batched_checked = gr.Checkbox(label="Enable Batched Inference", value=False, visible=False)
97
+ threads_batched_value = gr.Number(label="Batch Size for Inference", value=None, precision=0, visible=False)
98
+ model_alias_checked = gr.Checkbox(label="Set Model Alias", value=False, visible=False)
99
+ model_alias_value = gr.Textbox(label="Model Alias", value="", visible=False)
100
+ ctx_size_checked = gr.Checkbox(label="Set Prompt Context Size", value=False, visible=False)
101
+ ctx_size_value = gr.Number(label="Prompt Context Size", value=8124, precision=0, visible=False)
102
+ ngl_checked = gr.Checkbox(label="Enable GPU Layers", value=False, visible=True)
103
+ ngl_value = gr.Number(label="Number of GPU Layers", value=None, precision=0, visible=True)
104
+ batch_size_checked = gr.Checkbox(label="Set Batch Size", value=False, visible=False)
105
+ batch_size_value = gr.Number(label="Batch Size", value=512, visible=False)
106
+ memory_f32_checked = gr.Checkbox(label="Use 32-bit Floating Point", value=False, visible=False)
107
+ numa_checked = gr.Checkbox(label="Enable NUMA", value=False, visible=False)
108
+ server_timeout_value = gr.Number(label="Server Timeout", value=600, precision=0, visible=False)
109
+ host_checked = gr.Checkbox(label="Set IP to Listen On", value=False, visible=False)
110
+ host_value = gr.Textbox(label="Host IP Address", value="", visible=False)
111
+ port_checked = gr.Checkbox(label="Set Server Port", value=False, visible=False)
112
+ port_value = gr.Number(label="Port Number", value=8080, precision=0, visible=False)
113
+ api_key_checked = gr.Checkbox(label="Set API Key", value=False, visible=False)
114
+ api_key_value = gr.Textbox(label="API Key", value="", visible=False)
115
+ http_threads_checked = gr.Checkbox(label="Set HTTP Server Threads", value=False, visible=False)
116
+ http_threads_value = gr.Number(label="Number of HTTP Server Threads", value=None, precision=0, visible=False)
117
+ hf_repo_checked = gr.Checkbox(label="Use Huggingface Repo Model", value=False, visible=False)
118
+ hf_repo_value = gr.Textbox(label="Huggingface Repo Name", value="", visible=False)
119
+ hf_file_checked = gr.Checkbox(label="Set Huggingface Model File", value=False, visible=False)
120
+ hf_file_value = gr.Textbox(label="Huggingface Model File", value="", visible=False)
121
+
122
+ with gr.Column():
123
+ # Model Selection Section
124
+ gr.Markdown("## Model Selection")
125
+
126
+ # Option 1: Select from Local Filesystem
127
+ with gr.Row():
128
+ search_directory = gr.Textbox(
129
+ label="Model Directory",
130
+ placeholder="Enter directory path (currently './Models')",
131
+ value=MODELS_DIR,
132
+ interactive=True
133
+ )
134
+
135
+ # Initial population of local models
136
+ initial_dropdown_update, _ = update_dropdowns(MODELS_DIR)
137
+ logging.debug(f"Scanning directory: {MODELS_DIR}")
138
+ refresh_button = gr.Button("Refresh Models")
139
+ local_model_dropdown = gr.Dropdown(
140
+ label="Select Model from Directory",
141
+ choices=initial_dropdown_update["choices"],
142
+ value=None
143
+ )
144
+ # Display selected model path
145
+ model_value = gr.Textbox(label="Selected Model File Path", value="", interactive=False)
146
+
147
+ # Option 2: Download Preset Models
148
+ gr.Markdown("## Download Preset Models")
149
+
150
+ preset_model_dropdown = gr.Dropdown(
151
+ label="Select a Preset Model",
152
+ choices=list(llm_models.keys()),
153
+ value=None,
154
+ interactive=True,
155
+ info="Choose a preset model to download."
156
+ )
157
+ download_preset_button = gr.Button("Download Selected Preset")
158
+
159
+ with gr.Row():
160
+ with gr.Column():
161
+ start_button = gr.Button("Start Llamafile")
162
+ stop_button = gr.Button("Stop Llamafile (doesn't work)")
163
+ output_display = gr.Markdown()
164
+
165
+
166
+ # Show/hide advanced inputs based on toggle
167
+ def update_visibility(show_advanced: bool):
168
+ components = [
169
+ verbose_checked, threads_checked, threads_value,
170
+ http_threads_checked, http_threads_value,
171
+ hf_repo_checked, hf_repo_value,
172
+ hf_file_checked, hf_file_value,
173
+ ctx_size_checked, ctx_size_value,
174
+ ngl_checked, ngl_value,
175
+ host_checked, host_value,
176
+ port_checked, port_value
177
+ ]
178
+ return [gr.update(visible=show_advanced) for _ in components]
179
+
180
+ def on_start_button_click(
181
+ am_noob: bool,
182
+ verbose_checked: bool,
183
+ threads_checked: bool,
184
+ threads_value: Optional[int],
185
+ threads_batched_checked: bool,
186
+ threads_batched_value: Optional[int],
187
+ model_alias_checked: bool,
188
+ model_alias_value: str,
189
+ http_threads_checked: bool,
190
+ http_threads_value: Optional[int],
191
+ model_value: str,
192
+ hf_repo_checked: bool,
193
+ hf_repo_value: str,
194
+ hf_file_checked: bool,
195
+ hf_file_value: str,
196
+ ctx_size_checked: bool,
197
+ ctx_size_value: Optional[int],
198
+ ngl_checked: bool,
199
+ ngl_value: Optional[int],
200
+ batch_size_checked: bool,
201
+ batch_size_value: Optional[int],
202
+ memory_f32_checked: bool,
203
+ numa_checked: bool,
204
+ server_timeout_value: Optional[int],
205
+ host_checked: bool,
206
+ host_value: str,
207
+ port_checked: bool,
208
+ port_value: Optional[int],
209
+ api_key_checked: bool,
210
+ api_key_value: str
211
+ ) -> str:
212
+ """
213
+ Event handler for the Start Llamafile button.
214
+ """
215
+ try:
216
+ result = start_llamafile(
217
+ am_noob,
218
+ verbose_checked,
219
+ threads_checked,
220
+ threads_value,
221
+ threads_batched_checked,
222
+ threads_batched_value,
223
+ model_alias_checked,
224
+ model_alias_value,
225
+ http_threads_checked,
226
+ http_threads_value,
227
+ model_value,
228
+ hf_repo_checked,
229
+ hf_repo_value,
230
+ hf_file_checked,
231
+ hf_file_value,
232
+ ctx_size_checked,
233
+ ctx_size_value,
234
+ ngl_checked,
235
+ ngl_value,
236
+ batch_size_checked,
237
+ batch_size_value,
238
+ memory_f32_checked,
239
+ numa_checked,
240
+ server_timeout_value,
241
+ host_checked,
242
+ host_value,
243
+ port_checked,
244
+ port_value,
245
+ api_key_checked,
246
+ api_key_value
247
+ )
248
+ return result
249
+ except Exception as e:
250
+ logging.error(f"Error starting Llamafile: {e}")
251
+ return f"Failed to start Llamafile: {e}"
252
+
253
+ advanced_mode_toggle.change(
254
+ fn=update_visibility,
255
+ inputs=[advanced_mode_toggle],
256
+ outputs=[
257
+ verbose_checked, threads_checked, threads_value,
258
+ http_threads_checked, http_threads_value,
259
+ hf_repo_checked, hf_repo_value,
260
+ hf_file_checked, hf_file_value,
261
+ ctx_size_checked, ctx_size_value,
262
+ ngl_checked, ngl_value,
263
+ host_checked, host_value,
264
+ port_checked, port_value
265
+ ]
266
+ )
267
+
268
+ start_button.click(
269
+ fn=on_start_button_click,
270
+ inputs=[
271
+ am_noob,
272
+ verbose_checked,
273
+ threads_checked,
274
+ threads_value,
275
+ threads_batched_checked,
276
+ threads_batched_value,
277
+ model_alias_checked,
278
+ model_alias_value,
279
+ http_threads_checked,
280
+ http_threads_value,
281
+ model_value,
282
+ hf_repo_checked,
283
+ hf_repo_value,
284
+ hf_file_checked,
285
+ hf_file_value,
286
+ ctx_size_checked,
287
+ ctx_size_value,
288
+ ngl_checked,
289
+ ngl_value,
290
+ batch_size_checked,
291
+ batch_size_value,
292
+ memory_f32_checked,
293
+ numa_checked,
294
+ server_timeout_value,
295
+ host_checked,
296
+ host_value,
297
+ port_checked,
298
+ port_value,
299
+ api_key_checked,
300
+ api_key_value
301
+ ],
302
+ outputs=output_display
303
+ )
304
+
305
+ download_preset_button.click(
306
+ fn=download_preset_model,
307
+ inputs=[preset_model_dropdown],
308
+ outputs=[output_display, model_value]
309
+ )
310
+
311
+ # Click event for refreshing models
312
+ refresh_button.click(
313
+ fn=update_dropdowns,
314
+ inputs=[search_directory], # Ensure that the directory path (string) is passed
315
+ outputs=[local_model_dropdown, output_display] # Update dropdown and status
316
+ )
317
+
318
+ # Event to update model_value when a model is selected from the dropdown
319
+ local_model_dropdown.change(
320
+ fn=on_local_model_change, # Function that calculates the model path
321
+ inputs=[local_model_dropdown, search_directory], # Inputs: selected model and directory
322
+ outputs=[model_value] # Output: Update the model_value textbox with the selected model path
323
+ )
324
+
325
+ #
326
+ #
327
+ #######################################################################################################################
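Most of the start_llamafile inputs wired above arrive as (checkbox, value) pairs. As a rough illustration of how such pairs are typically folded into a llamafile/llama.cpp-style argument list (the flag names below are examples only; the real mapping lives inside start_llamafile, which is not shown in this diff):

# Hypothetical helper illustrating the (enabled, value) -> CLI-args pattern assumed above
def build_llamafile_args(pairs):
    args = []
    for flag, enabled, value in pairs:
        if enabled and value not in (None, ""):
            args.extend([flag, str(value)])
    return args

build_llamafile_args([("--threads", True, 8), ("--ctx-size", False, 4096), ("--host", True, "0.0.0.0")])
# -> ['--threads', '8', '--host', '0.0.0.0']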
App_Function_Libraries/Gradio_UI/Media_edit.py CHANGED
@@ -10,13 +10,13 @@ import gradio as gr
10
  #
11
  # Local Imports
12
  from App_Function_Libraries.DB.DB_Manager import add_prompt, update_media_content, db, add_or_update_prompt, \
13
- load_prompt_details, fetch_keywords_for_media, update_keywords_for_media
14
- from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_prompt_dropdown
15
  from App_Function_Libraries.DB.SQLite_DB import fetch_item_details
16
 
17
 
18
  def create_media_edit_tab():
19
- with gr.TabItem("Edit Existing Items", visible=True):
20
  gr.Markdown("# Search and Edit Media Items")
21
 
22
  with gr.Row():
@@ -89,7 +89,7 @@ def create_media_edit_tab():
89
 
90
 
91
  def create_media_edit_and_clone_tab():
92
- with gr.TabItem("Clone and Edit Existing Items", visible=True):
93
  gr.Markdown("# Search, Edit, and Clone Existing Items")
94
 
95
  with gr.Row():
@@ -199,6 +199,11 @@ def create_media_edit_and_clone_tab():
199
 
200
 
201
  def create_prompt_edit_tab():
 
 
 
 
 
202
  with gr.TabItem("Add & Edit Prompts", visible=True):
203
  with gr.Row():
204
  with gr.Column():
@@ -207,38 +212,145 @@ def create_prompt_edit_tab():
207
  choices=[],
208
  interactive=True
209
  )
 
 
 
210
  prompt_list_button = gr.Button("List Prompts")
211
 
212
  with gr.Column():
213
  title_input = gr.Textbox(label="Title", placeholder="Enter the prompt title")
214
- author_input = gr.Textbox(label="Author", placeholder="Enter the prompt's author", lines=3)
215
  description_input = gr.Textbox(label="Description", placeholder="Enter the prompt description", lines=3)
216
  system_prompt_input = gr.Textbox(label="System Prompt", placeholder="Enter the system prompt", lines=3)
217
  user_prompt_input = gr.Textbox(label="User Prompt", placeholder="Enter the user prompt", lines=3)
218
  add_prompt_button = gr.Button("Add/Update Prompt")
219
  add_prompt_output = gr.HTML()
220
 
221
- # Event handlers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  prompt_list_button.click(
223
  fn=update_prompt_dropdown,
224
- outputs=prompt_dropdown
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  )
226
 
 
227
  add_prompt_button.click(
228
  fn=add_or_update_prompt,
229
  inputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input],
230
- outputs=add_prompt_output
 
 
 
 
 
 
 
 
 
 
 
231
  )
232
 
233
- # Load prompt details when selected
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  prompt_dropdown.change(
235
  fn=load_prompt_details,
236
  inputs=[prompt_dropdown],
237
- outputs=[title_input, author_input, system_prompt_input, user_prompt_input]
 
 
 
 
 
 
238
  )
239
 
240
 
 
241
  def create_prompt_clone_tab():
 
 
 
 
 
242
  with gr.TabItem("Clone and Edit Prompts", visible=True):
243
  with gr.Row():
244
  with gr.Column():
@@ -248,6 +360,9 @@ def create_prompt_clone_tab():
248
  choices=[],
249
  interactive=True
250
  )
 
 
 
251
  prompt_list_button = gr.Button("List Prompts")
252
 
253
  with gr.Column():
@@ -260,19 +375,99 @@ def create_prompt_clone_tab():
260
  save_cloned_prompt_button = gr.Button("Save Cloned Prompt", visible=False)
261
  add_prompt_output = gr.HTML()
262
 
263
- # Event handlers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  prompt_list_button.click(
265
  fn=update_prompt_dropdown,
266
- outputs=prompt_dropdown
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  )
268
 
269
  # Load prompt details when selected
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  prompt_dropdown.change(
271
  fn=load_prompt_details,
272
  inputs=[prompt_dropdown],
273
  outputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input]
274
  )
275
 
 
276
  def prepare_for_cloning(selected_prompt):
277
  if selected_prompt:
278
  return gr.update(value=f"Copy of {selected_prompt}"), gr.update(visible=True)
@@ -284,18 +479,21 @@ def create_prompt_clone_tab():
284
  outputs=[title_input, save_cloned_prompt_button]
285
  )
286
 
287
- def save_cloned_prompt(title, description, system_prompt, user_prompt):
 
288
  try:
289
- result = add_prompt(title, description, system_prompt, user_prompt)
290
  if result == "Prompt added successfully.":
291
- return result, gr.update(choices=update_prompt_dropdown())
 
 
292
  else:
293
- return result, gr.update()
294
  except Exception as e:
295
- return f"Error saving cloned prompt: {str(e)}", gr.update()
296
 
297
  save_cloned_prompt_button.click(
298
  fn=save_cloned_prompt,
299
- inputs=[title_input, description_input, system_prompt_input, user_prompt_input],
300
- outputs=[add_prompt_output, prompt_dropdown]
301
- )
 
10
  #
11
  # Local Imports
12
  from App_Function_Libraries.DB.DB_Manager import add_prompt, update_media_content, db, add_or_update_prompt, \
13
+ load_prompt_details, fetch_keywords_for_media, update_keywords_for_media, fetch_prompt_details, list_prompts
14
+ from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown
15
  from App_Function_Libraries.DB.SQLite_DB import fetch_item_details
16
 
17
 
18
  def create_media_edit_tab():
19
+ with gr.TabItem("Edit Existing Items in the Media DB", visible=True):
20
  gr.Markdown("# Search and Edit Media Items")
21
 
22
  with gr.Row():
 
89
 
90
 
91
  def create_media_edit_and_clone_tab():
92
+ with gr.TabItem("Clone and Edit Existing Items in the Media DB", visible=True):
93
  gr.Markdown("# Search, Edit, and Clone Existing Items")
94
 
95
  with gr.Row():
 
199
 
200
 
201
  def create_prompt_edit_tab():
202
+ # Initialize state variables for pagination
203
+ current_page_state = gr.State(value=1)
204
+ total_pages_state = gr.State(value=1)
205
+ per_page = 10 # Number of prompts per page
206
+
207
  with gr.TabItem("Add & Edit Prompts", visible=True):
208
  with gr.Row():
209
  with gr.Column():
 
212
  choices=[],
213
  interactive=True
214
  )
215
+ next_page_button = gr.Button("Next Page", visible=False)
216
+ page_display = gr.Markdown("Page 1 of X", visible=False)
217
+ prev_page_button = gr.Button("Previous Page", visible=False)
218
  prompt_list_button = gr.Button("List Prompts")
219
 
220
  with gr.Column():
221
  title_input = gr.Textbox(label="Title", placeholder="Enter the prompt title")
222
+ author_input = gr.Textbox(label="Author", placeholder="Enter the prompt's author", lines=1)
223
  description_input = gr.Textbox(label="Description", placeholder="Enter the prompt description", lines=3)
224
  system_prompt_input = gr.Textbox(label="System Prompt", placeholder="Enter the system prompt", lines=3)
225
  user_prompt_input = gr.Textbox(label="User Prompt", placeholder="Enter the user prompt", lines=3)
226
  add_prompt_button = gr.Button("Add/Update Prompt")
227
  add_prompt_output = gr.HTML()
228
 
229
+ # Function to update the prompt dropdown with pagination
230
+ def update_prompt_dropdown(page=1):
231
+ prompts, total_pages, current_page = list_prompts(page=page, per_page=per_page)
232
+ page_display_text = f"Page {current_page} of {total_pages}"
233
+ prev_button_visible = current_page > 1
234
+ next_button_visible = current_page < total_pages
235
+ return (
236
+ gr.update(choices=prompts),
237
+ gr.update(value=page_display_text, visible=True),
238
+ gr.update(visible=prev_button_visible),
239
+ gr.update(visible=next_button_visible),
240
+ current_page,
241
+ total_pages
242
+ )
243
+
244
+ # Event handler for listing prompts
245
  prompt_list_button.click(
246
  fn=update_prompt_dropdown,
247
+ inputs=[],
248
+ outputs=[
249
+ prompt_dropdown,
250
+ page_display,
251
+ prev_page_button,
252
+ next_page_button,
253
+ current_page_state,
254
+ total_pages_state
255
+ ]
256
+ )
257
+
258
+ # Functions to handle pagination
259
+ def on_prev_page_click(current_page):
260
+ new_page = max(current_page - 1, 1)
261
+ return update_prompt_dropdown(page=new_page)
262
+
263
+ def on_next_page_click(current_page, total_pages):
264
+ new_page = min(current_page + 1, total_pages)
265
+ return update_prompt_dropdown(page=new_page)
266
+
267
+ # Event handlers for pagination buttons
268
+ prev_page_button.click(
269
+ fn=on_prev_page_click,
270
+ inputs=[current_page_state],
271
+ outputs=[
272
+ prompt_dropdown,
273
+ page_display,
274
+ prev_page_button,
275
+ next_page_button,
276
+ current_page_state,
277
+ total_pages_state
278
+ ]
279
+ )
280
+
281
+ next_page_button.click(
282
+ fn=on_next_page_click,
283
+ inputs=[current_page_state, total_pages_state],
284
+ outputs=[
285
+ prompt_dropdown,
286
+ page_display,
287
+ prev_page_button,
288
+ next_page_button,
289
+ current_page_state,
290
+ total_pages_state
291
+ ]
292
  )
293
 
294
+ # Event handler for adding or updating a prompt
295
  add_prompt_button.click(
296
  fn=add_or_update_prompt,
297
  inputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input],
298
+ outputs=[add_prompt_output]
299
+ ).then(
300
+ fn=update_prompt_dropdown,
301
+ inputs=[],
302
+ outputs=[
303
+ prompt_dropdown,
304
+ page_display,
305
+ prev_page_button,
306
+ next_page_button,
307
+ current_page_state,
308
+ total_pages_state
309
+ ]
310
  )
311
 
312
+ # Function to load prompt details when a prompt is selected
313
+ def load_prompt_details(selected_prompt):
314
+ details = fetch_prompt_details(selected_prompt)
315
+ if details:
316
+ title, author, description, system_prompt, user_prompt, keywords = details
317
+ return (
318
+ gr.update(value=title),
319
+ gr.update(value=author or ""),
320
+ gr.update(value=description or ""),
321
+ gr.update(value=system_prompt or ""),
322
+ gr.update(value=user_prompt or "")
323
+ )
324
+ else:
325
+ return (
326
+ gr.update(value=""),
327
+ gr.update(value=""),
328
+ gr.update(value=""),
329
+ gr.update(value=""),
330
+ gr.update(value="")
331
+ )
332
+
333
+ # Event handler for prompt selection change
334
  prompt_dropdown.change(
335
  fn=load_prompt_details,
336
  inputs=[prompt_dropdown],
337
+ outputs=[
338
+ title_input,
339
+ author_input,
340
+ description_input,
341
+ system_prompt_input,
342
+ user_prompt_input
343
+ ]
344
  )
345
 
346
 
347
+
348
  def create_prompt_clone_tab():
349
+ # Initialize state variables for pagination
350
+ current_page_state = gr.State(value=1)
351
+ total_pages_state = gr.State(value=1)
352
+ per_page = 10 # Number of prompts per page
353
+
354
  with gr.TabItem("Clone and Edit Prompts", visible=True):
355
  with gr.Row():
356
  with gr.Column():
 
360
  choices=[],
361
  interactive=True
362
  )
363
+ next_page_button = gr.Button("Next Page", visible=False)
364
+ page_display = gr.Markdown("Page 1 of X", visible=False)
365
+ prev_page_button = gr.Button("Previous Page", visible=False)
366
  prompt_list_button = gr.Button("List Prompts")
367
 
368
  with gr.Column():
 
375
  save_cloned_prompt_button = gr.Button("Save Cloned Prompt", visible=False)
376
  add_prompt_output = gr.HTML()
377
 
378
+ # Function to update the prompt dropdown with pagination
379
+ def update_prompt_dropdown(page=1):
380
+ prompts, total_pages, current_page = list_prompts(page=page, per_page=per_page)
381
+ page_display_text = f"Page {current_page} of {total_pages}"
382
+ prev_button_visible = current_page > 1
383
+ next_button_visible = current_page < total_pages
384
+ return (
385
+ gr.update(choices=prompts),
386
+ gr.update(value=page_display_text, visible=True),
387
+ gr.update(visible=prev_button_visible),
388
+ gr.update(visible=next_button_visible),
389
+ current_page,
390
+ total_pages
391
+ )
392
+
393
+ # Event handler for listing prompts
394
  prompt_list_button.click(
395
  fn=update_prompt_dropdown,
396
+ inputs=[],
397
+ outputs=[
398
+ prompt_dropdown,
399
+ page_display,
400
+ prev_page_button,
401
+ next_page_button,
402
+ current_page_state,
403
+ total_pages_state
404
+ ]
405
+ )
406
+
407
+ # Functions to handle pagination
408
+ def on_prev_page_click(current_page):
409
+ new_page = max(current_page - 1, 1)
410
+ return update_prompt_dropdown(page=new_page)
411
+
412
+ def on_next_page_click(current_page, total_pages):
413
+ new_page = min(current_page + 1, total_pages)
414
+ return update_prompt_dropdown(page=new_page)
415
+
416
+ # Event handlers for pagination buttons
417
+ prev_page_button.click(
418
+ fn=on_prev_page_click,
419
+ inputs=[current_page_state],
420
+ outputs=[
421
+ prompt_dropdown,
422
+ page_display,
423
+ prev_page_button,
424
+ next_page_button,
425
+ current_page_state,
426
+ total_pages_state
427
+ ]
428
+ )
429
+
430
+ next_page_button.click(
431
+ fn=on_next_page_click,
432
+ inputs=[current_page_state, total_pages_state],
433
+ outputs=[
434
+ prompt_dropdown,
435
+ page_display,
436
+ prev_page_button,
437
+ next_page_button,
438
+ current_page_state,
439
+ total_pages_state
440
+ ]
441
  )
442
 
443
  # Load prompt details when selected
444
+ def load_prompt_details(selected_prompt):
445
+ if selected_prompt:
446
+ details = fetch_prompt_details(selected_prompt)
447
+ if details:
448
+ title, author, description, system_prompt, user_prompt, keywords = details
449
+ return (
450
+ gr.update(value=title),
451
+ gr.update(value=author or ""),
452
+ gr.update(value=description or ""),
453
+ gr.update(value=system_prompt or ""),
454
+ gr.update(value=user_prompt or "")
455
+ )
456
+ return (
457
+ gr.update(value=""),
458
+ gr.update(value=""),
459
+ gr.update(value=""),
460
+ gr.update(value=""),
461
+ gr.update(value="")
462
+ )
463
+
464
  prompt_dropdown.change(
465
  fn=load_prompt_details,
466
  inputs=[prompt_dropdown],
467
  outputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input]
468
  )
469
 
470
+ # Prepare for cloning
471
  def prepare_for_cloning(selected_prompt):
472
  if selected_prompt:
473
  return gr.update(value=f"Copy of {selected_prompt}"), gr.update(visible=True)
 
479
  outputs=[title_input, save_cloned_prompt_button]
480
  )
481
 
482
+ # Function to save cloned prompt
483
+ def save_cloned_prompt(title, author, description, system_prompt, user_prompt, current_page):
484
  try:
485
+ result = add_prompt(title, author, description, system_prompt, user_prompt)
486
  if result == "Prompt added successfully.":
487
+ # After adding, refresh the prompt dropdown
488
+ prompt_dropdown_update = update_prompt_dropdown(page=current_page)
489
+ return (result, *prompt_dropdown_update)
490
  else:
491
+ return (result, gr.update(), gr.update(), gr.update(), gr.update(), current_page, total_pages_state.value)
492
  except Exception as e:
493
+ return (f"Error saving cloned prompt: {str(e)}", gr.update(), gr.update(), gr.update(), gr.update(), current_page, total_pages_state.value)
494
 
495
  save_cloned_prompt_button.click(
496
  fn=save_cloned_prompt,
497
+ inputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input, current_page_state],
498
+ outputs=[add_prompt_output, prompt_dropdown, page_display, prev_page_button, next_page_button, current_page_state, total_pages_state]
499
+ )
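Both prompt tabs above rely on the same assumed pagination contract from DB_Manager.list_prompts: it returns a (prompt_names, total_pages, current_page) tuple for the requested page. The DB side is not part of this diff, so the following is only a sketch of that contract (list_prompts_sketch and all_names are illustrative names):

# Sketch of the pagination contract assumed by update_prompt_dropdown in both tabs above
def list_prompts_sketch(all_names, page=1, per_page=10):
    total_pages = max(1, (len(all_names) + per_page - 1) // per_page)
    page = min(max(page, 1), total_pages)
    start = (page - 1) * per_page
    return all_names[start:start + per_page], total_pages, page

names, total_pages, current_page = list_prompts_sketch([f"Prompt {i}" for i in range(25)], page=2)
# -> 10 names, total_pages == 3, current_page == 2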
App_Function_Libraries/Gradio_UI/Media_wiki_tab.py CHANGED
@@ -32,6 +32,13 @@ def create_mediawiki_import_tab():
32
  value="sentences",
33
  label="Chunking Method"
34
  )
 
 
 
 
 
 
 
35
  chunk_size = gr.Slider(minimum=100, maximum=2000, value=1000, step=100, label="Chunk Size")
36
  chunk_overlap = gr.Slider(minimum=0, maximum=500, value=100, step=10, label="Chunk Overlap")
37
  # FIXME - Add checkbox for 'Enable Summarization upon ingestion' for API summarization of chunks
 
32
  value="sentences",
33
  label="Chunking Method"
34
  )
35
+ # FIXME - add API selection dropdown + Analysis/Summarization options
36
+ # Refactored API selection dropdown
37
+ # api_name_input = gr.Dropdown(
38
+ # choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
39
+ # value=default_value,
40
+ # label="API for Summarization (Optional)"
41
+ # )
42
  chunk_size = gr.Slider(minimum=100, maximum=2000, value=1000, step=100, label="Chunk Size")
43
  chunk_overlap = gr.Slider(minimum=0, maximum=500, value=100, step=10, label="Chunk Overlap")
44
  # FIXME - Add checkbox for 'Enable Summarization upon ingestion' for API summarization of chunks
App_Function_Libraries/Gradio_UI/Mind_Map_tab.py ADDED
@@ -0,0 +1,128 @@
1
+ # Mind_Map_tab.py
2
+ # Description: File contains functions for generation of PlantUML mindmaps for the gradio tab
3
+ #
4
+ # Imports
5
+ import re
6
+ #
7
+ # External Libraries
8
+ import gradio as gr
9
+ #
10
+ ######################################################################################################################
11
+ #
12
+ # Functions:
13
+
14
+ def parse_plantuml_mindmap(plantuml_text: str) -> dict:
15
+ """Parse PlantUML mindmap syntax into a nested dictionary structure"""
16
+ lines = [line.strip() for line in plantuml_text.split('\n')
17
+ if line.strip() and not line.strip().startswith('@')]
18
+
19
+ root = None
20
+ nodes = []
21
+ stack = []
22
+
23
+ for line in lines:
24
+ level_match = re.match(r'^([+\-*]+|\*+)', line)
25
+ if not level_match:
26
+ continue
27
+ level = len(level_match.group(0))
28
+ text = re.sub(r'^([+\-*]+|\*+)\s*', '', line).strip('[]').strip('()')
29
+ node = {'text': text, 'children': []}
30
+
31
+ while stack and stack[-1][0] >= level:
32
+ stack.pop()
33
+
34
+ if stack:
35
+ stack[-1][1]['children'].append(node)
36
+ else:
37
+ root = node
38
+
39
+ stack.append((level, node))
40
+
41
+ return root
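For reference, a minimal usage sketch of the parser above (the sample input is hypothetical):

sample = "@startmindmap\n* Root\n** Child A\n** Child B\n@endmindmap"
tree = parse_plantuml_mindmap(sample)
# tree == {'text': 'Root', 'children': [
#     {'text': 'Child A', 'children': []},
#     {'text': 'Child B', 'children': []}]}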
42
+
43
+ def create_mindmap_html(plantuml_text: str) -> str:
44
+ """Convert PlantUML mindmap to HTML visualization with collapsible nodes using CSS only"""
45
+ # Parse the mindmap text into a nested structure
46
+ root_node = parse_plantuml_mindmap(plantuml_text)
47
+ if not root_node:
48
+ return "<p>No valid mindmap content provided.</p>"
49
+
50
+ html = "<style>"
51
+ html += """
52
+ details {
53
+ margin-left: 20px;
54
+ }
55
+ summary {
56
+ cursor: pointer;
57
+ padding: 5px;
58
+ border: 1px solid #333;
59
+ border-radius: 3px;
60
+ background-color: #e6f3ff;
61
+ }
62
+ .mindmap-node {
63
+ margin-left: 20px;
64
+ padding: 5px;
65
+ border: 1px solid #333;
66
+ border-radius: 3px;
67
+ }
68
+ """
69
+ html += "</style>"
70
+
71
+ colors = ['#e6f3ff', '#f0f7ff', '#f5f5f5', '#fff0f0', '#f0fff0']
72
+
73
+ def create_node_html(node, level):
74
+ bg_color = colors[(level - 1) % len(colors)]
75
+ if node['children']:
76
+ children_html = ''.join(create_node_html(child, level + 1) for child in node['children'])
77
+ return f"""
78
+ <details open>
79
+ <summary style="background-color: {bg_color};">{node['text']}</summary>
80
+ {children_html}
81
+ </details>
82
+ """
83
+ else:
84
+ return f"""
85
+ <div class="mindmap-node" style="background-color: {bg_color}; margin-left: {level * 20}px;">
86
+ {node['text']}
87
+ </div>
88
+ """
89
+
90
+ html += create_node_html(root_node, level=1)
91
+ return html
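And a matching sanity check for the HTML renderer (again a sketch, not part of the patch):

html = create_mindmap_html("@startmindmap\n* Root\n** Leaf\n@endmindmap")
assert "<details" in html and "Leaf" in html  # parent nodes render as collapsible <details> blocks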
92
+
93
+ # Create Gradio interface
94
+ def create_mindmap_tab():
95
+ with gr.TabItem("PlantUML Mindmap"):
96
+ gr.Markdown("# Collapsible PlantUML Mindmap Visualizer")
97
+ gr.Markdown("Convert PlantUML mindmap syntax to a visual mindmap with collapsible nodes.")
98
+ plantuml_input = gr.Textbox(
99
+ lines=15,
100
+ label="Enter PlantUML mindmap",
101
+ placeholder="""@startmindmap
102
+ * Project Planning
103
+ ** Requirements
104
+ *** Functional Requirements
105
+ **** User Interface
106
+ **** Backend Services
107
+ *** Technical Requirements
108
+ **** Performance
109
+ **** Security
110
+ ** Timeline
111
+ *** Phase 1
112
+ *** Phase 2
113
+ ** Resources
114
+ *** Team
115
+ *** Budget
116
+ @endmindmap"""
117
+ )
118
+ submit_btn = gr.Button("Generate Mindmap")
119
+ mindmap_output = gr.HTML(label="Mindmap Output")
120
+ submit_btn.click(
121
+ fn=create_mindmap_html,
122
+ inputs=plantuml_input,
123
+ outputs=mindmap_output
124
+ )
125
+
126
+ #
127
+ # End of Mind_Map_tab.py
128
+ ######################################################################################################################
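Because create_mindmap_tab builds a gr.TabItem, it must be called inside a Blocks/Tabs context; a minimal standalone wiring sketch (illustrative, the real registration happens in the application's main UI builder):

import gradio as gr
from App_Function_Libraries.Gradio_UI.Mind_Map_tab import create_mindmap_tab

with gr.Blocks() as demo:
    with gr.Tabs():
        create_mindmap_tab()

if __name__ == "__main__":
    demo.launch()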
App_Function_Libraries/Gradio_UI/PDF_ingestion_tab.py CHANGED
@@ -8,9 +8,12 @@ import tempfile
8
  #
9
  # External Imports
10
  import gradio as gr
 
 
 
11
  #
12
  # Local Imports
13
- from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
14
  from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
15
  from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_metadata_from_pdf, extract_text_and_format_from_pdf, \
16
  process_and_cleanup_pdf
@@ -22,92 +25,258 @@ from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_metadata_from_p
22
 
23
  def create_pdf_ingestion_tab():
24
  with gr.TabItem("PDF Ingestion", visible=True):
25
- # TODO - Add functionality to extract metadata from pdf as part of conversion process in marker
26
  gr.Markdown("# Ingest PDF Files and Extract Metadata")
27
  with gr.Row():
28
  with gr.Column():
29
- pdf_file_input = gr.File(label="Uploaded PDF File", file_types=[".pdf"], visible=True)
30
- pdf_upload_button = gr.UploadButton("Click to Upload PDF", file_types=[".pdf"])
31
- pdf_title_input = gr.Textbox(label="Title (Optional)")
32
- pdf_author_input = gr.Textbox(label="Author (Optional)")
33
- pdf_keywords_input = gr.Textbox(label="Keywords (Optional, comma-separated)")
34
- with gr.Row():
35
- custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
36
- value=False,
37
- visible=True)
38
- preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
39
- value=False,
40
- visible=True)
41
- with gr.Row():
42
- preset_prompt = gr.Dropdown(label="Select Preset Prompt",
43
- choices=load_preset_prompts(),
44
- visible=False)
45
- with gr.Row():
46
- custom_prompt_input = gr.Textbox(label="Custom Prompt",
47
- placeholder="Enter custom prompt here",
48
- lines=3,
49
- visible=False)
50
- with gr.Row():
51
- system_prompt_input = gr.Textbox(label="System Prompt",
52
- value="""
53
- <s>You are a bulleted notes specialist.
54
- [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
55
- **Bulleted Note Creation Guidelines**
56
-
57
- **Headings**:
58
- - Based on referenced topics, not categories like quotes or terms
59
- - Surrounded by **bold** formatting
60
- - Not listed as bullet points
61
- - No space between headings and list items underneath
62
-
63
- **Emphasis**:
64
- - **Important terms** set in bold font
65
- - **Text ending in a colon**: also bolded
66
-
67
- **Review**:
68
- - Ensure adherence to specified format
69
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]""",
70
- lines=3,
71
- visible=False)
72
-
73
- custom_prompt_checkbox.change(
74
- fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
75
- inputs=[custom_prompt_checkbox],
76
- outputs=[custom_prompt_input, system_prompt_input]
77
  )
78
- preset_prompt_checkbox.change(
79
- fn=lambda x: gr.update(visible=x),
80
- inputs=[preset_prompt_checkbox],
81
- outputs=[preset_prompt]
82
  )
83
-
84
- def update_prompts(preset_name):
85
- prompts = update_user_prompt(preset_name)
86
- return (
87
- gr.update(value=prompts["user_prompt"], visible=True),
88
- gr.update(value=prompts["system_prompt"], visible=True)
89
- )
90
-
91
- preset_prompt.change(
92
- update_prompts,
93
- inputs=preset_prompt,
94
- outputs=[custom_prompt_input, system_prompt_input]
95
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
- pdf_ingest_button = gr.Button("Ingest PDF")
 
 
 
 
 
 
 
98
 
99
- pdf_upload_button.upload(fn=lambda file: file, inputs=pdf_upload_button, outputs=pdf_file_input)
100
  with gr.Column():
101
- pdf_result_output = gr.Textbox(label="Result")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  pdf_ingest_button.click(
104
- fn=process_and_cleanup_pdf,
105
- inputs=[pdf_file_input, pdf_title_input, pdf_author_input, pdf_keywords_input],
 
 
 
 
 
 
 
106
  outputs=pdf_result_output
107
  )
108
 
109
 
110
- def test_pdf_ingestion(pdf_file):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  if pdf_file is None:
112
  return "No file uploaded", ""
113
 
@@ -130,7 +299,37 @@ def test_pdf_ingestion(pdf_file):
130
  title = metadata.get('title', os.path.splitext(os.path.basename(pdf_file.name))[0])
131
  author = metadata.get('author', 'Unknown')
132
 
133
- result = f"PDF '{title}' by {author} processed successfully."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  return result, markdown_text
135
  except Exception as e:
136
  return f"Error ingesting PDF: {str(e)}", ""
@@ -140,12 +339,24 @@ def create_pdf_ingestion_test_tab():
140
  with gr.Row():
141
  with gr.Column():
142
  pdf_file_input = gr.File(label="Upload PDF for testing")
143
- test_button = gr.Button("Test PDF Ingestion")
 
 
144
  with gr.Column():
145
  test_output = gr.Textbox(label="Test Result")
146
  pdf_content_output = gr.Textbox(label="PDF Content", lines=200)
147
  test_button.click(
148
- fn=test_pdf_ingestion,
 
 
 
 
 
 
 
 
 
 
149
  inputs=[pdf_file_input],
150
  outputs=[test_output, pdf_content_output]
151
  )
 
8
  #
9
  # External Imports
10
  import gradio as gr
11
+ import pymupdf4llm
12
+ from docling.document_converter import DocumentConverter
13
+
14
  #
15
  # Local Imports
16
+ from App_Function_Libraries.DB.DB_Manager import list_prompts
17
  from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
18
  from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_metadata_from_pdf, extract_text_and_format_from_pdf, \
19
  process_and_cleanup_pdf
 
25
 
26
  def create_pdf_ingestion_tab():
27
  with gr.TabItem("PDF Ingestion", visible=True):
 
28
  gr.Markdown("# Ingest PDF Files and Extract Metadata")
29
  with gr.Row():
30
  with gr.Column():
31
+ # Changed to support multiple files
32
+ pdf_file_input = gr.File(
33
+ label="Uploaded PDF Files",
34
+ file_types=[".pdf"],
35
+ visible=True,
36
+ file_count="multiple"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  )
38
+ pdf_upload_button = gr.UploadButton(
39
+ "Click to Upload PDFs",
40
+ file_types=[".pdf"],
41
+ file_count="multiple"
42
  )
43
+ parser_selection = gr.Radio(
44
+ choices=["pymupdf", "pymupdf4llm", "docling"],
45
+ label="Select Parser",
46
+ value="pymupdf" # default value
 
 
 
 
 
 
 
 
47
  )
48
+ # Common metadata for all files
49
+ pdf_keywords_input = gr.Textbox(label="Keywords (Optional, comma-separated)")
50
+ # with gr.Row():
51
+ # custom_prompt_checkbox = gr.Checkbox(
52
+ # label="Use a Custom Prompt",
53
+ # value=False,
54
+ # visible=True
55
+ # )
56
+ # preset_prompt_checkbox = gr.Checkbox(
57
+ # label="Use a pre-set Prompt",
58
+ # value=False,
59
+ # visible=True
60
+ # )
61
+ # # Initialize state variables for pagination
62
+ # current_page_state = gr.State(value=1)
63
+ # total_pages_state = gr.State(value=1)
64
+ # with gr.Row():
65
+ # # Add pagination controls
66
+ # preset_prompt = gr.Dropdown(
67
+ # label="Select Preset Prompt",
68
+ # choices=[],
69
+ # visible=False
70
+ # )
71
+ # prev_page_button = gr.Button("Previous Page", visible=False)
72
+ # page_display = gr.Markdown("Page 1 of X", visible=False)
73
+ # next_page_button = gr.Button("Next Page", visible=False)
74
+ # with gr.Row():
75
+ # custom_prompt_input = gr.Textbox(
76
+ # label="Custom Prompt",
77
+ # placeholder="Enter custom prompt here",
78
+ # lines=3,
79
+ # visible=False
80
+ # )
81
+ # with gr.Row():
82
+ # system_prompt_input = gr.Textbox(
83
+ # label="System Prompt",
84
+ # value="""
85
+ # <s>You are a bulleted notes specialist.
86
+ # [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
87
+ # **Bulleted Note Creation Guidelines**
88
+ #
89
+ # **Headings**:
90
+ # - Based on referenced topics, not categories like quotes or terms
91
+ # - Surrounded by **bold** formatting
92
+ # - Not listed as bullet points
93
+ # - No space between headings and list items underneath
94
+ #
95
+ # **Emphasis**:
96
+ # - **Important terms** set in bold font
97
+ # - **Text ending in a colon**: also bolded
98
+ #
99
+ # **Review**:
100
+ # - Ensure adherence to specified format
101
+ # - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]""",
102
+ # lines=3,
103
+ # visible=False
104
+ # )
105
+ #
106
+ # custom_prompt_checkbox.change(
107
+ # fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
108
+ # inputs=[custom_prompt_checkbox],
109
+ # outputs=[custom_prompt_input, system_prompt_input]
110
+ # )
111
+ #
112
+ # def on_preset_prompt_checkbox_change(is_checked):
113
+ # if is_checked:
114
+ # prompts, total_pages, current_page = list_prompts(page=1, per_page=10)
115
+ # page_display_text = f"Page {current_page} of {total_pages}"
116
+ # return (
117
+ # gr.update(visible=True, interactive=True, choices=prompts), # preset_prompt
118
+ # gr.update(visible=True), # prev_page_button
119
+ # gr.update(visible=True), # next_page_button
120
+ # gr.update(value=page_display_text, visible=True), # page_display
121
+ # current_page, # current_page_state
122
+ # total_pages # total_pages_state
123
+ # )
124
+ # else:
125
+ # return (
126
+ # gr.update(visible=False, interactive=False), # preset_prompt
127
+ # gr.update(visible=False), # prev_page_button
128
+ # gr.update(visible=False), # next_page_button
129
+ # gr.update(visible=False), # page_display
130
+ # 1, # current_page_state
131
+ # 1 # total_pages_state
132
+ # )
133
+ #
134
+ # preset_prompt_checkbox.change(
135
+ # fn=on_preset_prompt_checkbox_change,
136
+ # inputs=[preset_prompt_checkbox],
137
+ # outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
138
+ # )
139
+ #
140
+ # def on_prev_page_click(current_page, total_pages):
141
+ # new_page = max(current_page - 1, 1)
142
+ # prompts, total_pages, current_page = list_prompts(page=new_page, per_page=10)
143
+ # page_display_text = f"Page {current_page} of {total_pages}"
144
+ # return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
145
+ #
146
+ # prev_page_button.click(
147
+ # fn=on_prev_page_click,
148
+ # inputs=[current_page_state, total_pages_state],
149
+ # outputs=[preset_prompt, page_display, current_page_state]
150
+ # )
151
+ #
152
+ # def on_next_page_click(current_page, total_pages):
153
+ # new_page = min(current_page + 1, total_pages)
154
+ # prompts, total_pages, current_page = list_prompts(page=new_page, per_page=10)
155
+ # page_display_text = f"Page {current_page} of {total_pages}"
156
+ # return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
157
+ #
158
+ # next_page_button.click(
159
+ # fn=on_next_page_click,
160
+ # inputs=[current_page_state, total_pages_state],
161
+ # outputs=[preset_prompt, page_display, current_page_state]
162
+ # )
163
+ #
164
+ # def update_prompts(preset_name):
165
+ # prompts = update_user_prompt(preset_name)
166
+ # return (
167
+ # gr.update(value=prompts["user_prompt"], visible=True),
168
+ # gr.update(value=prompts["system_prompt"], visible=True)
169
+ # )
170
+ #
171
+ # preset_prompt.change(
172
+ # update_prompts,
173
+ # inputs=preset_prompt,
174
+ # outputs=[custom_prompt_input, system_prompt_input]
175
+ # )
176
 
177
+ pdf_ingest_button = gr.Button("Ingest PDFs")
178
+
179
+ # Update the upload button handler for multiple files
180
+ pdf_upload_button.upload(
181
+ fn=lambda files: files,
182
+ inputs=pdf_upload_button,
183
+ outputs=pdf_file_input
184
+ )
185
 
 
186
  with gr.Column():
187
+ pdf_result_output = gr.DataFrame(
188
+ headers=["Filename", "Status", "Message"],
189
+ label="Processing Results"
190
+ )
191
+
192
+ # Define a new function to handle multiple PDFs
193
+ # parser comes from parser_selection (accepted, not yet used); prompt args default so the click inputs below match
+ def process_multiple_pdfs(pdf_files, keywords, parser="pymupdf", custom_prompt_checkbox_value=False, custom_prompt_text=None, system_prompt_text=None):
194
+ results = []
195
+ if pdf_files is None:
196
+ return [["No files", "Error", "No files uploaded"]]
197
+
198
+ for pdf_file in pdf_files:
199
+ try:
200
+ # Extract metadata from PDF
201
+ metadata = extract_metadata_from_pdf(pdf_file.name)
202
 
203
+ # Use custom or system prompt if checkbox is checked
204
+ if custom_prompt_checkbox_value:
205
+ prompt = custom_prompt_text
206
+ system_prompt = system_prompt_text
207
+ else:
208
+ prompt = None
209
+ system_prompt = None
210
+
211
+ # Process the PDF with prompts
212
+ result = process_and_cleanup_pdf(
213
+ pdf_file,
214
+ metadata.get('title', os.path.splitext(os.path.basename(pdf_file.name))[0]),
215
+ metadata.get('author', 'Unknown'),
216
+ keywords,
217
+ #prompt=prompt,
218
+ #system_prompt=system_prompt
219
+ )
220
+
221
+ results.append([
222
+ pdf_file.name,
223
+ "Success" if "successfully" in result else "Error",
224
+ result
225
+ ])
226
+ except Exception as e:
227
+ results.append([
228
+ pdf_file.name,
229
+ "Error",
230
+ str(e)
231
+ ])
232
+
233
+ return results
234
+
235
+ # Update the ingest button click handler
236
  pdf_ingest_button.click(
237
+ fn=process_multiple_pdfs,
238
+ inputs=[
239
+ pdf_file_input,
240
+ pdf_keywords_input,
241
+ parser_selection,
242
+ #custom_prompt_checkbox,
243
+ #custom_prompt_input,
244
+ #system_prompt_input
245
+ ],
246
  outputs=pdf_result_output
247
  )
248
 
249
 
250
+ def test_pymupdf4llm_pdf_ingestion(pdf_file):
251
+ if pdf_file is None:
252
+ return "No file uploaded", ""
253
+
254
+ try:
255
+ # Create a temporary directory
256
+ with tempfile.TemporaryDirectory() as temp_dir:
257
+ # Create a path for the temporary PDF file
258
+ temp_path = os.path.join(temp_dir, "temp.pdf")
259
+
260
+ # Copy the contents of the uploaded file to the temporary file
261
+ shutil.copy(pdf_file.name, temp_path)
262
+
263
+ # Extract text and convert to Markdown
264
+ markdown_text = pymupdf4llm.to_markdown(temp_path)
265
+
266
+ # Extract metadata from PDF
267
+ metadata = extract_metadata_from_pdf(temp_path)
268
+
269
+ # Use metadata for title and author if not provided
270
+ title = metadata.get('title', os.path.splitext(os.path.basename(pdf_file.name))[0])
271
+ author = metadata.get('author', 'Unknown')
272
+
273
+ result = f"PDF '{title}' by {author} processed successfully by pymupdf4llm."
274
+ return result, markdown_text
275
+ except Exception as e:
276
+ return f"Error ingesting PDF: {str(e)}", ""
277
+
278
+
279
+ def test_pymupdf_pdf_ingestion(pdf_file):
280
  if pdf_file is None:
281
  return "No file uploaded", ""
282
 
 
299
  title = metadata.get('title', os.path.splitext(os.path.basename(pdf_file.name))[0])
300
  author = metadata.get('author', 'Unknown')
301
 
302
+ result = f"PDF '{title}' by {author} processed successfully by pymupdf."
303
+ return result, markdown_text
304
+ except Exception as e:
305
+ return f"Error ingesting PDF: {str(e)}", ""
306
+
307
+
308
+ def test_docling_pdf_ingestion(pdf_file):
309
+ if pdf_file is None:
310
+ return "No file uploaded", ""
311
+
312
+ try:
313
+ # Create a temporary directory
314
+ with tempfile.TemporaryDirectory() as temp_dir:
315
+ # Create a path for the temporary PDF file
316
+ temp_path = os.path.join(temp_dir, "temp.pdf")
317
+
318
+ # Copy the contents of the uploaded file to the temporary file
319
+ shutil.copy(pdf_file.name, temp_path)
320
+
321
+ # Extract text and convert to Markdown
322
+ converter = DocumentConverter()
323
+ parsed_pdf = converter.convert(temp_path)
324
+ markdown_text = parsed_pdf.document.export_to_markdown()
325
+ # Extract metadata from PDF
326
+ metadata = extract_metadata_from_pdf(temp_path)
327
+
328
+ # Use metadata for title and author if not provided
329
+ title = metadata.get('title', os.path.splitext(os.path.basename(pdf_file.name))[0])
330
+ author = metadata.get('author', 'Unknown')
331
+
332
+ result = f"PDF '{title}' by {author} processed successfully by pymupdf."
333
  return result, markdown_text
334
  except Exception as e:
335
  return f"Error ingesting PDF: {str(e)}", ""
 
339
  with gr.Row():
340
  with gr.Column():
341
  pdf_file_input = gr.File(label="Upload PDF for testing")
342
+ test_button = gr.Button("Test pymupdf PDF Ingestion")
343
+ test_button_2 = gr.Button("Test pymupdf4llm PDF Ingestion")
344
+ test_button_3 = gr.Button("Test Docling PDF Ingestion")
345
  with gr.Column():
346
  test_output = gr.Textbox(label="Test Result")
347
  pdf_content_output = gr.Textbox(label="PDF Content", lines=200)
348
  test_button.click(
349
+ fn=test_pymupdf_pdf_ingestion,
350
+ inputs=[pdf_file_input],
351
+ outputs=[test_output, pdf_content_output]
352
+ )
353
+ test_button_2.click(
354
+ fn=test_pymupdf4llm_pdf_ingestion,
355
+ inputs=[pdf_file_input],
356
+ outputs=[test_output, pdf_content_output]
357
+ )
358
+ test_button_3.click(
359
+ fn=test_docling_pdf_ingestion,
360
  inputs=[pdf_file_input],
361
  outputs=[test_output, pdf_content_output]
362
  )
App_Function_Libraries/Gradio_UI/Plaintext_tab_import.py CHANGED
@@ -6,6 +6,7 @@
6
  #######################################################################################################################
7
  #
8
  # Import necessary libraries
 
9
  import os
10
  import tempfile
11
  import zipfile
@@ -16,101 +17,104 @@ from docx2txt import docx2txt
16
  from pypandoc import convert_file
17
  #
18
  # Import Local libraries
19
- from App_Function_Libraries.Gradio_UI.Import_Functionality import import_data
 
20
  #
21
  #######################################################################################################################
22
  #
23
  # Functions:
24
 
25
  def create_plain_text_import_tab():
 
 
 
 
 
 
 
 
 
 
 
26
  with gr.TabItem("Import Plain text & .docx Files", visible=True):
27
  with gr.Row():
28
  with gr.Column():
29
- gr.Markdown("# Import Markdown(`.md`)/Text(`.txt`)/rtf & `.docx` Files")
30
- gr.Markdown("Upload a single file or a zip file containing multiple files")
31
- import_file = gr.File(label="Upload file for import", file_types=[".md", ".txt", ".rtf", ".docx", ".zip"])
32
- title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content (for single files)")
33
- author_input = gr.Textbox(label="Author", placeholder="Enter the author's name (for single files)")
34
- keywords_input = gr.Textbox(label="Keywords", placeholder="Enter keywords, comma-separated")
35
- system_prompt_input = gr.Textbox(label="System Prompt (for Summarization)", lines=3,
36
- value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
37
- **Bulleted Note Creation Guidelines**
38
-
39
- **Headings**:
40
- - Based on referenced topics, not categories like quotes or terms
41
- - Surrounded by **bold** formatting
42
- - Not listed as bullet points
43
- - No space between headings and list items underneath
44
 
45
- **Emphasis**:
46
- - **Important terms** set in bold font
47
- - **Text ending in a colon**: also bolded
 
 
 
48
 
49
- **Review**:
50
- - Ensure adherence to specified format
51
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]""",
52
- )
53
- custom_prompt_input = gr.Textbox(label="Custom User Prompt", placeholder="Enter a custom user prompt for summarization (optional)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
 
 
55
  api_name_input = gr.Dropdown(
56
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
57
- "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
58
- label="API for Auto-summarization"
59
  )
60
  api_key_input = gr.Textbox(label="API Key", type="password")
61
  import_button = gr.Button("Import File(s)")
62
- with gr.Column():
63
- import_output = gr.Textbox(label="Import Status")
64
-
65
 
66
- def import_plain_text_file(file_path, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
67
- try:
68
- # Determine the file type and convert if necessary
69
- file_extension = os.path.splitext(file_path)[1].lower()
70
- if file_extension == '.rtf':
71
- with tempfile.NamedTemporaryFile(suffix='.md', delete=False) as temp_file:
72
- convert_file(file_path, 'md', outputfile=temp_file.name)
73
- file_path = temp_file.name
74
- elif file_extension == '.docx':
75
- content = docx2txt.process(file_path)
76
- else:
77
- with open(file_path, 'r', encoding='utf-8') as file:
78
- content = file.read()
79
-
80
- # Process the content
81
- return import_data(content, title, author, keywords, system_prompt,
82
- user_prompt, auto_summarize, api_name, api_key)
83
- except Exception as e:
84
- return f"Error processing file: {str(e)}"
85
-
86
- def process_plain_text_zip_file(zip_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
87
- results = []
88
- with tempfile.TemporaryDirectory() as temp_dir:
89
- with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
90
- zip_ref.extractall(temp_dir)
91
-
92
- for filename in os.listdir(temp_dir):
93
- if filename.lower().endswith(('.md', '.txt', '.rtf', '.docx')):
94
- file_path = os.path.join(temp_dir, filename)
95
- result = import_plain_text_file(file_path, title, author, keywords, system_prompt,
96
- user_prompt, auto_summarize, api_name, api_key)
97
- results.append(f"File: {filename} - {result}")
98
-
99
- return "\n".join(results)
100
-
101
- def import_file_handler(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
102
- if file.name.lower().endswith(('.md', '.txt', '.rtf', '.docx')):
103
- return import_plain_text_file(file.name, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
104
- elif file.name.lower().endswith('.zip'):
105
- return process_plain_text_zip_file(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
106
- else:
107
- return "Unsupported file type. Please upload a .md, .txt, .rtf, .docx file or a .zip file containing these file types."
108
 
109
  import_button.click(
110
  fn=import_file_handler,
111
- inputs=[import_file, title_input, author_input, keywords_input, system_prompt_input,
112
- custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input],
 
 
 
 
 
 
 
 
113
  outputs=import_output
114
  )
115
 
116
- return import_file, title_input, author_input, keywords_input, system_prompt_input, custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input, import_button, import_output
 
 
 
 
 
6
  #######################################################################################################################
7
  #
8
  # Import necessary libraries
9
+ import logging
10
  import os
11
  import tempfile
12
  import zipfile
 
17
  from pypandoc import convert_file
18
  #
19
  # Import Local libraries
20
+ from App_Function_Libraries.Plaintext.Plaintext_Files import import_file_handler
21
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
22
  #
23
  #######################################################################################################################
24
  #
25
  # Functions:
26
 
27
  def create_plain_text_import_tab():
28
+ try:
29
+ default_value = None
30
+ if default_api_endpoint:
31
+ if default_api_endpoint in global_api_endpoints:
32
+ default_value = format_api_name(default_api_endpoint)
33
+ else:
34
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
35
+ except Exception as e:
36
+ logging.error(f"Error setting default API endpoint: {str(e)}")
37
+ default_value = None
38
+
39
  with gr.TabItem("Import Plain text & .docx Files", visible=True):
40
  with gr.Row():
41
  with gr.Column():
42
+ gr.Markdown("# Import `.md`/`.txt`/`.rtf`/`.docx` Files & `.zip` collections of them.")
43
+ gr.Markdown("Upload multiple files or a zip file containing multiple files")
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ # Updated to support multiple files
46
+ import_files = gr.File(
47
+ label="Upload files for import",
48
+ file_count="multiple",
49
+ file_types=[".md", ".txt", ".rtf", ".docx", ".zip"]
50
+ )
51
 
52
+ # Optional metadata override fields
53
+ author_input = gr.Textbox(
54
+ label="Author Override (optional)",
55
+ placeholder="Enter author name to apply to all files"
56
+ )
57
+ keywords_input = gr.Textbox(
58
+ label="Keywords",
59
+ placeholder="Enter keywords, comma-separated - will be applied to all files"
60
+ )
61
+ system_prompt_input = gr.Textbox(
62
+ label="System Prompt (for Summarization)",
63
+ lines=3,
64
+ value="""
65
+ <s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
66
+ **Bulleted Note Creation Guidelines**
67
+
68
+ **Headings**:
69
+ - Based on referenced topics, not categories like quotes or terms
70
+ - Surrounded by **bold** formatting
71
+ - Not listed as bullet points
72
+ - No space between headings and list items underneath
73
+
74
+ **Emphasis**:
75
+ - **Important terms** set in bold font
76
+ - **Text ending in a colon**: also bolded
77
+
78
+ **Review**:
79
+ - Ensure adherence to specified format
80
+ - Do not reference these instructions in your response.</s>[INST]
81
+ """
82
+ )
83
+ custom_prompt_input = gr.Textbox(
84
+ label="Custom User Prompt",
85
+ placeholder="Enter a custom user prompt for summarization (optional)"
86
+ )
87
  auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
88
+
89
+ # API configuration
90
  api_name_input = gr.Dropdown(
91
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
92
+ value=default_value,
93
+ label="API for Summarization/Analysis (Optional)"
94
  )
95
  api_key_input = gr.Textbox(label="API Key", type="password")
96
  import_button = gr.Button("Import File(s)")
 
 
 
97
 
98
+ with gr.Column():
99
+ import_output = gr.Textbox(label="Import Status", lines=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  import_button.click(
102
  fn=import_file_handler,
103
+ inputs=[
104
+ import_files,
105
+ author_input,
106
+ keywords_input,
107
+ system_prompt_input,
108
+ custom_prompt_input,
109
+ auto_summarize_checkbox,
110
+ api_name_input,
111
+ api_key_input
112
+ ],
113
  outputs=import_output
114
  )
115
 
116
+ return import_files, author_input, keywords_input, system_prompt_input, custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input, import_button, import_output
117
+
118
+ #
119
+ # End of Plain_text_import.py
120
+ #######################################################################################################################
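The click handler above delegates to Plaintext_Files.import_file_handler, which is outside this diff. Judging from the inline implementation it replaces (shown on the removed side above), its expected shape is roughly the sketch below (names and status messages are illustrative only):

def import_file_handler_sketch(files, author, keywords, system_prompt, user_prompt,
                               auto_summarize, api_name, api_key):
    statuses = []
    for f in files or []:
        name = f.name.lower()
        if name.endswith('.zip'):
            statuses.append(f"{f.name}: archive expanded; contained .md/.txt/.rtf/.docx files imported")
        elif name.endswith(('.md', '.txt', '.rtf', '.docx')):
            summarized = " and summarized" if auto_summarize and api_name and api_name != "None" else ""
            statuses.append(f"{f.name}: imported{summarized}")
        else:
            statuses.append(f"{f.name}: unsupported file type")
    return "\n".join(statuses) or "No files provided"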
App_Function_Libraries/Gradio_UI/Podcast_tab.py CHANGED
@@ -2,23 +2,38 @@
2
  # Description: Gradio UI for ingesting podcasts into the database
3
  #
4
  # Imports
 
5
  #
6
  # External Imports
7
  import gradio as gr
8
  #
9
  # Local Imports
10
  from App_Function_Libraries.Audio.Audio_Files import process_podcast
11
- from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
12
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
 
13
  #
14
  ########################################################################################################################
15
  #
16
  # Functions:
17
 
18
-
19
  def create_podcast_tab():
 
 
 
 
 
 
 
 
 
 
20
  with gr.TabItem("Podcast", visible=True):
21
  gr.Markdown("# Podcast Transcription and Ingestion", visible=True)
 
 
 
 
22
  with gr.Row():
23
  with gr.Column():
24
  podcast_url_input = gr.Textbox(label="Podcast URL", placeholder="Enter the podcast URL here")
@@ -35,54 +50,130 @@ def create_podcast_tab():
35
  keep_timestamps_input = gr.Checkbox(label="Keep Timestamps", value=True)
36
 
37
  with gr.Row():
38
- podcast_custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
39
- value=False,
40
- visible=True)
41
- preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
42
- value=False,
43
- visible=True)
 
 
 
 
 
44
  with gr.Row():
45
- preset_prompt = gr.Dropdown(label="Select Preset Prompt",
46
- choices=load_preset_prompts(),
47
- visible=False)
 
 
 
48
  with gr.Row():
49
- podcast_custom_prompt_input = gr.Textbox(label="Custom Prompt",
50
- placeholder="Enter custom prompt here",
51
- lines=3,
52
- visible=False)
53
  with gr.Row():
54
- system_prompt_input = gr.Textbox(label="System Prompt",
55
- value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
56
- **Bulleted Note Creation Guidelines**
57
-
58
- **Headings**:
59
- - Based on referenced topics, not categories like quotes or terms
60
- - Surrounded by **bold** formatting
61
- - Not listed as bullet points
62
- - No space between headings and list items underneath
63
-
64
- **Emphasis**:
65
- - **Important terms** set in bold font
66
- - **Text ending in a colon**: also bolded
67
-
68
- **Review**:
69
- - Ensure adherence to specified format
70
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
71
- """,
72
- lines=3,
73
- visible=False)
 
 
 
 
 
 
 
 
 
74
 
 
75
  podcast_custom_prompt_checkbox.change(
76
  fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
77
  inputs=[podcast_custom_prompt_checkbox],
78
  outputs=[podcast_custom_prompt_input, system_prompt_input]
79
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  preset_prompt_checkbox.change(
81
- fn=lambda x: gr.update(visible=x),
82
  inputs=[preset_prompt_checkbox],
83
- outputs=[preset_prompt]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  )
85
 
 
86
  def update_prompts(preset_name):
87
  prompts = update_user_prompt(preset_name)
88
  return (
@@ -91,16 +182,16 @@ def create_podcast_tab():
91
  )
92
 
93
  preset_prompt.change(
94
- update_prompts,
95
- inputs=preset_prompt,
96
  outputs=[podcast_custom_prompt_input, system_prompt_input]
97
  )
98
 
 
99
  podcast_api_name_input = gr.Dropdown(
100
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp",
101
- "Kobold", "Ooba", "Tabbyapi", "VLLM","ollama", "HuggingFace", "Custom-OpenAI-API"],
102
- value=None,
103
- label="API Name for Summarization (Optional)"
104
  )
105
  podcast_api_key_input = gr.Textbox(label="API Key (if required)", type="password")
106
  podcast_whisper_model_input = gr.Dropdown(choices=whisper_models, value="medium", label="Whisper Model")
@@ -151,13 +242,37 @@ def create_podcast_tab():
151
 
152
  podcast_process_button.click(
153
  fn=process_podcast,
154
- inputs=[podcast_url_input, podcast_title_input, podcast_author_input,
155
- podcast_keywords_input, podcast_custom_prompt_input, podcast_api_name_input,
156
- podcast_api_key_input, podcast_whisper_model_input, keep_original_input,
157
- enable_diarization_input, use_cookies_input, cookies_input,
158
- chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
159
- use_multi_level_chunking, chunk_language, keep_timestamps_input],
160
- outputs=[podcast_progress_output, podcast_transcription_output, podcast_summary_output,
161
- podcast_title_input, podcast_author_input, podcast_keywords_input, podcast_error_output,
162
- download_transcription, download_summary]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  )
 
2
  # Description: Gradio UI for ingesting podcasts into the database
3
  #
4
  # Imports
5
+ import logging
6
  #
7
  # External Imports
8
  import gradio as gr
9
  #
10
  # Local Imports
11
  from App_Function_Libraries.Audio.Audio_Files import process_podcast
12
+ from App_Function_Libraries.DB.DB_Manager import list_prompts
13
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
14
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
15
  #
16
  ########################################################################################################################
17
  #
18
  # Functions:
19
 
 
20
  def create_podcast_tab():
21
+ try:
22
+ default_value = None
23
+ if default_api_endpoint:
24
+ if default_api_endpoint in global_api_endpoints:
25
+ default_value = format_api_name(default_api_endpoint)
26
+ else:
27
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
28
+ except Exception as e:
29
+ logging.error(f"Error setting default API endpoint: {str(e)}")
30
+ default_value = None
31
  with gr.TabItem("Podcast", visible=True):
32
  gr.Markdown("# Podcast Transcription and Ingestion", visible=True)
33
+ # Initialize state variables for pagination
34
+ current_page_state = gr.State(value=1)
35
+ total_pages_state = gr.State(value=1)
36
+
37
  with gr.Row():
38
  with gr.Column():
39
  podcast_url_input = gr.Textbox(label="Podcast URL", placeholder="Enter the podcast URL here")
 
50
  keep_timestamps_input = gr.Checkbox(label="Keep Timestamps", value=True)
51
 
52
  with gr.Row():
53
+ podcast_custom_prompt_checkbox = gr.Checkbox(
54
+ label="Use a Custom Prompt",
55
+ value=False,
56
+ visible=True
57
+ )
58
+ preset_prompt_checkbox = gr.Checkbox(
59
+ label="Use a pre-set Prompt",
60
+ value=False,
61
+ visible=True
62
+ )
63
+
64
  with gr.Row():
65
+ # Preset prompt dropdown (its pagination controls are added in the next row)
66
+ preset_prompt = gr.Dropdown(
67
+ label="Select Preset Prompt",
68
+ choices=[],
69
+ visible=False
70
+ )
71
  with gr.Row():
72
+ prev_page_button = gr.Button("Previous Page", visible=False)
73
+ page_display = gr.Markdown("Page 1 of X", visible=False)
74
+ next_page_button = gr.Button("Next Page", visible=False)
75
+
76
  with gr.Row():
77
+ podcast_custom_prompt_input = gr.Textbox(
78
+ label="Custom Prompt",
79
+ placeholder="Enter custom prompt here",
80
+ lines=10,
81
+ visible=False
82
+ )
83
+ with gr.Row():
84
+ system_prompt_input = gr.Textbox(
85
+ label="System Prompt",
86
+ value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
87
+ **Bulleted Note Creation Guidelines**
88
+
89
+ **Headings**:
90
+ - Based on referenced topics, not categories like quotes or terms
91
+ - Surrounded by **bold** formatting
92
+ - Not listed as bullet points
93
+ - No space between headings and list items underneath
94
+
95
+ **Emphasis**:
96
+ - **Important terms** set in bold font
97
+ - **Text ending in a colon**: also bolded
98
+
99
+ **Review**:
100
+ - Ensure adherence to specified format
101
+ - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
102
+ """,
103
+ lines=10,
104
+ visible=False
105
+ )
106
 
107
+ # Handle custom prompt checkbox change
108
  podcast_custom_prompt_checkbox.change(
109
  fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
110
  inputs=[podcast_custom_prompt_checkbox],
111
  outputs=[podcast_custom_prompt_input, system_prompt_input]
112
  )
113
+
114
+ # Handle preset prompt checkbox change
115
+ def on_preset_prompt_checkbox_change(is_checked):
116
+ if is_checked:
117
+ prompts, total_pages, current_page = list_prompts(page=1, per_page=20)
118
+ page_display_text = f"Page {current_page} of {total_pages}"
119
+ return (
120
+ gr.update(visible=True, interactive=True, choices=prompts), # preset_prompt
121
+ gr.update(visible=True), # prev_page_button
122
+ gr.update(visible=True), # next_page_button
123
+ gr.update(value=page_display_text, visible=True), # page_display
124
+ current_page, # current_page_state
125
+ total_pages # total_pages_state
126
+ )
127
+ else:
128
+ return (
129
+ gr.update(visible=False, interactive=False), # preset_prompt
130
+ gr.update(visible=False), # prev_page_button
131
+ gr.update(visible=False), # next_page_button
132
+ gr.update(visible=False), # page_display
133
+ 1, # current_page_state
134
+ 1 # total_pages_state
135
+ )
136
+
137
  preset_prompt_checkbox.change(
138
+ fn=on_preset_prompt_checkbox_change,
139
  inputs=[preset_prompt_checkbox],
140
+ outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
141
+ )
142
+
143
+ # Pagination button functions
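+ # Each click re-queries list_prompts for the requested page, so the dropdown choices and the page display always reflect the current database contents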
144
+ def on_prev_page_click(current_page, total_pages):
145
+ new_page = max(current_page - 1, 1)
146
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
147
+ page_display_text = f"Page {current_page} of {total_pages}"
148
+ return (
149
+ gr.update(choices=prompts),
150
+ gr.update(value=page_display_text),
151
+ current_page
152
+ )
153
+
154
+ prev_page_button.click(
155
+ fn=on_prev_page_click,
156
+ inputs=[current_page_state, total_pages_state],
157
+ outputs=[preset_prompt, page_display, current_page_state]
158
+ )
159
+
160
+ def on_next_page_click(current_page, total_pages):
161
+ new_page = min(current_page + 1, total_pages)
162
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
163
+ page_display_text = f"Page {current_page} of {total_pages}"
164
+ return (
165
+ gr.update(choices=prompts),
166
+ gr.update(value=page_display_text),
167
+ current_page
168
+ )
169
+
170
+ next_page_button.click(
171
+ fn=on_next_page_click,
172
+ inputs=[current_page_state, total_pages_state],
173
+ outputs=[preset_prompt, page_display, current_page_state]
174
  )
175
 
176
+ # Update prompts when a preset is selected
177
  def update_prompts(preset_name):
178
  prompts = update_user_prompt(preset_name)
179
  return (
 
182
  )
183
 
184
  preset_prompt.change(
185
+ fn=update_prompts,
186
+ inputs=[preset_prompt],
187
  outputs=[podcast_custom_prompt_input, system_prompt_input]
188
  )
189
 
190
+ # Refactored API selection dropdown
191
  podcast_api_name_input = gr.Dropdown(
192
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
193
+ value=default_value,
194
+ label="API for Summarization/Analysis (Optional)"
 
195
  )
196
  podcast_api_key_input = gr.Textbox(label="API Key (if required)", type="password")
197
  podcast_whisper_model_input = gr.Dropdown(choices=whisper_models, value="medium", label="Whisper Model")
 
242
 
243
  podcast_process_button.click(
244
  fn=process_podcast,
245
+ inputs=[
246
+ podcast_url_input,
247
+ podcast_title_input,
248
+ podcast_author_input,
249
+ podcast_keywords_input,
250
+ podcast_custom_prompt_input,
251
+ podcast_api_name_input,
252
+ podcast_api_key_input,
253
+ podcast_whisper_model_input,
254
+ keep_original_input,
255
+ enable_diarization_input,
256
+ use_cookies_input,
257
+ cookies_input,
258
+ chunk_method,
259
+ max_chunk_size,
260
+ chunk_overlap,
261
+ use_adaptive_chunking,
262
+ use_multi_level_chunking,
263
+ chunk_language,
264
+ keep_timestamps_input,
265
+ system_prompt_input # Include system prompt input
266
+ ],
267
+ outputs=[
268
+ podcast_progress_output,
269
+ podcast_transcription_output,
270
+ podcast_summary_output,
271
+ podcast_title_input,
272
+ podcast_author_input,
273
+ podcast_keywords_input,
274
+ podcast_error_output,
275
+ download_transcription,
276
+ download_summary
277
+ ]
278
  )
App_Function_Libraries/Gradio_UI/Prompt_Suggestion_tab.py CHANGED
@@ -1,11 +1,14 @@
1
  # Description: Gradio UI for Creating and Testing new Prompts
2
  #
3
  # Imports
 
 
4
  import gradio as gr
5
 
6
- from App_Function_Libraries.Chat import chat
7
- from App_Function_Libraries.DB.SQLite_DB import add_or_update_prompt
8
  from App_Function_Libraries.Prompt_Engineering.Prompt_Engineering import generate_prompt, test_generated_prompt
 
9
 
10
 
11
  #
@@ -18,6 +21,16 @@ from App_Function_Libraries.Prompt_Engineering.Prompt_Engineering import generat
18
 
19
  # Gradio tab for prompt suggestion and testing
20
  def create_prompt_suggestion_tab():
21
  with gr.TabItem("Prompt Suggestion/Creation", visible=True):
22
  gr.Markdown("# Generate and Test AI Prompts with the Metaprompt Approach")
23
 
@@ -30,11 +43,11 @@ def create_prompt_suggestion_tab():
30
  placeholder="E.g., CUSTOMER_COMPLAINT, COMPANY_NAME")
31
 
32
  # API-related inputs
 
33
  api_name_input = gr.Dropdown(
34
- choices=["OpenAI", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp",
35
- "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace", "Custom-OpenAI-API"],
36
- label="API Provider",
37
- value="OpenAI" # Default selection
38
  )
39
 
40
  api_key_input = gr.Textbox(label="API Key", placeholder="Enter your API key (if required)",
 
1
  # Description: Gradio UI for Creating and Testing new Prompts
2
  #
3
  # Imports
4
+ import logging
5
+
6
  import gradio as gr
7
 
8
+ from App_Function_Libraries.Chat.Chat_Functions import chat
9
+ from App_Function_Libraries.DB.DB_Manager import add_or_update_prompt
10
  from App_Function_Libraries.Prompt_Engineering.Prompt_Engineering import generate_prompt, test_generated_prompt
11
+ from App_Function_Libraries.Utils.Utils import format_api_name, global_api_endpoints, default_api_endpoint
12
 
13
 
14
  #
 
21
 
22
  # Gradio tab for prompt suggestion and testing
23
  def create_prompt_suggestion_tab():
24
+ try:
25
+ default_value = None
26
+ if default_api_endpoint:
27
+ if default_api_endpoint in global_api_endpoints:
28
+ default_value = format_api_name(default_api_endpoint)
29
+ else:
30
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
31
+ except Exception as e:
32
+ logging.error(f"Error setting default API endpoint: {str(e)}")
33
+ default_value = None
34
  with gr.TabItem("Prompt Suggestion/Creation", visible=True):
35
  gr.Markdown("# Generate and Test AI Prompts with the Metaprompt Approach")
36
 
 
43
  placeholder="E.g., CUSTOMER_COMPLAINT, COMPANY_NAME")
44
 
45
  # API-related inputs
46
+ # Refactored API selection dropdown
47
  api_name_input = gr.Dropdown(
48
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
49
+ value=default_value,
50
+ label="API for Analysis (Optional)"
 
51
  )
52
 
53
  api_key_input = gr.Textbox(label="API Key", placeholder="Enter your API key (if required)",
App_Function_Libraries/Gradio_UI/Prompts_tab.py ADDED
@@ -0,0 +1,297 @@
1
+ # Prompts_tab.py
2
+ # Description: This file contains the code for the prompts tab in the Gradio UI
3
+ #
4
+ # Imports
5
+ import html
6
+ import logging
7
+
8
+ #
9
+ # External Imports
10
+ import gradio as gr
11
+ #
12
+ # Local Imports
13
+ from App_Function_Libraries.DB.DB_Manager import fetch_prompt_details, list_prompts
14
+ #
15
+ ####################################################################################################
16
+ #
17
+ # Functions:
18
+
19
+ def create_prompt_view_tab():
20
+ with gr.TabItem("View Prompt Database", visible=True):
21
+ gr.Markdown("# View Prompt Database Entries")
22
+ with gr.Row():
23
+ with gr.Column():
24
+ entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
25
+ page_number = gr.Number(value=1, label="Page Number", precision=0)
26
+ view_button = gr.Button("View Page")
27
+ previous_page_button = gr.Button("Previous Page", visible=True)
28
+ next_page_button = gr.Button("Next Page", visible=True)
29
+ pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
30
+ prompt_selector = gr.Dropdown(label="Select Prompt to View", choices=[])
31
+ with gr.Column():
32
+ results_table = gr.HTML()
33
+ selected_prompt_display = gr.HTML()
34
+
35
+ # Function to view database entries
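+ # list_prompts returns one page of prompt names; fetch_prompt_details fills in the title/author shown in the table and selector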
36
+ def view_database(page, entries_per_page):
37
+ try:
38
+ # Use list_prompts to get prompts and total pages
39
+ prompts, total_pages, current_page = list_prompts(page=int(page), per_page=int(entries_per_page))
40
+
41
+ table_html = "<table style='width:100%; border-collapse: collapse;'>"
42
+ table_html += "<tr><th style='border: 1px solid black; padding: 8px;'>Title</th><th style='border: 1px solid black; padding: 8px;'>Author</th></tr>"
43
+ prompt_choices = []
44
+ for prompt_name in prompts:
45
+ details = fetch_prompt_details(prompt_name)
46
+ if details:
47
+ title, author, _, _, _, _ = details
48
+ author = author or "Unknown" # Handle None author
49
+ table_html += f"<tr><td style='border: 1px solid black; padding: 8px;'>{html.escape(title)}</td><td style='border: 1px solid black; padding: 8px;'>{html.escape(author)}</td></tr>"
50
+ prompt_choices.append(prompt_name) # Using prompt_name as value
51
+ table_html += "</table>"
52
+
53
+ # Get total prompts if possible
54
+ total_prompts = total_pages * int(entries_per_page) # This might overestimate if the last page is not full
55
+
56
+ pagination = f"Page {current_page} of {total_pages} (Total prompts: {total_prompts})"
57
+
58
+ return table_html, pagination, total_pages, prompt_choices
59
+ except Exception as e:
60
+ return f"<p>Error fetching prompts: {e}</p>", "Error", 0, []
61
+
62
+ # Function to update page content
63
+ def update_page(page, entries_per_page):
64
+ results, pagination, total_pages, prompt_choices = view_database(page, entries_per_page)
65
+ page = int(page)
66
+ next_disabled = page >= total_pages
67
+ prev_disabled = page <= 1
68
+ return (
69
+ results,
70
+ pagination,
71
+ page,
72
+ gr.update(visible=True, interactive=not prev_disabled), # previous_page_button
73
+ gr.update(visible=True, interactive=not next_disabled), # next_page_button
74
+ gr.update(choices=prompt_choices)
75
+ )
76
+
77
+ # Function to go to the next page
78
+ def go_to_next_page(current_page, entries_per_page):
79
+ next_page = int(current_page) + 1
80
+ return update_page(next_page, entries_per_page)
81
+
82
+ # Function to go to the previous page
83
+ def go_to_previous_page(current_page, entries_per_page):
84
+ previous_page = max(1, int(current_page) - 1)
85
+ return update_page(previous_page, entries_per_page)
86
+
87
+ # Function to display selected prompt details
88
+ def display_selected_prompt(prompt_name):
89
+ details = fetch_prompt_details(prompt_name)
90
+ if details:
91
+ title, author, description, system_prompt, user_prompt, keywords = details
92
+ # Handle None values by converting them to empty strings
93
+ description = description or ""
94
+ system_prompt = system_prompt or ""
95
+ user_prompt = user_prompt or ""
96
+ author = author or "Unknown"
97
+ keywords = keywords or ""
98
+
99
+ html_content = f"""
100
+ <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 20px;">
101
+ <h3>{html.escape(title)}</h3> <h4>by {html.escape(author)}</h4>
102
+ <p><strong>Description:</strong> {html.escape(description)}</p>
103
+ <div style="margin-top: 10px;">
104
+ <strong>System Prompt:</strong>
105
+ <pre style="white-space: pre-wrap; word-wrap: break-word;">{html.escape(system_prompt)}</pre>
106
+ </div>
107
+ <div style="margin-top: 10px;">
108
+ <strong>User Prompt:</strong>
109
+ <pre style="white-space: pre-wrap; word-wrap: break-word;">{html.escape(user_prompt)}</pre>
110
+ </div>
111
+ <p><strong>Keywords:</strong> {html.escape(keywords)}</p>
112
+ </div>
113
+ """
114
+ return html_content
115
+ else:
116
+ return "<p>Prompt not found.</p>"
117
+
118
+ # Event handlers
119
+ view_button.click(
120
+ fn=update_page,
121
+ inputs=[page_number, entries_per_page],
122
+ outputs=[results_table, pagination_info, page_number, previous_page_button, next_page_button, prompt_selector]
123
+ )
124
+
125
+ next_page_button.click(
126
+ fn=go_to_next_page,
127
+ inputs=[page_number, entries_per_page],
128
+ outputs=[results_table, pagination_info, page_number, previous_page_button, next_page_button, prompt_selector]
129
+ )
130
+
131
+ previous_page_button.click(
132
+ fn=go_to_previous_page,
133
+ inputs=[page_number, entries_per_page],
134
+ outputs=[results_table, pagination_info, page_number, previous_page_button, next_page_button, prompt_selector]
135
+ )
136
+
137
+ prompt_selector.change(
138
+ fn=display_selected_prompt,
139
+ inputs=[prompt_selector],
140
+ outputs=[selected_prompt_display]
141
+ )
142
+
143
+
144
+
145
+ def create_prompts_export_tab():
146
+ """Creates a tab for exporting prompts database content with multiple format options"""
147
+ with gr.TabItem("Export Prompts", visible=True):
148
+ gr.Markdown("# Export Prompts Database Content")
149
+
150
+ with gr.Row():
151
+ with gr.Column():
152
+ export_type = gr.Radio(
153
+ choices=["All Prompts", "Prompts by Keyword"],
154
+ label="Export Type",
155
+ value="All Prompts"
156
+ )
157
+
158
+ # Keyword selection for filtered export
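+ # Hidden until "Prompts by Keyword" is selected (see update_ui_visibility below)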
159
+ with gr.Column(visible=False) as keyword_col:
160
+ keyword_input = gr.Textbox(
161
+ label="Enter Keywords (comma-separated)",
162
+ placeholder="Enter keywords to filter prompts..."
163
+ )
164
+
165
+ # Export format selection
166
+ export_format = gr.Radio(
167
+ choices=["CSV", "Markdown (ZIP)"],
168
+ label="Export Format",
169
+ value="CSV"
170
+ )
171
+
172
+ # Export options
173
+ include_options = gr.CheckboxGroup(
174
+ choices=[
175
+ "Include System Prompts",
176
+ "Include User Prompts",
177
+ "Include Details",
178
+ "Include Author",
179
+ "Include Keywords"
180
+ ],
181
+ label="Export Options",
182
+ value=["Include Keywords", "Include Author"]
183
+ )
184
+
185
+ # Markdown-specific options (only visible when Markdown is selected)
186
+ with gr.Column(visible=False) as markdown_options_col:
187
+ markdown_template = gr.Radio(
188
+ choices=[
189
+ "Basic Template",
190
+ "Detailed Template",
191
+ "Custom Template"
192
+ ],
193
+ label="Markdown Template",
194
+ value="Basic Template"
195
+ )
196
+ custom_template = gr.Textbox(
197
+ label="Custom Template",
198
+ placeholder="Use {title}, {author}, {details}, {system}, {user}, {keywords} as placeholders",
199
+ visible=False
200
+ )
201
+
202
+ export_button = gr.Button("Export Prompts")
203
+
204
+ with gr.Column():
205
+ export_status = gr.Textbox(label="Export Status", interactive=False)
206
+ export_file = gr.File(label="Download Export")
207
+
208
+ def update_ui_visibility(export_type, format_choice, template_choice):
209
+ """Update UI elements visibility based on selections"""
210
+ show_keywords = export_type == "Prompts by Keyword"
211
+ show_markdown_options = format_choice == "Markdown (ZIP)"
212
+ show_custom_template = template_choice == "Custom Template" and show_markdown_options
213
+
214
+ return [
215
+ gr.update(visible=show_keywords), # keyword_col
216
+ gr.update(visible=show_markdown_options), # markdown_options_col
217
+ gr.update(visible=show_custom_template) # custom_template
218
+ ]
219
+
220
+ def handle_export(export_type, keywords, export_format, options, markdown_template, custom_template):
221
+ """Handle the export process based on selected options"""
222
+ try:
223
+ # Parse options
224
+ include_system = "Include System Prompts" in options
225
+ include_user = "Include User Prompts" in options
226
+ include_details = "Include Details" in options
227
+ include_author = "Include Author" in options
228
+ include_keywords = "Include Keywords" in options
229
+
230
+ # Handle keyword filtering
231
+ keyword_list = None
232
+ if export_type == "Prompts by Keyword" and keywords:
233
+ keyword_list = [k.strip() for k in keywords.split(",") if k.strip()]
234
+
235
+ # Get the appropriate template
236
+ template = None
237
+ if export_format == "Markdown (ZIP)":
238
+ if markdown_template == "Custom Template":
239
+ template = custom_template
240
+ else:
241
+ template = markdown_template
242
+
243
+ # Perform export
244
+ from App_Function_Libraries.DB.Prompts_DB import export_prompts
245
+ status, file_path = export_prompts(
246
+ export_format=export_format.split()[0].lower(), # 'csv' or 'markdown'
247
+ filter_keywords=keyword_list,
248
+ include_system=include_system,
249
+ include_user=include_user,
250
+ include_details=include_details,
251
+ include_author=include_author,
252
+ include_keywords=include_keywords,
253
+ markdown_template=template
254
+ )
255
+
256
+ return status, file_path
257
+
258
+ except Exception as e:
259
+ error_msg = f"Export failed: {str(e)}"
260
+ logging.error(error_msg)
261
+ return error_msg, None
262
+
263
+ # Event handlers
264
+ export_type.change(
265
+ fn=lambda t, f, m: update_ui_visibility(t, f, m),
266
+ inputs=[export_type, export_format, markdown_template],
267
+ outputs=[keyword_col, markdown_options_col, custom_template]
268
+ )
269
+
270
+ export_format.change(
271
+ fn=lambda t, f, m: update_ui_visibility(t, f, m),
272
+ inputs=[export_type, export_format, markdown_template],
273
+ outputs=[keyword_col, markdown_options_col, custom_template]
274
+ )
275
+
276
+ markdown_template.change(
277
+ fn=lambda t, f, m: update_ui_visibility(t, f, m),
278
+ inputs=[export_type, export_format, markdown_template],
279
+ outputs=[keyword_col, markdown_options_col, custom_template]
280
+ )
281
+
282
+ export_button.click(
283
+ fn=handle_export,
284
+ inputs=[
285
+ export_type,
286
+ keyword_input,
287
+ export_format,
288
+ include_options,
289
+ markdown_template,
290
+ custom_template
291
+ ],
292
+ outputs=[export_status, export_file]
293
+ )
294
+
295
+ #
296
+ # End of Prompts_tab.py
297
+ ####################################################################################################
App_Function_Libraries/Gradio_UI/RAG_Chat_tab.py CHANGED
@@ -10,12 +10,26 @@ import gradio as gr
10
  # Local Imports
11
 
12
  from App_Function_Libraries.RAG.RAG_Library_2 import enhanced_rag_pipeline
13
  #
14
  ########################################################################################################################
15
  #
16
  # Functions:
17
 
18
  def create_rag_tab():
19
  with gr.TabItem("RAG Search", visible=True):
20
  gr.Markdown("# Retrieval-Augmented Generation (RAG) Search")
21
 
@@ -36,10 +50,11 @@ def create_rag_tab():
36
  visible=False
37
  )
38
 
 
39
  api_choice = gr.Dropdown(
40
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
41
- label="Select API for RAG",
42
- value="OpenAI"
43
  )
44
  search_button = gr.Button("Search")
45
 
 
10
  # Local Imports
11
 
12
  from App_Function_Libraries.RAG.RAG_Library_2 import enhanced_rag_pipeline
13
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
14
+
15
+
16
  #
17
  ########################################################################################################################
18
  #
19
  # Functions:
20
 
21
  def create_rag_tab():
22
+ try:
23
+ default_value = None
24
+ if default_api_endpoint:
25
+ if default_api_endpoint in global_api_endpoints:
26
+ default_value = format_api_name(default_api_endpoint)
27
+ else:
28
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
29
+ except Exception as e:
30
+ logging.error(f"Error setting default API endpoint: {str(e)}")
31
+ default_value = None
32
+
33
  with gr.TabItem("RAG Search", visible=True):
34
  gr.Markdown("# Retrieval-Augmented Generation (RAG) Search")
35
 
 
50
  visible=False
51
  )
52
 
53
+ # Refactored API selection dropdown
54
  api_choice = gr.Dropdown(
55
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
56
+ value=default_value,
57
+ label="API for Chat Response (Optional)"
58
  )
59
  search_button = gr.Button("Search")
60
 
App_Function_Libraries/Gradio_UI/RAG_QA_Chat_tab.py CHANGED
@@ -6,6 +6,7 @@ import csv
6
  import logging
7
  import json
8
  import os
 
9
  from datetime import datetime
10
  #
11
  # External Imports
@@ -14,32 +15,39 @@ import gradio as gr
14
  #
15
  # Local Imports
16
  from App_Function_Libraries.Books.Book_Ingestion_Lib import read_epub
17
- from App_Function_Libraries.DB.DB_Manager import DatabaseError, get_paginated_files, add_media_with_keywords
18
- from App_Function_Libraries.DB.RAG_QA_Chat_DB import (
19
- save_notes,
20
- add_keywords_to_note,
21
- start_new_conversation,
22
- save_message,
23
- search_conversations_by_keywords,
24
- load_chat_history,
25
- get_all_conversations,
26
- get_note_by_id,
27
- get_notes_by_keywords,
28
- get_notes_by_keyword_collection,
29
- update_note,
30
- clear_keywords_from_note, get_notes, get_keywords_for_note, delete_conversation, delete_note, execute_query,
31
- add_keywords_to_conversation, fetch_all_notes, fetch_all_conversations, fetch_conversations_by_ids,
32
- fetch_notes_by_ids,
33
- )
34
  from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_text_and_format_from_pdf
35
  from App_Function_Libraries.RAG.RAG_Library_2 import generate_answer, enhanced_rag_pipeline
36
  from App_Function_Libraries.RAG.RAG_QA_Chat import search_database, rag_qa_chat
 
 
 
 
37
  #
38
  ########################################################################################################################
39
  #
40
  # Functions:
41
 
42
  def create_rag_qa_chat_tab():
43
  with gr.TabItem("RAG QA Chat", visible=True):
44
  gr.Markdown("# RAG QA Chat")
45
 
@@ -47,18 +55,53 @@ def create_rag_qa_chat_tab():
47
  "page": 1,
48
  "context_source": "Entire Media Database",
49
  "conversation_messages": [],
 
50
  })
51
 
52
  note_state = gr.State({"note_id": None})
53
54
  # Update the conversation list function
55
  def update_conversation_list():
56
  conversations, total_pages, total_count = get_all_conversations()
57
- choices = [f"{title} (ID: {conversation_id})" for conversation_id, title in conversations]
 
 
 
58
  return choices
59
 
60
  with gr.Row():
61
  with gr.Column(scale=1):
62
  context_source = gr.Radio(
63
  ["All Files in the Database", "Search Database", "Upload File"],
64
  label="Context Source",
@@ -71,19 +114,52 @@ def create_rag_qa_chat_tab():
71
  next_page_btn = gr.Button("Next Page")
72
  page_info = gr.HTML("Page 1")
73
  top_k_input = gr.Number(value=10, label="Maximum amount of results to use (Default: 10)", minimum=1, maximum=50, step=1, precision=0, interactive=True)
74
- keywords_input = gr.Textbox(label="Keywords (comma-separated) to filter results by)", visible=True)
75
  use_query_rewriting = gr.Checkbox(label="Use Query Rewriting", value=True)
76
  use_re_ranking = gr.Checkbox(label="Use Re-ranking", value=True)
77
- # with gr.Row():
78
- # page_number = gr.Number(value=1, label="Page", precision=0)
79
- # page_size = gr.Number(value=20, label="Items per page", precision=0)
80
- # total_pages = gr.Number(label="Total Pages", interactive=False)
 
 
 
 
 
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  search_query = gr.Textbox(label="Search Query", visible=False)
84
  search_button = gr.Button("Search", visible=False)
85
  search_results = gr.Dropdown(label="Search Results", choices=[], visible=False)
86
- # FIXME - Add pages for search results handling
87
  file_upload = gr.File(
88
  label="Upload File",
89
  visible=False,
@@ -95,34 +171,28 @@ def create_rag_qa_chat_tab():
95
  load_conversation = gr.Dropdown(
96
  label="Load Conversation",
97
  choices=update_conversation_list()
98
- )
99
  new_conversation = gr.Button("New Conversation")
100
  save_conversation_button = gr.Button("Save Conversation")
101
  conversation_title = gr.Textbox(
102
- label="Conversation Title", placeholder="Enter a title for the new conversation"
 
103
  )
104
  keywords = gr.Textbox(label="Keywords (comma-separated)", visible=True)
105
 
 
 
 
 
 
 
 
 
 
106
  api_choice = gr.Dropdown(
107
- choices=[
108
- "Local-LLM",
109
- "OpenAI",
110
- "Anthropic",
111
- "Cohere",
112
- "Groq",
113
- "DeepSeek",
114
- "Mistral",
115
- "OpenRouter",
116
- "Llama.cpp",
117
- "Kobold",
118
- "Ooba",
119
- "Tabbyapi",
120
- "VLLM",
121
- "ollama",
122
- "HuggingFace",
123
- ],
124
- label="Select API for RAG",
125
- value="OpenAI",
126
  )
127
 
128
  with gr.Row():
@@ -145,6 +215,8 @@ def create_rag_qa_chat_tab():
145
  clear_notes_btn = gr.Button("Clear Current Note text")
146
 
147
  new_note_btn = gr.Button("New Note")
 
 
148
  search_notes_by_keyword = gr.Textbox(label="Search Notes by Keyword")
149
  search_notes_button = gr.Button("Search Notes")
150
  note_results = gr.Dropdown(label="Notes", choices=[])
@@ -152,8 +224,58 @@ def create_rag_qa_chat_tab():
152
 
153
  loading_indicator = gr.HTML("Loading...", visible=False)
154
  status_message = gr.HTML()
 
 
 
155
 
156
  # Function Definitions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
  def update_state(state, **kwargs):
159
  new_state = state.copy()
@@ -168,18 +290,28 @@ def create_rag_qa_chat_tab():
168
  outputs=[note_title, notes, note_state]
169
  )
170
 
171
- def search_notes(keywords):
172
  if keywords:
173
  keywords_list = [kw.strip() for kw in keywords.split(',')]
174
  notes_data, total_pages, total_count = get_notes_by_keywords(keywords_list)
175
- choices = [f"Note {note_id} ({timestamp})" for note_id, title, content, timestamp in notes_data]
176
- return gr.update(choices=choices)
 
 
 
 
 
 
177
  else:
178
- return gr.update(choices=[])
 
 
 
 
179
 
180
  search_notes_button.click(
181
  search_notes,
182
- inputs=[search_notes_by_keyword],
183
  outputs=[note_results]
184
  )
185
 
@@ -201,31 +333,69 @@ def create_rag_qa_chat_tab():
201
 
202
  def save_notes_function(note_title_text, notes_content, keywords_content, note_state_value, state_value):
203
  """Save the notes and associated keywords to the database."""
204
- conversation_id = state_value.get("conversation_id")
205
- note_id = note_state_value["note_id"]
206
- if conversation_id and notes_content:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  if note_id:
208
- # Update existing note
209
  update_note(note_id, note_title_text, notes_content)
210
  else:
211
- # Save new note
212
- note_id = save_notes(conversation_id, note_title_text, notes_content)
213
- note_state_value["note_id"] = note_id
 
 
 
214
  if keywords_content:
215
- # Clear existing keywords and add new ones
216
  clear_keywords_from_note(note_id)
217
- add_keywords_to_note(note_id, [kw.strip() for kw in keywords_content.split(',')])
 
 
218
 
219
- logging.info("Notes and keywords saved successfully!")
220
- return notes_content, note_state_value
221
- else:
222
- logging.warning("No conversation ID or notes to save.")
223
- return "", note_state_value
 
 
 
 
 
 
 
 
 
 
 
224
 
225
  save_notes_btn.click(
226
  save_notes_function,
227
  inputs=[note_title, notes, keywords_for_notes, note_state, state],
228
- outputs=[notes, note_state]
229
  )
230
 
231
  def clear_notes_function():
@@ -237,83 +407,112 @@ def create_rag_qa_chat_tab():
237
  outputs=[notes, note_state]
238
  )
239
 
240
- def update_conversation_list():
241
- conversations, total_pages, total_count = get_all_conversations()
242
- choices = [f"{title} (ID: {conversation_id})" for conversation_id, title in conversations]
243
- return choices
244
-
245
  # Initialize the conversation list
246
  load_conversation.choices = update_conversation_list()
247
 
248
  def load_conversation_history(selected_conversation, state_value):
249
- if selected_conversation:
250
- conversation_id = selected_conversation.split('(ID: ')[1][:-1]
 
 
 
 
 
 
 
251
  chat_data, total_pages_val, _ = load_chat_history(conversation_id, 1, 50)
252
- # Convert chat data to list of tuples (user_message, assistant_response)
 
 
 
 
253
  history = []
254
  for role, content in chat_data:
255
  if role == 'user':
256
  history.append((content, ''))
257
- else:
258
- if history:
259
- history[-1] = (history[-1][0], content)
260
- else:
261
- history.append(('', content))
262
- # Retrieve notes
 
 
 
 
 
263
  notes_content = get_notes(conversation_id)
264
- updated_state = update_state(state_value, conversation_id=conversation_id, page=1,
265
- conversation_messages=[])
266
- return history, updated_state, "\n".join(notes_content)
267
- return [], state_value, ""
 
268
 
269
  load_conversation.change(
270
  load_conversation_history,
271
  inputs=[load_conversation, state],
272
- outputs=[chatbot, state, notes]
273
  )
274
 
275
  # Modify save_conversation_function to use gr.update()
276
- def save_conversation_function(conversation_title_text, keywords_text, state_value):
277
  conversation_messages = state_value.get("conversation_messages", [])
 
278
  if not conversation_messages:
279
  return gr.update(
280
  value="<p style='color:red;'>No conversation to save.</p>"
281
- ), state_value, gr.update()
282
- # Start a new conversation in the database
283
- new_conversation_id = start_new_conversation(
284
- conversation_title_text if conversation_title_text else "Untitled Conversation"
285
- )
 
 
 
 
286
  # Save the messages
287
  for role, content in conversation_messages:
288
- save_message(new_conversation_id, role, content)
289
  # Save keywords if provided
290
  if keywords_text:
291
- add_keywords_to_conversation(new_conversation_id, [kw.strip() for kw in keywords_text.split(',')])
 
 
 
 
 
 
 
 
 
 
292
  # Update state
293
- updated_state = update_state(state_value, conversation_id=new_conversation_id)
294
  # Update the conversation list
295
  conversation_choices = update_conversation_list()
 
 
 
296
  return gr.update(
297
  value="<p style='color:green;'>Conversation saved successfully.</p>"
298
- ), updated_state, gr.update(choices=conversation_choices)
299
 
300
  save_conversation_button.click(
301
  save_conversation_function,
302
- inputs=[conversation_title, keywords, state],
303
- outputs=[status_message, state, load_conversation]
304
  )
305
 
306
  def start_new_conversation_wrapper(title, state_value):
307
- # Reset the state with no conversation_id
308
- updated_state = update_state(state_value, conversation_id=None, page=1,
309
- conversation_messages=[])
310
- # Clear the chat history
311
- return [], updated_state
312
 
313
  new_conversation.click(
314
  start_new_conversation_wrapper,
315
  inputs=[conversation_title, state],
316
- outputs=[chatbot, state]
317
  )
318
 
319
  def update_file_list(page):
@@ -328,11 +527,12 @@ def create_rag_qa_chat_tab():
328
  return update_file_list(max(1, current_page - 1))
329
 
330
  def update_context_source(choice):
 
331
  return {
332
  existing_file: gr.update(visible=choice == "Existing File"),
333
- prev_page_btn: gr.update(visible=choice == "Existing File"),
334
- next_page_btn: gr.update(visible=choice == "Existing File"),
335
- page_info: gr.update(visible=choice == "Existing File"),
336
  search_query: gr.update(visible=choice == "Search Database"),
337
  search_button: gr.update(visible=choice == "Search Database"),
338
  search_results: gr.update(visible=choice == "Search Database"),
@@ -352,17 +552,36 @@ def create_rag_qa_chat_tab():
352
  context_source.change(lambda choice: update_file_list(1) if choice == "Existing File" else (gr.update(), gr.update(), 1),
353
  inputs=[context_source], outputs=[existing_file, page_info, file_page])
354
 
355
- def perform_search(query):
356
  try:
357
- results = search_database(query)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  return gr.update(choices=results)
359
  except Exception as e:
360
  gr.Error(f"Error performing search: {str(e)}")
361
  return gr.update(choices=[])
362
 
 
363
  search_button.click(
364
  perform_search,
365
- inputs=[search_query],
366
  outputs=[search_results]
367
  )
368
 
@@ -384,17 +603,22 @@ Rewritten Question:"""
384
  logging.info(f"Rephrased question: {rephrased_question}")
385
  return rephrased_question.strip()
386
 
387
- def rag_qa_chat_wrapper(message, history, context_source, existing_file, search_results, file_upload,
388
- convert_to_text, keywords, api_choice, use_query_rewriting, state_value,
389
- keywords_input, top_k_input, use_re_ranking):
 
 
 
390
  try:
391
  logging.info(f"Starting rag_qa_chat_wrapper with message: {message}")
392
  logging.info(f"Context source: {context_source}")
393
  logging.info(f"API choice: {api_choice}")
394
  logging.info(f"Query rewriting: {'enabled' if use_query_rewriting else 'disabled'}")
 
395
 
396
  # Show loading indicator
397
- yield history, "", gr.update(visible=True), state_value
 
398
 
399
  conversation_id = state_value.get("conversation_id")
400
  conversation_messages = state_value.get("conversation_messages", [])
@@ -408,12 +632,12 @@ Rewritten Question:"""
408
  state_value["conversation_messages"] = conversation_messages
409
 
410
  # Ensure api_choice is a string
411
- api_choice = api_choice.value if isinstance(api_choice, gr.components.Dropdown) else api_choice
412
- logging.info(f"Resolved API choice: {api_choice}")
413
 
414
  # Only rephrase the question if it's not the first query and query rewriting is enabled
415
  if len(history) > 0 and use_query_rewriting:
416
- rephrased_question = rephrase_question(history, message, api_choice)
417
  logging.info(f"Original question: {message}")
418
  logging.info(f"Rephrased question: {rephrased_question}")
419
  else:
@@ -421,18 +645,20 @@ Rewritten Question:"""
421
  logging.info(f"Using original question: {message}")
422
 
423
  if context_source == "All Files in the Database":
424
- # Use the enhanced_rag_pipeline to search the entire database
425
- context = enhanced_rag_pipeline(rephrased_question, api_choice, keywords_input, top_k_input,
426
- use_re_ranking)
 
 
427
  logging.info(f"Using enhanced_rag_pipeline for database search")
428
  elif context_source == "Search Database":
429
  context = f"media_id:{search_results.split('(ID: ')[1][:-1]}"
430
  logging.info(f"Using search result with context: {context}")
431
- else: # Upload File
 
432
  logging.info("Processing uploaded file")
433
  if file_upload is None:
434
  raise ValueError("No file uploaded")
435
-
436
  # Process the uploaded file
437
  file_path = file_upload.name
438
  file_name = os.path.basename(file_path)
@@ -445,7 +671,6 @@ Rewritten Question:"""
445
  logging.info("Reading file content")
446
  with open(file_path, 'r', encoding='utf-8') as f:
447
  content = f.read()
448
-
449
  logging.info(f"File content length: {len(content)} characters")
450
 
451
  # Process keywords
@@ -467,18 +692,17 @@ Rewritten Question:"""
467
  author='Unknown',
468
  ingestion_date=datetime.now().strftime('%Y-%m-%d')
469
  )
470
-
471
  logging.info(f"Result from add_media_with_keywords: {result}")
472
  if isinstance(result, tuple):
473
  media_id, _ = result
474
  else:
475
  media_id = result
476
-
477
  context = f"media_id:{media_id}"
478
  logging.info(f"Context for uploaded file: {context}")
479
 
480
  logging.info("Calling rag_qa_chat function")
481
- new_history, response = rag_qa_chat(rephrased_question, history, context, api_choice)
 
482
  # Log first 100 chars of response
483
  logging.info(f"Response received from rag_qa_chat: {response[:100]}...")
484
 
@@ -490,7 +714,8 @@ Rewritten Question:"""
490
  state_value["conversation_messages"] = conversation_messages
491
 
492
  # Update the state
493
- state_value["conversation_messages"] = conversation_messages
 
494
 
495
  # Safely update history
496
  if new_history:
@@ -498,24 +723,43 @@ Rewritten Question:"""
498
  else:
499
  new_history = [(message, response)]
500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  gr.Info("Response generated successfully")
502
  logging.info("rag_qa_chat_wrapper completed successfully")
503
- yield new_history, "", gr.update(visible=False), state_value # Include state_value in outputs
 
 
504
  except ValueError as e:
505
  logging.error(f"Input error in rag_qa_chat_wrapper: {str(e)}")
506
  gr.Error(f"Input error: {str(e)}")
507
- yield history, "", gr.update(visible=False), state_value
 
508
  except DatabaseError as e:
509
  logging.error(f"Database error in rag_qa_chat_wrapper: {str(e)}")
510
  gr.Error(f"Database error: {str(e)}")
511
- yield history, "", gr.update(visible=False), state_value
 
512
  except Exception as e:
513
  logging.error(f"Unexpected error in rag_qa_chat_wrapper: {e}", exc_info=True)
514
  gr.Error("An unexpected error occurred. Please try again later.")
515
- yield history, "", gr.update(visible=False), state_value
 
516
 
517
  def clear_chat_history():
518
- return [], ""
519
 
520
  submit.click(
521
  rag_qa_chat_wrapper,
@@ -532,14 +776,17 @@ Rewritten Question:"""
532
  use_query_rewriting,
533
  state,
534
  keywords_input,
535
- top_k_input
 
 
 
536
  ],
537
- outputs=[chatbot, msg, loading_indicator, state],
538
  )
539
 
540
  clear_chat.click(
541
  clear_chat_history,
542
- outputs=[chatbot, msg]
543
  )
544
 
545
  return (
@@ -560,12 +807,10 @@ Rewritten Question:"""
560
  )
561
 
562
 
563
-
564
  def create_rag_qa_notes_management_tab():
565
  # New Management Tab
566
  with gr.TabItem("Notes Management", visible=True):
567
  gr.Markdown("# RAG QA Notes Management")
568
-
569
  management_state = gr.State({
570
  "selected_conversation_id": None,
571
  "selected_note_id": None,
@@ -574,7 +819,8 @@ def create_rag_qa_notes_management_tab():
574
  with gr.Row():
575
  with gr.Column(scale=1):
576
  # Search Notes
577
- search_notes_input = gr.Textbox(label="Search Notes by Keywords")
 
578
  search_notes_button = gr.Button("Search Notes")
579
  notes_list = gr.Dropdown(label="Notes", choices=[])
580
 
@@ -583,24 +829,34 @@ def create_rag_qa_notes_management_tab():
583
  delete_note_button = gr.Button("Delete Note")
584
  note_title_input = gr.Textbox(label="Note Title")
585
  note_content_input = gr.TextArea(label="Note Content", lines=20)
586
- note_keywords_input = gr.Textbox(label="Note Keywords (comma-separated)")
587
  save_note_button = gr.Button("Save Note")
588
  create_new_note_button = gr.Button("Create New Note")
589
  status_message = gr.HTML()
590
 
591
  # Function Definitions
592
- def search_notes(keywords):
593
  if keywords:
594
  keywords_list = [kw.strip() for kw in keywords.split(',')]
595
  notes_data, total_pages, total_count = get_notes_by_keywords(keywords_list)
596
- choices = [f"{title} (ID: {note_id})" for note_id, title, content, timestamp in notes_data]
597
- return gr.update(choices=choices)
 
 
 
 
 
 
598
  else:
599
- return gr.update(choices=[])
 
 
 
 
600
 
601
  search_notes_button.click(
602
  search_notes,
603
- inputs=[search_notes_input],
604
  outputs=[notes_list]
605
  )
606
 
@@ -664,7 +920,7 @@ def create_rag_qa_notes_management_tab():
664
  # Reset state
665
  state_value["selected_note_id"] = None
666
  # Update notes list
667
- updated_notes = search_notes("")
668
  return updated_notes, gr.update(value="Note deleted successfully."), state_value
669
  else:
670
  return gr.update(), gr.update(value="No note selected."), state_value
@@ -702,7 +958,20 @@ def create_rag_qa_chat_management_tab():
702
  with gr.Row():
703
  with gr.Column(scale=1):
704
  # Search Conversations
705
- search_conversations_input = gr.Textbox(label="Search Conversations by Keywords")
 
 
 
 
 
 
 
 
 
 
 
 
 
706
  search_conversations_button = gr.Button("Search Conversations")
707
  conversations_list = gr.Dropdown(label="Conversations", choices=[])
708
  new_conversation_button = gr.Button("New Conversation")
@@ -716,26 +985,40 @@ def create_rag_qa_chat_management_tab():
716
  status_message = gr.HTML()
717
 
718
  # Function Definitions
719
- def search_conversations(keywords):
720
- if keywords:
721
- keywords_list = [kw.strip() for kw in keywords.split(',')]
722
- conversations, total_pages, total_count = search_conversations_by_keywords(keywords_list)
723
- else:
724
- conversations, total_pages, total_count = get_all_conversations()
 
 
 
 
 
 
 
725
 
726
- # Build choices as list of titles (ensure uniqueness)
727
- choices = []
728
- mapping = {}
729
- for conversation_id, title in conversations:
730
- display_title = f"{title} (ID: {conversation_id[:8]})"
731
- choices.append(display_title)
732
- mapping[display_title] = conversation_id
 
 
733
 
734
- return gr.update(choices=choices), mapping
735
 
 
 
 
 
 
736
  search_conversations_button.click(
737
  search_conversations,
738
- inputs=[search_conversations_input],
739
  outputs=[conversations_list, conversation_mapping]
740
  )
741
 
@@ -892,19 +1175,18 @@ def create_rag_qa_chat_management_tab():
892
  ]
893
  )
894
 
895
- def delete_messages_in_conversation(conversation_id):
896
- """Helper function to delete all messages in a conversation."""
897
  try:
898
- execute_query("DELETE FROM rag_qa_chats WHERE conversation_id = ?", (conversation_id,))
899
  logging.info(f"Messages in conversation '{conversation_id}' deleted successfully.")
900
  except Exception as e:
901
  logging.error(f"Error deleting messages in conversation '{conversation_id}': {e}")
902
  raise
903
 
904
- def get_conversation_title(conversation_id):
905
  """Helper function to get the conversation title."""
906
- query = "SELECT title FROM conversation_metadata WHERE conversation_id = ?"
907
- result = execute_query(query, (conversation_id,))
908
  if result:
909
  return result[0][0]
910
  else:
@@ -1034,19 +1316,6 @@ def create_export_data_tab():
1034
  )
1035
 
1036
 
1037
-
1038
-
1039
- def update_conversation_title(conversation_id, new_title):
1040
- """Update the title of a conversation."""
1041
- try:
1042
- query = "UPDATE conversation_metadata SET title = ? WHERE conversation_id = ?"
1043
- execute_query(query, (new_title, conversation_id))
1044
- logging.info(f"Conversation '{conversation_id}' title updated to '{new_title}'")
1045
- except Exception as e:
1046
- logging.error(f"Error updating conversation title: {e}")
1047
- raise
1048
-
1049
-
1050
  def convert_file_to_text(file_path):
1051
  """Convert various file types to plain text."""
1052
  file_extension = os.path.splitext(file_path)[1].lower()
 
6
  import logging
7
  import json
8
  import os
9
+ import re
10
  from datetime import datetime
11
  #
12
  # External Imports
 
15
  #
16
  # Local Imports
17
  from App_Function_Libraries.Books.Book_Ingestion_Lib import read_epub
18
+ from App_Function_Libraries.DB.Character_Chat_DB import search_character_chat, search_character_cards
19
+ from App_Function_Libraries.DB.DB_Manager import DatabaseError, get_paginated_files, add_media_with_keywords, \
20
+ get_all_conversations, get_note_by_id, get_notes_by_keywords, start_new_conversation, update_note, save_notes, \
21
+ clear_keywords_from_note, add_keywords_to_note, load_chat_history, save_message, add_keywords_to_conversation, \
22
+ get_keywords_for_note, delete_note, search_conversations_by_keywords, get_conversation_title, delete_conversation, \
23
+ update_conversation_title, fetch_all_conversations, fetch_all_notes, fetch_conversations_by_ids, fetch_notes_by_ids, \
24
+ search_media_db, search_notes_titles, list_prompts
25
+ from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_notes, delete_messages_in_conversation, search_rag_notes, \
26
+ search_rag_chat, get_conversation_rating, set_conversation_rating
27
+ from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_user_prompt
 
28
  from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_text_and_format_from_pdf
29
  from App_Function_Libraries.RAG.RAG_Library_2 import generate_answer, enhanced_rag_pipeline
30
  from App_Function_Libraries.RAG.RAG_QA_Chat import search_database, rag_qa_chat
31
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name, \
32
+ load_comprehensive_config
33
+
34
+
35
  #
36
  ########################################################################################################################
37
  #
38
  # Functions:
39
 
40
  def create_rag_qa_chat_tab():
41
+ try:
42
+ default_value = None
43
+ if default_api_endpoint:
44
+ if default_api_endpoint in global_api_endpoints:
45
+ default_value = format_api_name(default_api_endpoint)
46
+ else:
47
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
48
+ except Exception as e:
49
+ logging.error(f"Error setting default API endpoint: {str(e)}")
50
+ default_value = None
51
  with gr.TabItem("RAG QA Chat", visible=True):
52
  gr.Markdown("# RAG QA Chat")
53
 
 
55
  "page": 1,
56
  "context_source": "Entire Media Database",
57
  "conversation_messages": [],
58
+ "conversation_id": None
59
  })
60
 
61
  note_state = gr.State({"note_id": None})
62
 
63
+ def auto_save_conversation(message, response, state_value, auto_save_enabled):
64
+ """Automatically save the conversation if auto-save is enabled"""
65
+ try:
66
+ if not auto_save_enabled:
67
+ return state_value
68
+
69
+ conversation_id = state_value.get("conversation_id")
70
+ if not conversation_id:
71
+ # Create new conversation with default title
72
+ title = "Auto-saved Conversation " + datetime.now().strftime("%Y-%m-%d %H:%M:%S")
73
+ conversation_id = start_new_conversation(title=title)
74
+ state_value = state_value.copy()
75
+ state_value["conversation_id"] = conversation_id
76
+
77
+ # Save the messages
78
+ save_message(conversation_id, "user", message)
79
+ save_message(conversation_id, "assistant", response)
80
+
81
+ return state_value
82
+ except Exception as e:
83
+ logging.error(f"Error in auto-save: {str(e)}")
84
+ return state_value
85
+
86
  # Update the conversation list function
87
  def update_conversation_list():
88
  conversations, total_pages, total_count = get_all_conversations()
89
+ choices = [
90
+ f"{conversation['title']} (ID: {conversation['conversation_id']}) - Rating: {conversation['rating'] or 'Not Rated'}"
91
+ for conversation in conversations
92
+ ]
93
  return choices
94
 
95
  with gr.Row():
96
  with gr.Column(scale=1):
97
+ # FIXME - Offer the user to search 2+ databases at once
98
+ database_types = ["Media DB", "RAG Chat", "RAG Notes", "Character Chat", "Character Cards"]
99
+ db_choice = gr.CheckboxGroup(
100
+ label="Select Database(s)",
101
+ choices=database_types,
102
+ value=["Media DB"],
103
+ interactive=True
104
+ )
105
  context_source = gr.Radio(
106
  ["All Files in the Database", "Search Database", "Upload File"],
107
  label="Context Source",
 
114
  next_page_btn = gr.Button("Next Page")
115
  page_info = gr.HTML("Page 1")
116
  top_k_input = gr.Number(value=10, label="Maximum amount of results to use (Default: 10)", minimum=1, maximum=50, step=1, precision=0, interactive=True)
117
+ keywords_input = gr.Textbox(label="Keywords (comma-separated) to filter results by", value="rag_qa_default_keyword", visible=True)
118
  use_query_rewriting = gr.Checkbox(label="Use Query Rewriting", value=True)
119
  use_re_ranking = gr.Checkbox(label="Use Re-ranking", value=True)
120
+ config = load_comprehensive_config()
121
+ auto_save_value = config.getboolean('auto-save', 'save_character_chats', fallback=False)
122
+ auto_save_checkbox = gr.Checkbox(
123
+ label="Save chats automatically",
124
+ value=auto_save_value,
125
+ info="When enabled, conversations will be saved automatically after each message"
126
+ )
127
+
128
+ initial_prompts, total_pages, current_page = list_prompts(page=1, per_page=10)
129
 
130
+ preset_prompt_checkbox = gr.Checkbox(
131
+ label="View Custom Prompts(have to copy/paste them)",
132
+ value=False,
133
+ visible=True
134
+ )
135
+
136
+ with gr.Row(visible=False) as preset_prompt_controls:
137
+ prev_prompt_page = gr.Button("Previous")
138
+ current_prompt_page_text = gr.Text(f"Page {current_page} of {total_pages}")
139
+ next_prompt_page = gr.Button("Next")
140
+ current_prompt_page_state = gr.State(value=1)
141
+
142
+ preset_prompt = gr.Dropdown(
143
+ label="Select Preset Prompt",
144
+ choices=initial_prompts,
145
+ visible=False
146
+ )
147
+ user_prompt = gr.Textbox(
148
+ label="Custom Prompt",
149
+ placeholder="Enter custom prompt here",
150
+ lines=3,
151
+ visible=False
152
+ )
153
+
154
+ system_prompt_input = gr.Textbox(
155
+ label="System Prompt",
156
+ lines=3,
157
+ visible=False
158
+ )
159
 
160
  search_query = gr.Textbox(label="Search Query", visible=False)
161
  search_button = gr.Button("Search", visible=False)
162
  search_results = gr.Dropdown(label="Search Results", choices=[], visible=False)
 
163
  file_upload = gr.File(
164
  label="Upload File",
165
  visible=False,
 
171
  load_conversation = gr.Dropdown(
172
  label="Load Conversation",
173
  choices=update_conversation_list()
174
+ )
175
  new_conversation = gr.Button("New Conversation")
176
  save_conversation_button = gr.Button("Save Conversation")
177
  conversation_title = gr.Textbox(
178
+ label="Conversation Title",
179
+ placeholder="Enter a title for the new conversation"
180
  )
181
  keywords = gr.Textbox(label="Keywords (comma-separated)", visible=True)
182
 
183
+ # Add the rating display and input
184
+ rating_display = gr.Markdown(value="", visible=False)
185
+ rating_input = gr.Radio(
186
+ choices=["1", "2", "3"],
187
+ label="Rate this Conversation (1-3 stars)",
188
+ visible=False
189
+ )
190
+
191
+ # Refactored API selection dropdown
192
  api_choice = gr.Dropdown(
193
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
194
+ value=default_value,
195
+ label="API for Chat Response (Optional)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  )
197
 
198
  with gr.Row():
 
215
  clear_notes_btn = gr.Button("Clear Current Note text")
216
 
217
  new_note_btn = gr.Button("New Note")
218
+ # FIXME - Change from only keywords to generalized search
219
+ search_notes_title = gr.Textbox(label="Search Notes by Title")
220
  search_notes_by_keyword = gr.Textbox(label="Search Notes by Keyword")
221
  search_notes_button = gr.Button("Search Notes")
222
  note_results = gr.Dropdown(label="Notes", choices=[])
 
224
 
225
  loading_indicator = gr.HTML("Loading...", visible=False)
226
  status_message = gr.HTML()
227
+ auto_save_status = gr.HTML()
228
+
229
+
230
 
231
  # Function Definitions
232
+ def update_prompt_page(direction, current_page_val):
233
+ new_page = max(1, min(total_pages, current_page_val + direction))
234
+ prompts, _, _ = list_prompts(page=new_page, per_page=10)
235
+ return (
236
+ gr.update(choices=prompts),
237
+ gr.update(value=f"Page {new_page} of {total_pages}"),
238
+ new_page
239
+ )
240
+
241
+ def update_prompts(preset_name):
242
+ prompts = update_user_prompt(preset_name)
243
+ return (
244
+ gr.update(value=prompts["user_prompt"], visible=True),
245
+ gr.update(value=prompts["system_prompt"], visible=True)
246
+ )
247
+
248
+ def toggle_preset_prompt(checkbox_value):
249
+ return (
250
+ gr.update(visible=checkbox_value),
251
+ gr.update(visible=checkbox_value),
252
+ gr.update(visible=False),
253
+ gr.update(visible=False)
254
+ )
255
+
256
+ prev_prompt_page.click(
257
+ lambda x: update_prompt_page(-1, x),
258
+ inputs=[current_prompt_page_state],
259
+ outputs=[preset_prompt, current_prompt_page_text, current_prompt_page_state]
260
+ )
261
+
262
+ next_prompt_page.click(
263
+ lambda x: update_prompt_page(1, x),
264
+ inputs=[current_prompt_page_state],
265
+ outputs=[preset_prompt, current_prompt_page_text, current_prompt_page_state]
266
+ )
267
+
268
+ preset_prompt.change(
269
+ update_prompts,
270
+ inputs=preset_prompt,
271
+ outputs=[user_prompt, system_prompt_input]
272
+ )
273
+
274
+ preset_prompt_checkbox.change(
275
+ toggle_preset_prompt,
276
+ inputs=[preset_prompt_checkbox],
277
+ outputs=[preset_prompt, preset_prompt_controls, user_prompt, system_prompt_input]
278
+ )
279
 
280
  def update_state(state, **kwargs):
281
  new_state = state.copy()
 
290
  outputs=[note_title, notes, note_state]
291
  )
292
 
293
+ def search_notes(search_notes_title, keywords):
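+ # Keyword matches take priority; otherwise fall back to a title search, and with neither input return every note ordered by timestamp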
294
  if keywords:
295
  keywords_list = [kw.strip() for kw in keywords.split(',')]
296
  notes_data, total_pages, total_count = get_notes_by_keywords(keywords_list)
297
+ choices = [f"Note {note_id} - {title} ({timestamp})" for
298
+ note_id, title, content, timestamp, conversation_id in notes_data]
299
+ return gr.update(choices=choices, label=f"Found {total_count} notes")
300
+ elif search_notes_title:
301
+ notes_data, total_pages, total_count = search_notes_titles(search_notes_title)
302
+ choices = [f"Note {note_id} - {title} ({timestamp})" for
303
+ note_id, title, content, timestamp, conversation_id in notes_data]
304
+ return gr.update(choices=choices, label=f"Found {total_count} notes")
305
  else:
306
+ # This will now return all notes, ordered by timestamp
307
+ notes_data, total_pages, total_count = search_notes_titles("")
308
+ choices = [f"Note {note_id} - {title} ({timestamp})" for
309
+ note_id, title, content, timestamp, conversation_id in notes_data]
310
+ return gr.update(choices=choices, label=f"All notes ({total_count} total)")
311
 
312
  search_notes_button.click(
313
  search_notes,
314
+ inputs=[search_notes_title, search_notes_by_keyword],
315
  outputs=[note_results]
316
  )
317
 
 
333
 
334
  def save_notes_function(note_title_text, notes_content, keywords_content, note_state_value, state_value):
335
  """Save the notes and associated keywords to the database."""
336
+ logging.info(f"Starting save_notes_function with state: {state_value}")
337
+ logging.info(f"Note title: {note_title_text}")
338
+ logging.info(f"Notes content length: {len(notes_content) if notes_content else 0}")
339
+
340
+ try:
341
+ # Check current state
342
+ conversation_id = state_value.get("conversation_id")
343
+ logging.info(f"Current conversation_id: {conversation_id}")
344
+
345
+ # Create new conversation if none exists
346
+ if not conversation_id:
347
+ logging.info("No conversation ID found, creating new conversation")
348
+ conversation_title = note_title_text if note_title_text else "Untitled Conversation"
349
+ conversation_id = start_new_conversation(title=conversation_title)
350
+ state_value = state_value.copy() # Create a new copy of the state
351
+ state_value["conversation_id"] = conversation_id
352
+ logging.info(f"Created new conversation with ID: {conversation_id}")
353
+
354
+ if not notes_content:
355
+ logging.warning("No notes content provided")
356
+ return notes_content, note_state_value, state_value, gr.update(
357
+ value="<p style='color:red;'>Cannot save empty notes.</p>")
358
+
359
+ # Save or update note
360
+ note_id = note_state_value.get("note_id")
361
  if note_id:
362
+ logging.info(f"Updating existing note with ID: {note_id}")
363
  update_note(note_id, note_title_text, notes_content)
364
  else:
365
+ logging.info(f"Creating new note for conversation: {conversation_id}")
366
+ note_id = save_notes(conversation_id, note_title_text or "Untitled Note", notes_content)
367
+ note_state_value = {"note_id": note_id}
368
+ logging.info(f"Created new note with ID: {note_id}")
369
+
370
+ # Handle keywords
371
  if keywords_content:
372
+ logging.info("Processing keywords")
373
  clear_keywords_from_note(note_id)
374
+ keywords = [kw.strip() for kw in keywords_content.split(',')]
375
+ add_keywords_to_note(note_id, keywords)
376
+ logging.info(f"Added keywords: {keywords}")
377
 
378
+ logging.info("Notes saved successfully")
379
+ return (
380
+ notes_content,
381
+ note_state_value,
382
+ state_value,
383
+ gr.update(value="<p style='color:green;'>Notes saved successfully!</p>")
384
+ )
385
+
386
+ except Exception as e:
387
+ logging.error(f"Error in save_notes_function: {str(e)}", exc_info=True)
388
+ return (
389
+ notes_content,
390
+ note_state_value,
391
+ state_value,
392
+ gr.update(value=f"<p style='color:red;'>Error saving notes: {str(e)}</p>")
393
+ )
394
 
395
  save_notes_btn.click(
396
  save_notes_function,
397
  inputs=[note_title, notes, keywords_for_notes, note_state, state],
398
+ outputs=[notes, note_state, state, status_message]
399
  )
400
 
401
  def clear_notes_function():
 
407
  outputs=[notes, note_state]
408
  )
409
 
 
 
 
 
 
410
  # Initialize the conversation list
411
  load_conversation.choices = update_conversation_list()
412
 
413
  def load_conversation_history(selected_conversation, state_value):
414
+ try:
415
+ if not selected_conversation:
416
+ return [], state_value, "", gr.update(value="", visible=False), gr.update(visible=False)
417
+ # Extract conversation ID
418
+ match = re.search(r'\(ID: ([0-9a-fA-F\-]+)\)', selected_conversation)
419
+ if not match:
420
+ logging.error(f"Invalid conversation format: {selected_conversation}")
421
+ return [], state_value, "", gr.update(value="", visible=False), gr.update(visible=False)
422
+ conversation_id = match.group(1)
423
  chat_data, total_pages_val, _ = load_chat_history(conversation_id, 1, 50)
424
+ # Update state with valid conversation id
425
+ updated_state = state_value.copy()
426
+ updated_state["conversation_id"] = conversation_id
427
+ updated_state["conversation_messages"] = chat_data
428
+ # Format chat history
429
  history = []
430
  for role, content in chat_data:
431
  if role == 'user':
432
  history.append((content, ''))
433
+ elif history:
434
+ history[-1] = (history[-1][0], content)
435
+ # Fetch and display the conversation rating
436
+ rating = get_conversation_rating(conversation_id)
437
+ if rating is not None:
438
+ rating_text = f"**Current Rating:** {rating} star(s)"
439
+ rating_display_update = gr.update(value=rating_text, visible=True)
440
+ rating_input_update = gr.update(value=str(rating), visible=True)
441
+ else:
442
+ rating_display_update = gr.update(value="**Current Rating:** Not Rated", visible=True)
443
+ rating_input_update = gr.update(value=None, visible=True)
444
  notes_content = get_notes(conversation_id)
445
+ return history, updated_state, "\n".join(
446
+ notes_content) if notes_content else "", rating_display_update, rating_input_update
447
+ except Exception as e:
448
+ logging.error(f"Error loading conversation: {str(e)}")
449
+ return [], state_value, "", gr.update(value="", visible=False), gr.update(visible=False)
450
 
451
  load_conversation.change(
452
  load_conversation_history,
453
  inputs=[load_conversation, state],
454
+ outputs=[chatbot, state, notes, rating_display, rating_input]
455
  )
456
 
457
  # Modify save_conversation_function to use gr.update()
458
+ def save_conversation_function(conversation_title_text, keywords_text, rating_value, state_value):
459
  conversation_messages = state_value.get("conversation_messages", [])
460
+ conversation_id = state_value.get("conversation_id")
461
  if not conversation_messages:
462
  return gr.update(
463
  value="<p style='color:red;'>No conversation to save.</p>"
464
+ ), state_value, gr.update(), gr.update(value="", visible=False), gr.update(visible=False)
465
+ # Start a new conversation in the database if none exists yet
466
+ if not conversation_id:
467
+ conversation_id = start_new_conversation(
468
+ conversation_title_text if conversation_title_text else "Untitled Conversation"
469
+ )
470
+ else:
471
+ # Update the conversation title if it has changed
472
+ update_conversation_title(conversation_id, conversation_title_text)
473
  # Save the messages
474
  for role, content in conversation_messages:
475
+ save_message(conversation_id, role, content)
476
  # Save keywords if provided
477
  if keywords_text:
478
+ add_keywords_to_conversation(conversation_id, [kw.strip() for kw in keywords_text.split(',')])
479
+ # Save the rating if provided
480
+ try:
481
+ if rating_value:
482
+ set_conversation_rating(conversation_id, int(rating_value))
483
+ except ValueError as ve:
484
+ logging.error(f"Invalid rating value: {ve}")
485
+ return gr.update(
486
+ value=f"<p style='color:red;'>Invalid rating: {ve}</p>"
487
+ ), state_value, gr.update(), gr.update(value="", visible=False), gr.update(visible=False)
488
+
489
  # Update state
490
+ updated_state = update_state(state_value, conversation_id=conversation_id)
491
  # Update the conversation list
492
  conversation_choices = update_conversation_list()
493
+ # Reset rating display and input
494
+ rating_display_update = gr.update(value=f"**Current Rating:** {rating_value} star(s)", visible=True)
495
+ rating_input_update = gr.update(value=rating_value, visible=True)
496
  return gr.update(
497
  value="<p style='color:green;'>Conversation saved successfully.</p>"
498
+ ), updated_state, gr.update(choices=conversation_choices), rating_display_update, rating_input_update
499
 
500
  save_conversation_button.click(
501
  save_conversation_function,
502
+ inputs=[conversation_title, keywords, rating_input, state],
503
+ outputs=[status_message, state, load_conversation, rating_display, rating_input]
504
  )
505
 
506
  def start_new_conversation_wrapper(title, state_value):
507
+ # Reset the state with no conversation_id and empty conversation messages
508
+ updated_state = update_state(state_value, conversation_id=None, page=1, conversation_messages=[])
509
+ # Clear the chat history and reset rating components
510
+ return [], updated_state, gr.update(value="", visible=False), gr.update(value=None, visible=False)
 
511
 
512
  new_conversation.click(
513
  start_new_conversation_wrapper,
514
  inputs=[conversation_title, state],
515
+ outputs=[chatbot, state, rating_display, rating_input]
516
  )
517
 
518
  def update_file_list(page):
 
527
  return update_file_list(max(1, current_page - 1))
528
 
529
  def update_context_source(choice):
530
+ # Update visibility based on context source choice
531
  return {
532
  existing_file: gr.update(visible=choice == "Existing File"),
533
+ prev_page_btn: gr.update(visible=choice == "Search Database"),
534
+ next_page_btn: gr.update(visible=choice == "Search Database"),
535
+ page_info: gr.update(visible=choice == "Search Database"),
536
  search_query: gr.update(visible=choice == "Search Database"),
537
  search_button: gr.update(visible=choice == "Search Database"),
538
  search_results: gr.update(visible=choice == "Search Database"),
 
552
  context_source.change(lambda choice: update_file_list(1) if choice == "Existing File" else (gr.update(), gr.update(), 1),
553
  inputs=[context_source], outputs=[existing_file, page_info, file_page])
554
 
555
+ def perform_search(query, selected_databases, keywords):
556
  try:
557
+ results = []
558
+
559
+ # Iterate over selected database types and perform searches accordingly
560
+ for database_type in selected_databases:
561
+ if database_type == "Media DB":
562
+ # FIXME - check for existence of keywords before setting as search field
563
+ search_fields = ["title", "content", "keywords"]
564
+ results += search_media_db(query, search_fields, keywords, page=1, results_per_page=25)
565
+ elif database_type == "RAG Chat":
566
+ results += search_rag_chat(query)
567
+ elif database_type == "RAG Notes":
568
+ results += search_rag_notes(query)
569
+ elif database_type == "Character Chat":
570
+ results += search_character_chat(query)
571
+ elif database_type == "Character Cards":
572
+ results += search_character_cards(query)
573
+
574
+ # Remove duplicate results if necessary
575
+ results = list(set(results))
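# Note: list(set(results)) assumes each result row is hashable and it discards the original ranking order;
# list(dict.fromkeys(results)) would de-duplicate while keeping the first occurrence of each row.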
576
  return gr.update(choices=results)
577
  except Exception as e:
578
  gr.Error(f"Error performing search: {str(e)}")
579
  return gr.update(choices=[])
580
 
581
+ # Click Event for the DB Search Button
582
  search_button.click(
583
  perform_search,
584
+ inputs=[search_query, db_choice, keywords_input],
585
  outputs=[search_results]
586
  )
587
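The per-database branching inside perform_search above can also be written as a table-driven dispatch. The following is a minimal sketch, not part of the commit, assuming the search helpers keep the signatures used above (search_media_db taking query/fields/keywords/pagination, the other helpers taking only the query):

    SEARCHERS = {
        "Media DB": lambda q, kw: search_media_db(q, ["title", "content", "keywords"], kw, page=1, results_per_page=25),
        "RAG Chat": lambda q, kw: search_rag_chat(q),
        "RAG Notes": lambda q, kw: search_rag_notes(q),
        "Character Chat": lambda q, kw: search_character_chat(q),
        "Character Cards": lambda q, kw: search_character_cards(q),
    }

    def gather_results(query, selected_databases, keywords):
        results = []
        for db in selected_databases:
            searcher = SEARCHERS.get(db)
            if searcher:
                results.extend(searcher(query, keywords))
        # De-duplicate while preserving the order the rows came back in (rows must be hashable).
        return list(dict.fromkeys(results))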
 
 
603
  logging.info(f"Rephrased question: {rephrased_question}")
604
  return rephrased_question.strip()
605
 
606
+ # FIXME - RAG DB selection
607
+ def rag_qa_chat_wrapper(
608
+ message, history, context_source, existing_file, search_results, file_upload,
609
+ convert_to_text, keywords, api_choice, use_query_rewriting, state_value,
610
+ keywords_input, top_k_input, use_re_ranking, db_choices, auto_save_enabled
611
+ ):
612
  try:
613
  logging.info(f"Starting rag_qa_chat_wrapper with message: {message}")
614
  logging.info(f"Context source: {context_source}")
615
  logging.info(f"API choice: {api_choice}")
616
  logging.info(f"Query rewriting: {'enabled' if use_query_rewriting else 'disabled'}")
617
+ logging.info(f"Selected DB Choices: {db_choices}")
618
 
619
  # Show loading indicator
620
+ yield history, "", gr.update(visible=True), state_value, gr.update(visible=False), gr.update(
621
+ visible=False)
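# Note: because this handler is a generator, Gradio renders every yield as an intermediate UI update;
# this first yield only switches the loading indicator on, and the final yield below replaces it with the answer.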
622
 
623
  conversation_id = state_value.get("conversation_id")
624
  conversation_messages = state_value.get("conversation_messages", [])
 
632
  state_value["conversation_messages"] = conversation_messages
633
 
634
  # Ensure api_choice is a string
635
+ api_choice_str = api_choice.value if isinstance(api_choice, gr.components.Dropdown) else api_choice
636
+ logging.info(f"Resolved API choice: {api_choice_str}")
637
 
638
  # Only rephrase the question if it's not the first query and query rewriting is enabled
639
  if len(history) > 0 and use_query_rewriting:
640
+ rephrased_question = rephrase_question(history, message, api_choice_str)
641
  logging.info(f"Original question: {message}")
642
  logging.info(f"Rephrased question: {rephrased_question}")
643
  else:
 
645
  logging.info(f"Using original question: {message}")
646
 
647
  if context_source == "All Files in the Database":
648
+ # Use the enhanced_rag_pipeline to search the selected databases
649
+ context = enhanced_rag_pipeline(
650
+ rephrased_question, api_choice_str, keywords_input, top_k_input, use_re_ranking,
651
+ database_types=db_choices # Pass the list of selected databases
652
+ )
653
  logging.info(f"Using enhanced_rag_pipeline for database search")
654
  elif context_source == "Search Database":
655
  context = f"media_id:{search_results.split('(ID: ')[1][:-1]}"
656
  logging.info(f"Using search result with context: {context}")
657
+ else:
658
+ # Upload File
659
  logging.info("Processing uploaded file")
660
  if file_upload is None:
661
  raise ValueError("No file uploaded")
 
662
  # Process the uploaded file
663
  file_path = file_upload.name
664
  file_name = os.path.basename(file_path)
 
671
  logging.info("Reading file content")
672
  with open(file_path, 'r', encoding='utf-8') as f:
673
  content = f.read()
 
674
  logging.info(f"File content length: {len(content)} characters")
675
 
676
  # Process keywords
 
692
  author='Unknown',
693
  ingestion_date=datetime.now().strftime('%Y-%m-%d')
694
  )
 
695
  logging.info(f"Result from add_media_with_keywords: {result}")
696
  if isinstance(result, tuple):
697
  media_id, _ = result
698
  else:
699
  media_id = result
 
700
  context = f"media_id:{media_id}"
701
  logging.info(f"Context for uploaded file: {context}")
702
 
703
  logging.info("Calling rag_qa_chat function")
704
+ new_history, response = rag_qa_chat(rephrased_question, history, context, api_choice_str)
705
+
706
  # Log first 100 chars of response
707
  logging.info(f"Response received from rag_qa_chat: {response[:100]}...")
708
 
 
714
  state_value["conversation_messages"] = conversation_messages
715
 
716
  # Update the state
717
+ updated_state = auto_save_conversation(message, response, state_value, auto_save_enabled)
718
+ updated_state["conversation_messages"] = conversation_messages
719
 
720
  # Safely update history
721
  if new_history:
 
723
  else:
724
  new_history = [(message, response)]
725
 
726
+ # Get the current rating and update display
727
+ conversation_id = updated_state.get("conversation_id")
728
+ if conversation_id:
729
+ rating = get_conversation_rating(conversation_id)
730
+ if rating is not None:
731
+ rating_display_update = gr.update(value=f"**Current Rating:** {rating} star(s)", visible=True)
732
+ rating_input_update = gr.update(value=str(rating), visible=True)
733
+ else:
734
+ rating_display_update = gr.update(value="**Current Rating:** Not Rated", visible=True)
735
+ rating_input_update = gr.update(value=None, visible=True)
736
+ else:
737
+ rating_display_update = gr.update(value="", visible=False)
738
+ rating_input_update = gr.update(value=None, visible=False)
739
+
740
  gr.Info("Response generated successfully")
741
  logging.info("rag_qa_chat_wrapper completed successfully")
742
+ yield new_history, "", gr.update(
743
+ visible=False), updated_state, rating_display_update, rating_input_update
744
+
745
  except ValueError as e:
746
  logging.error(f"Input error in rag_qa_chat_wrapper: {str(e)}")
747
  gr.Error(f"Input error: {str(e)}")
748
+ yield history, "", gr.update(visible=False), state_value, gr.update(visible=False), gr.update(
749
+ visible=False)
750
  except DatabaseError as e:
751
  logging.error(f"Database error in rag_qa_chat_wrapper: {str(e)}")
752
  gr.Error(f"Database error: {str(e)}")
753
+ yield history, "", gr.update(visible=False), state_value, gr.update(visible=False), gr.update(
754
+ visible=False)
755
  except Exception as e:
756
  logging.error(f"Unexpected error in rag_qa_chat_wrapper: {e}", exc_info=True)
757
  gr.Error("An unexpected error occurred. Please try again later.")
758
+ yield history, "", gr.update(visible=False), state_value, gr.update(visible=False), gr.update(
759
+ visible=False)
760
 
761
  def clear_chat_history():
762
+ return [], "", gr.update(value="", visible=False), gr.update(value=None, visible=False)
763
 
764
  submit.click(
765
  rag_qa_chat_wrapper,
 
776
  use_query_rewriting,
777
  state,
778
  keywords_input,
779
+ top_k_input,
780
+ use_re_ranking,
781
+ db_choice,
782
+ auto_save_checkbox
783
  ],
784
+ outputs=[chatbot, msg, loading_indicator, state, rating_display, rating_input],
785
  )
786
 
787
  clear_chat.click(
788
  clear_chat_history,
789
+ outputs=[chatbot, msg, rating_display, rating_input]
790
  )
791
 
792
  return (
 
807
  )
808
 
809
 
 
810
  def create_rag_qa_notes_management_tab():
811
  # New Management Tab
812
  with gr.TabItem("Notes Management", visible=True):
813
  gr.Markdown("# RAG QA Notes Management")
 
814
  management_state = gr.State({
815
  "selected_conversation_id": None,
816
  "selected_note_id": None,
 
819
  with gr.Row():
820
  with gr.Column(scale=1):
821
  # Search Notes
822
+ search_notes_title = gr.Textbox(label="Search Notes by Title")
823
+ search_notes_by_keyword = gr.Textbox(label="Search Notes by Keywords")
824
  search_notes_button = gr.Button("Search Notes")
825
  notes_list = gr.Dropdown(label="Notes", choices=[])
826
 
 
829
  delete_note_button = gr.Button("Delete Note")
830
  note_title_input = gr.Textbox(label="Note Title")
831
  note_content_input = gr.TextArea(label="Note Content", lines=20)
832
+ note_keywords_input = gr.Textbox(label="Note Keywords (comma-separated)", value="default_note_keyword")
833
  save_note_button = gr.Button("Save Note")
834
  create_new_note_button = gr.Button("Create New Note")
835
  status_message = gr.HTML()
836
 
837
  # Function Definitions
838
+ def search_notes(search_notes_title, keywords):
839
  if keywords:
840
  keywords_list = [kw.strip() for kw in keywords.split(',')]
841
  notes_data, total_pages, total_count = get_notes_by_keywords(keywords_list)
842
+ choices = [f"Note {note_id} - {title} ({timestamp})" for
843
+ note_id, title, content, timestamp, conversation_id in notes_data]
844
+ return gr.update(choices=choices, label=f"Found {total_count} notes")
845
+ elif search_notes_title:
846
+ notes_data, total_pages, total_count = search_notes_titles(search_notes_title)
847
+ choices = [f"Note {note_id} - {title} ({timestamp})" for
848
+ note_id, title, content, timestamp, conversation_id in notes_data]
849
+ return gr.update(choices=choices, label=f"Found {total_count} notes")
850
  else:
851
+ # This will now return all notes, ordered by timestamp
852
+ notes_data, total_pages, total_count = search_notes_titles("")
853
+ choices = [f"Note {note_id} - {title} ({timestamp})" for
854
+ note_id, title, content, timestamp, conversation_id in notes_data]
855
+ return gr.update(choices=choices, label=f"All notes ({total_count} total)")
856
 
857
  search_notes_button.click(
858
  search_notes,
859
+ inputs=[search_notes_title, search_notes_by_keyword],
860
  outputs=[notes_list]
861
  )
862
 
 
920
  # Reset state
921
  state_value["selected_note_id"] = None
922
  # Update notes list
923
+ updated_notes = search_notes("", "")
924
  return updated_notes, gr.update(value="Note deleted successfully."), state_value
925
  else:
926
  return gr.update(), gr.update(value="No note selected."), state_value
 
958
  with gr.Row():
959
  with gr.Column(scale=1):
960
  # Search Conversations
961
+ with gr.Group():
962
+ gr.Markdown("## Search Conversations")
963
+ title_search = gr.Textbox(
964
+ label="Search by Title",
965
+ placeholder="Enter title to search..."
966
+ )
967
+ content_search = gr.Textbox(
968
+ label="Search in Chat Content",
969
+ placeholder="Enter text to search in messages..."
970
+ )
971
+ keyword_search = gr.Textbox(
972
+ label="Filter by Keywords (comma-separated)",
973
+ placeholder="keyword1, keyword2, ..."
974
+ )
975
  search_conversations_button = gr.Button("Search Conversations")
976
  conversations_list = gr.Dropdown(label="Conversations", choices=[])
977
  new_conversation_button = gr.Button("New Conversation")
 
985
  status_message = gr.HTML()
986
 
987
  # Function Definitions
988
+ def search_conversations(title_query, content_query, keywords):
989
+ try:
990
+ # Parse keywords if provided
991
+ keywords_list = None
992
+ if keywords and keywords.strip():
993
+ keywords_list = [kw.strip() for kw in keywords.split(',')]
994
+
995
+ # Search using existing search_conversations_by_keywords function with all criteria
996
+ results, total_pages, total_count = search_conversations_by_keywords(
997
+ keywords=keywords_list,
998
+ title_query=title_query if title_query.strip() else None,
999
+ content_query=content_query if content_query.strip() else None
1000
+ )
1001
 
1002
+ # Build choices as list of titles (ensure uniqueness)
1003
+ choices = []
1004
+ mapping = {}
1005
+ for conv in results:
1006
+ conversation_id = conv['conversation_id']
1007
+ title = conv['title']
1008
+ display_title = f"{title} (ID: {conversation_id[:8]})"
1009
+ choices.append(display_title)
1010
+ mapping[display_title] = conversation_id
1011
 
1012
+ return gr.update(choices=choices), mapping
1013
 
1014
+ except Exception as e:
1015
+ logging.error(f"Error in search_conversations: {str(e)}")
1016
+ return gr.update(choices=[]), {}
1017
+
1018
+ # Update the search button click event
1019
  search_conversations_button.click(
1020
  search_conversations,
1021
+ inputs=[title_search, content_search, keyword_search],
1022
  outputs=[conversations_list, conversation_mapping]
1023
  )
1024
 
 
1175
  ]
1176
  )
1177
 
1178
+ def delete_messages_in_conversation_wrapper(conversation_id):
1179
+ """Wrapper function to delete all messages in a conversation."""
1180
  try:
1181
+ delete_messages_in_conversation(conversation_id)
1182
  logging.info(f"Messages in conversation '{conversation_id}' deleted successfully.")
1183
  except Exception as e:
1184
  logging.error(f"Error deleting messages in conversation '{conversation_id}': {e}")
1185
  raise
1186
 
1187
+ def get_conversation_title_wrapper(conversation_id):
1188
  """Helper function to get the conversation title."""
1189
+ result = get_conversation_title(conversation_id)
 
1190
  if result:
1191
  return result[0][0]
1192
  else:
 
1316
  )
1317
 
1318
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1319
  def convert_file_to_text(file_path):
1320
  """Convert various file types to plain text."""
1321
  file_extension = os.path.splitext(file_path)[1].lower()
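The rest of convert_file_to_text is cut off by this hunk. For orientation only, an extension-based dispatch of this kind usually continues along the lines of the sketch below; the helper names are hypothetical and this is not the repository's actual implementation (os is assumed to be imported at module level, as the line above already uses os.path):

    def convert_file_to_text_sketch(file_path):
        # Illustrative only: route a file to a plain-text reader by extension.
        ext = os.path.splitext(file_path)[1].lower()
        if ext in ('.txt', '.md'):
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        if ext == '.pdf':
            return extract_pdf_text(file_path)    # hypothetical helper
        if ext in ('.doc', '.docx'):
            return extract_docx_text(file_path)   # hypothetical helper
        raise ValueError(f"Unsupported file type: {ext}")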
App_Function_Libraries/Gradio_UI/Re_summarize_tab.py CHANGED
@@ -10,19 +10,33 @@ import gradio as gr
10
  #
11
  # Local Imports
12
  from App_Function_Libraries.Chunk_Lib import improved_chunking_process
13
- from App_Function_Libraries.DB.DB_Manager import update_media_content, load_preset_prompts
14
  from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
15
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import fetch_item_details, fetch_items_by_keyword, \
16
  fetch_items_by_content, fetch_items_by_title_or_url
17
  from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize_chunk
18
- from App_Function_Libraries.Utils.Utils import load_comprehensive_config
19
- #
20
  #
21
  ######################################################################################################################
22
  #
23
  # Functions:
24
 
25
  def create_resummary_tab():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  with gr.TabItem("Re-Summarize", visible=True):
27
  gr.Markdown("# Re-Summarize Existing Content")
28
  with gr.Row():
@@ -36,9 +50,10 @@ def create_resummary_tab():
36
 
37
  with gr.Row():
38
  api_name_input = gr.Dropdown(
39
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
40
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM","ollama", "HuggingFace"],
41
- value="Local-LLM", label="API Name")
 
42
  api_key_input = gr.Textbox(label="API Key", placeholder="Enter your API key here", type="password")
43
 
44
  chunking_options_checkbox = gr.Checkbox(label="Use Chunking", value=False)
@@ -55,9 +70,17 @@ def create_resummary_tab():
55
  preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
56
  value=False,
57
  visible=True)
 
 
 
 
 
 
 
 
58
  with gr.Row():
59
  preset_prompt = gr.Dropdown(label="Select Preset Prompt",
60
- choices=load_preset_prompts(),
61
  visible=False)
62
  with gr.Row():
63
  custom_prompt_input = gr.Textbox(label="Custom Prompt",
@@ -86,6 +109,15 @@ def create_resummary_tab():
86
  lines=3,
87
  visible=False)
88
 
 
 
 
 
 
 
 
 
 
89
  def update_prompts(preset_name):
90
  prompts = update_user_prompt(preset_name)
91
  return (
@@ -93,6 +125,19 @@ def create_resummary_tab():
93
  gr.update(value=prompts["system_prompt"], visible=True)
94
  )
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  preset_prompt.change(
97
  update_prompts,
98
  inputs=preset_prompt,
@@ -109,9 +154,9 @@ def create_resummary_tab():
109
  outputs=[custom_prompt_input, system_prompt_input]
110
  )
111
  preset_prompt_checkbox.change(
112
- fn=lambda x: gr.update(visible=x),
113
  inputs=[preset_prompt_checkbox],
114
- outputs=[preset_prompt]
115
  )
116
 
117
  # Connect the UI elements
@@ -140,7 +185,12 @@ def create_resummary_tab():
140
  outputs=result_output
141
  )
142
 
143
- return search_query_input, search_type_input, search_button, items_output, item_mapping, api_name_input, api_key_input, chunking_options_checkbox, chunking_options_box, chunk_method, max_chunk_size, chunk_overlap, custom_prompt_checkbox, custom_prompt_input, resummarize_button, result_output
 
 
 
 
 
144
 
145
 
146
  def update_resummarize_dropdown(search_query, search_type):
 
10
  #
11
  # Local Imports
12
  from App_Function_Libraries.Chunk_Lib import improved_chunking_process
13
+ from App_Function_Libraries.DB.DB_Manager import update_media_content, list_prompts
14
  from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
15
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import fetch_item_details, fetch_items_by_keyword, \
16
  fetch_items_by_content, fetch_items_by_title_or_url
17
  from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize_chunk
18
+ from App_Function_Libraries.Utils.Utils import load_comprehensive_config, default_api_endpoint, global_api_endpoints, \
19
+ format_api_name
20
  #
21
  ######################################################################################################################
22
  #
23
  # Functions:
24
 
25
  def create_resummary_tab():
26
+ try:
27
+ default_value = None
28
+ if default_api_endpoint:
29
+ if default_api_endpoint in global_api_endpoints:
30
+ default_value = format_api_name(default_api_endpoint)
31
+ else:
32
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
33
+ except Exception as e:
34
+ logging.error(f"Error setting default API endpoint: {str(e)}")
35
+ default_value = None
36
+
37
+ # Get initial prompts for first page
38
+ initial_prompts, total_pages, current_page = list_prompts(page=1, per_page=20)
39
+
40
  with gr.TabItem("Re-Summarize", visible=True):
41
  gr.Markdown("# Re-Summarize Existing Content")
42
  with gr.Row():
 
50
 
51
  with gr.Row():
52
  api_name_input = gr.Dropdown(
53
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
54
+ value=default_value,
55
+ label="API for Summarization/Analysis (Optional)"
56
+ )
57
  api_key_input = gr.Textbox(label="API Key", placeholder="Enter your API key here", type="password")
58
 
59
  chunking_options_checkbox = gr.Checkbox(label="Use Chunking", value=False)
 
70
  preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
71
  value=False,
72
  visible=True)
73
+
74
+ # Add pagination controls for preset prompts
75
+ with gr.Row(visible=False) as preset_prompt_controls:
76
+ prev_page = gr.Button("Previous")
77
+ current_page_text = gr.Text(f"Page {current_page} of {total_pages}")
78
+ next_page = gr.Button("Next")
79
+ current_page_state = gr.State(value=1)
80
+
81
  with gr.Row():
82
  preset_prompt = gr.Dropdown(label="Select Preset Prompt",
83
+ choices=initial_prompts,
84
  visible=False)
85
  with gr.Row():
86
  custom_prompt_input = gr.Textbox(label="Custom Prompt",
 
109
  lines=3,
110
  visible=False)
111
 
112
+ def update_prompt_page(direction, current_page_val):
113
+ new_page = max(1, min(total_pages, current_page_val + direction))
114
+ prompts, _, _ = list_prompts(page=new_page, per_page=20)  # keep per_page in sync with the initial fetch that produced total_pages
115
+ return (
116
+ gr.update(choices=prompts),
117
+ gr.update(value=f"Page {new_page} of {total_pages}"),
118
+ new_page
119
+ )
120
+
121
  def update_prompts(preset_name):
122
  prompts = update_user_prompt(preset_name)
123
  return (
 
125
  gr.update(value=prompts["system_prompt"], visible=True)
126
  )
127
 
128
+ # Connect pagination buttons
129
+ prev_page.click(
130
+ lambda x: update_prompt_page(-1, x),
131
+ inputs=[current_page_state],
132
+ outputs=[preset_prompt, current_page_text, current_page_state]
133
+ )
134
+
135
+ next_page.click(
136
+ lambda x: update_prompt_page(1, x),
137
+ inputs=[current_page_state],
138
+ outputs=[preset_prompt, current_page_text, current_page_state]
139
+ )
140
+
141
  preset_prompt.change(
142
  update_prompts,
143
  inputs=preset_prompt,
 
154
  outputs=[custom_prompt_input, system_prompt_input]
155
  )
156
  preset_prompt_checkbox.change(
157
+ fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
158
  inputs=[preset_prompt_checkbox],
159
+ outputs=[preset_prompt, preset_prompt_controls]
160
  )
161
 
162
  # Connect the UI elements
 
185
  outputs=result_output
186
  )
187
 
188
+ return (
189
+ search_query_input, search_type_input, search_button, items_output,
190
+ item_mapping, api_name_input, api_key_input, chunking_options_checkbox,
191
+ chunking_options_box, chunk_method, max_chunk_size, chunk_overlap,
192
+ custom_prompt_checkbox, custom_prompt_input, resummarize_button, result_output
193
+ )
194
 
195
 
196
  def update_resummarize_dropdown(search_query, search_type):
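The preset-prompt pager in this tab, like the other tabs in this commit, relies on list_prompts returning a (prompts, total_pages, current_page) triple. A minimal helper capturing that contract is sketched below as an illustration only; per_page is assumed to match the value used for the initial fetch, since that is what total_pages was computed against:

    def page_prompts(direction, current_page, total_pages, per_page=20):
        # Clamp to the valid range, fetch that page, and return values suitable for
        # the dropdown choices, the "Page X of Y" label, and the page state.
        new_page = max(1, min(total_pages, current_page + direction))
        prompts, total_pages, new_page = list_prompts(page=new_page, per_page=per_page)
        return prompts, f"Page {new_page} of {total_pages}", new_page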
App_Function_Libraries/Gradio_UI/Search_Tab.py CHANGED
@@ -11,8 +11,8 @@ import gradio as gr
11
  #
12
  # Local Imports
13
  from App_Function_Libraries.DB.DB_Manager import view_database, search_and_display_items, get_all_document_versions, \
14
- fetch_item_details_single, fetch_paginated_data, fetch_item_details, get_latest_transcription
15
- from App_Function_Libraries.DB.SQLite_DB import search_prompts, get_document_version
16
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_detailed_view
17
  from App_Function_Libraries.Utils.Utils import get_database_path, format_text_with_line_breaks
18
  #
@@ -80,8 +80,8 @@ def format_as_html(content, title):
80
  """
81
 
82
  def create_search_tab():
83
- with gr.TabItem("Search / Detailed View", visible=True):
84
- gr.Markdown("# Search across all ingested items in the Database")
85
  with gr.Row():
86
  with gr.Column(scale=1):
87
  gr.Markdown("by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
@@ -150,8 +150,8 @@ def display_search_results(query):
150
 
151
 
152
  def create_search_summaries_tab():
153
- with gr.TabItem("Search/View Title+Summary", visible=True):
154
- gr.Markdown("# Search across all ingested items in the Database and review their summaries")
155
  gr.Markdown("Search by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
156
  with gr.Row():
157
  with gr.Column():
 
11
  #
12
  # Local Imports
13
  from App_Function_Libraries.DB.DB_Manager import view_database, search_and_display_items, get_all_document_versions, \
14
+ fetch_item_details_single, fetch_paginated_data, fetch_item_details, get_latest_transcription, search_prompts, \
15
+ get_document_version
16
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_detailed_view
17
  from App_Function_Libraries.Utils.Utils import get_database_path, format_text_with_line_breaks
18
  #
 
80
  """
81
 
82
  def create_search_tab():
83
+ with gr.TabItem("Media DB Search / Detailed View", visible=True):
84
+ gr.Markdown("# Search across all ingested items in the Media Database")
85
  with gr.Row():
86
  with gr.Column(scale=1):
87
  gr.Markdown("by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
 
150
 
151
 
152
  def create_search_summaries_tab():
153
+ with gr.TabItem("Media DB Search/View Title+Summary", visible=True):
154
+ gr.Markdown("# Search across all ingested items in the Media Database and review their summaries")
155
  gr.Markdown("Search by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
156
  with gr.Row():
157
  with gr.Column():
App_Function_Libraries/Gradio_UI/Semantic_Scholar_tab.py ADDED
@@ -0,0 +1,184 @@
 
 
1
+ # Semantic_Scholar_tab.py
2
+ # Description: contains the code to create the Semantic Scholar tab in the Gradio UI.
3
+ #
4
+ # Imports
5
+ #
6
+ # External Libraries
7
+ import gradio as gr
8
+ #
9
+ # Internal Libraries
10
+ from App_Function_Libraries.Third_Party.Semantic_Scholar import search_and_display, FIELDS_OF_STUDY, PUBLICATION_TYPES
11
+
12
+
13
+ #
14
+ ######################################################################################################################
15
+ # Functions
16
+ def create_semantic_scholar_tab():
17
+ """Create the Semantic Scholar tab for the Gradio UI"""
18
+ with gr.Tab("Semantic Scholar Search"):
19
+ with gr.Row():
20
+ with gr.Column(scale=2):
21
+ gr.Markdown("""
22
+ ## Semantic Scholar Paper Search
23
+
24
+ This interface allows you to search for academic papers using the Semantic Scholar API with advanced filtering options:
25
+
26
+ ### Search Options
27
+ - **Keywords**: Search across titles, abstracts, and other paper content
28
+ - **Year Range**: Filter papers by publication year (e.g., "2020-2023" or "2020")
29
+ - **Venue**: Filter by publication venue (journal or conference)
30
+ - **Minimum Citations**: Filter papers by minimum citation count
31
+ - **Fields of Study**: Filter papers by academic field
32
+ - **Publication Types**: Filter by type of publication
33
+ - **Open Access**: Option to show only papers with free PDF access
34
+
35
+ ### Results Include
36
+ - Paper title
37
+ - Author list
38
+ - Publication year and venue
39
+ - Citation count
40
+ - Publication types
41
+ - Abstract
42
+ - Links to PDF (when available) and Semantic Scholar page
43
+ """)
44
+ with gr.Column(scale=2):
45
+ gr.Markdown("""
46
+ ### Pagination
47
+ - 10 results per page
48
+ - Navigate through results using Previous/Next buttons
49
+ - Current page number and total results displayed
50
+
51
+ ### Usage Tips
52
+ - Combine multiple filters for more specific results
53
+ - Use specific terms for more focused results
54
+ - Try different combinations of filters if you don't find what you're looking for
55
+ """)
56
+ with gr.Row():
57
+ with gr.Column(scale=2):
58
+ search_input = gr.Textbox(
59
+ label="Search Query",
60
+ placeholder="Enter keywords to search for papers...",
61
+ lines=1
62
+ )
63
+
64
+ # Advanced search options
65
+ with gr.Row():
66
+ year_range = gr.Textbox(
67
+ label="Year Range",
68
+ placeholder="e.g., 2020-2023 or 2020",
69
+ lines=1
70
+ )
71
+ venue = gr.Textbox(
72
+ label="Venue",
73
+ placeholder="e.g., Nature, Science",
74
+ lines=1
75
+ )
76
+ min_citations = gr.Number(
77
+ label="Minimum Citations",
78
+ value=0,
79
+ minimum=0,
80
+ step=1
81
+ )
82
+
83
+ with gr.Row():
84
+ fields_of_study = gr.Dropdown(
85
+ choices=FIELDS_OF_STUDY,
86
+ label="Fields of Study",
87
+ multiselect=True,
88
+ value=[]
89
+ )
90
+ publication_types = gr.Dropdown(
91
+ choices=PUBLICATION_TYPES,
92
+ label="Publication Types",
93
+ multiselect=True,
94
+ value=[]
95
+ )
96
+
97
+ open_access_only = gr.Checkbox(
98
+ label="Open Access Only",
99
+ value=False
100
+ )
101
+
102
+ with gr.Column(scale=1):
103
+ search_button = gr.Button("Search", variant="primary")
104
+
105
+ # Pagination controls
106
+ with gr.Row():
107
+ prev_button = gr.Button("← Previous")
108
+ current_page = gr.Number(value=0, label="Page", minimum=0, step=1)
109
+ max_page = gr.Number(value=0, label="Max Page", visible=False)
110
+ next_button = gr.Button("Next →")
111
+
112
+ total_results = gr.Textbox(
113
+ label="Total Results",
114
+ value="0",
115
+ interactive=False
116
+ )
117
+
118
+ output_text = gr.Markdown(
119
+ label="Results",
120
+ value="Use the search options above to find papers."
121
+ )
122
+
123
+ def update_page(direction, current, maximum):
124
+ new_page = current + direction
125
+ if new_page < 0:
126
+ return 0
127
+ if new_page > maximum:
128
+ return maximum
129
+ return new_page
130
+
131
+ # Handle search and pagination
132
+ def search_from_button(query, fields_of_study, publication_types, year_range, venue, min_citations,
133
+ open_access_only):
134
+ """Wrapper to always search from page 0 when search button is clicked"""
135
+ return search_and_display(
136
+ query=query,
137
+ page=0, # Force page 0 for new searches
138
+ fields_of_study=fields_of_study,
139
+ publication_types=publication_types,
140
+ year_range=year_range,
141
+ venue=venue,
142
+ min_citations=min_citations,
143
+ open_access_only=open_access_only
144
+ )
145
+ normal_search = search_and_display
146
+
147
+ search_button.click(
148
+ fn=search_from_button,
149
+ inputs=[
150
+ search_input, fields_of_study, publication_types,
151
+ year_range, venue, min_citations, open_access_only
152
+ ],
153
+ outputs=[output_text, current_page, max_page, total_results]
154
+ )
155
+
156
+ prev_button.click(
157
+ fn=lambda curr, max_p: update_page(-1, curr, max_p),
158
+ inputs=[current_page, max_page],
159
+ outputs=current_page
160
+ ).then(
161
+ fn=normal_search,
162
+ inputs=[
163
+ search_input, current_page, fields_of_study, publication_types,
164
+ year_range, venue, min_citations, open_access_only
165
+ ],
166
+ outputs=[output_text, current_page, max_page, total_results]
167
+ )
168
+
169
+ next_button.click(
170
+ fn=lambda curr, max_p: update_page(1, curr, max_p),
171
+ inputs=[current_page, max_page],
172
+ outputs=current_page
173
+ ).then(
174
+ fn=normal_search,
175
+ inputs=[
176
+ search_input, current_page, fields_of_study, publication_types,
177
+ year_range, venue, min_citations, open_access_only
178
+ ],
179
+ outputs=[output_text, current_page, max_page, total_results]
180
+ )
181
+
182
+ #
183
+ # End of Semantic_Scholar_tab.py
184
+ ######################################################################################################################
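The search itself is delegated to search_and_display from App_Function_Libraries.Third_Party.Semantic_Scholar, which is not shown in this diff. For orientation, a helper like that typically wraps the public Semantic Scholar Graph API paper-search endpoint roughly as sketched below; the parameter names follow the published /graph/v1/paper/search documentation, but treat them as an assumption and verify against the current API before relying on them:

    import requests

    def search_papers_sketch(query, year_range=None, venue=None, min_citations=0,
                             fields_of_study=None, publication_types=None,
                             open_access_only=False, page=0, page_size=10):
        # Sketch of a direct Graph API query; not the repository's implementation.
        params = {
            "query": query,
            "offset": page * page_size,
            "limit": page_size,
            "fields": "title,authors,year,venue,citationCount,abstract,openAccessPdf,url",
        }
        if year_range:
            params["year"] = year_range                  # e.g. "2020-2023" or "2020"
        if venue:
            params["venue"] = venue
        if min_citations:
            params["minCitationCount"] = int(min_citations)
        if fields_of_study:
            params["fieldsOfStudy"] = ",".join(fields_of_study)
        if publication_types:
            params["publicationTypes"] = ",".join(publication_types)
        if open_access_only:
            params["openAccessPdf"] = ""                 # presence-only flag
        resp = requests.get("https://api.semanticscholar.org/graph/v1/paper/search",
                            params=params, timeout=30)
        resp.raise_for_status()
        return resp.json()                               # contains "total", "offset" and "data"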
App_Function_Libraries/Gradio_UI/Video_transcription_tab.py CHANGED
@@ -6,22 +6,23 @@ import json
6
  import logging
7
  import os
8
  from datetime import datetime
9
- from typing import Dict, Any
10
-
11
  #
12
  # External Imports
13
  import gradio as gr
14
  import yt_dlp
 
 
15
  #
16
  # Local Imports
17
- from App_Function_Libraries.DB.DB_Manager import load_preset_prompts, add_media_to_database, \
18
- check_media_and_whisper_model, check_existing_media, update_media_content_with_version
19
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
20
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import error_handler
21
  from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_transcription, perform_summarization, \
22
  save_transcription_and_summary
23
  from App_Function_Libraries.Utils.Utils import convert_to_seconds, safe_read_file, format_transcription, \
24
- create_download_directory, generate_unique_identifier, extract_text_from_segments
 
25
  from App_Function_Libraries.Video_DL_Ingestion_Lib import parse_and_expand_urls, extract_metadata, download_video
26
  from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
27
  # Import metrics logging
@@ -32,6 +33,16 @@ from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histo
32
  # Functions:
33
 
34
  def create_video_transcription_tab():
 
 
 
 
 
 
 
 
 
 
35
  with gr.TabItem("Video Transcription + Summarization", visible=True):
36
  gr.Markdown("# Transcribe & Summarize Videos from URLs")
37
  with gr.Row():
@@ -56,15 +67,20 @@ def create_video_transcription_tab():
56
  preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
57
  value=False,
58
  visible=True)
 
 
 
 
 
59
  with gr.Row():
 
60
  preset_prompt = gr.Dropdown(label="Select Preset Prompt",
61
- choices=load_preset_prompts(),
62
  visible=False)
63
  with gr.Row():
64
- custom_prompt_input = gr.Textbox(label="Custom Prompt",
65
- placeholder="Enter custom prompt here",
66
- lines=3,
67
- visible=False)
68
  with gr.Row():
69
  system_prompt_input = gr.Textbox(label="System Prompt",
70
  value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
@@ -87,22 +103,75 @@ def create_video_transcription_tab():
87
  lines=3,
88
  visible=False,
89
  interactive=True)
 
 
 
 
 
 
90
  custom_prompt_checkbox.change(
91
- fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
92
  inputs=[custom_prompt_checkbox],
93
  outputs=[custom_prompt_input, system_prompt_input]
94
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  preset_prompt_checkbox.change(
96
- fn=lambda x: gr.update(visible=x),
97
  inputs=[preset_prompt_checkbox],
98
- outputs=[preset_prompt]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  )
100
 
101
  def update_prompts(preset_name):
102
  prompts = update_user_prompt(preset_name)
103
  return (
104
- gr.update(value=prompts["user_prompt"], visible=True),
105
- gr.update(value=prompts["system_prompt"], visible=True)
106
  )
107
 
108
  preset_prompt.change(
@@ -111,11 +180,12 @@ def create_video_transcription_tab():
111
  outputs=[custom_prompt_input, system_prompt_input]
112
  )
113
 
 
114
  api_name_input = gr.Dropdown(
115
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
116
- "OpenRouter",
117
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace", "Custom-OpenAI-API"],
118
- value=None, label="API Name (Mandatory)")
119
  api_key_input = gr.Textbox(label="API Key (Optional - Set in Config.txt)", placeholder="Enter your API key here",
120
  type="password")
121
  keywords_input = gr.Textbox(label="Keywords", placeholder="Enter keywords here (comma-separated)",
@@ -198,8 +268,7 @@ def create_video_transcription_tab():
198
  progress: gr.Progress = gr.Progress()) -> tuple:
199
  try:
200
  # Start overall processing timer
201
- proc_start_time = datetime.utcnow()
202
- # FIXME - summarize_recursively is not being used...
203
  logging.info("Entering process_videos_with_error_handling")
204
  logging.info(f"Received inputs: {inputs}")
205
 
@@ -251,8 +320,7 @@ def create_video_transcription_tab():
251
  all_summaries = ""
252
 
253
  # Start timing
254
- # FIXME - utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).
255
- start_proc = datetime.utcnow()
256
 
257
  for i in range(0, len(all_inputs), batch_size):
258
  batch = all_inputs[i:i + batch_size]
@@ -260,7 +328,7 @@ def create_video_transcription_tab():
260
 
261
  for input_item in batch:
262
  # Start individual video processing timer
263
- video_start_time = datetime.utcnow()
264
  try:
265
  start_seconds = convert_to_seconds(start_time)
266
  end_seconds = convert_to_seconds(end_time) if end_time else None
@@ -313,7 +381,7 @@ def create_video_transcription_tab():
313
  input_item, 2, whisper_model,
314
  custom_prompt,
315
  start_seconds, api_name, api_key,
316
- vad_use, False, False, False, 0.01, None, keywords, None, diarize,
317
  end_time=end_seconds,
318
  include_timestamps=timestamp_option,
319
  metadata=video_metadata,
@@ -365,7 +433,7 @@ def create_video_transcription_tab():
365
  )
366
 
367
  # Calculate processing time
368
- video_end_time = datetime.utcnow()
369
  processing_time = (video_end_time - video_start_time).total_seconds()
370
  log_histogram(
371
  metric_name="video_processing_time_seconds",
@@ -473,7 +541,7 @@ def create_video_transcription_tab():
473
  total_inputs = len(all_inputs)
474
 
475
  # End overall processing timer
476
- proc_end_time = datetime.utcnow()
477
  total_processing_time = (proc_end_time - proc_start_time).total_seconds()
478
  log_histogram(
479
  metric_name="total_processing_time_seconds",
@@ -702,8 +770,9 @@ def create_video_transcription_tab():
702
 
703
  # Perform transcription
704
  logging.info("process_url_with_metadata: Starting transcription...")
 
705
  audio_file_path, segments = perform_transcription(video_file_path, offset, whisper_model,
706
- vad_filter, diarize)
707
 
708
  if audio_file_path is None or segments is None:
709
  logging.error("process_url_with_metadata: Transcription failed or segments not available.")
@@ -771,7 +840,54 @@ def create_video_transcription_tab():
771
  # API key resolution handled at base of function if none provided
772
  api_key = api_key if api_key else None
773
  logging.info(f"process_url_with_metadata: Starting summarization with {api_name}...")
774
- summary_text = perform_summarization(api_name, full_text_with_metadata, custom_prompt, api_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
775
  if summary_text is None:
776
  logging.error("Summarization failed.")
777
  return None, None, None, None, None, None
@@ -859,3 +975,7 @@ def create_video_transcription_tab():
859
  ],
860
  outputs=[progress_output, error_output, results_output, download_transcription, download_summary, confabulation_output]
861
  )
 
 
 
 
 
6
  import logging
7
  import os
8
  from datetime import datetime
 
 
9
  #
10
  # External Imports
11
  import gradio as gr
12
  import yt_dlp
13
+
14
+ from App_Function_Libraries.Chunk_Lib import improved_chunking_process
15
  #
16
  # Local Imports
17
+ from App_Function_Libraries.DB.DB_Manager import add_media_to_database, \
18
+ check_media_and_whisper_model, check_existing_media, update_media_content_with_version, list_prompts
19
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
20
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import error_handler
21
  from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_transcription, perform_summarization, \
22
  save_transcription_and_summary
23
  from App_Function_Libraries.Utils.Utils import convert_to_seconds, safe_read_file, format_transcription, \
24
+ create_download_directory, generate_unique_identifier, extract_text_from_segments, default_api_endpoint, \
25
+ global_api_endpoints, format_api_name
26
  from App_Function_Libraries.Video_DL_Ingestion_Lib import parse_and_expand_urls, extract_metadata, download_video
27
  from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
28
  # Import metrics logging
 
33
  # Functions:
34
 
35
  def create_video_transcription_tab():
36
+ try:
37
+ default_value = None
38
+ if default_api_endpoint:
39
+ if default_api_endpoint in global_api_endpoints:
40
+ default_value = format_api_name(default_api_endpoint)
41
+ else:
42
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
43
+ except Exception as e:
44
+ logging.error(f"Error setting default API endpoint: {str(e)}")
45
+ default_value = None
46
  with gr.TabItem("Video Transcription + Summarization", visible=True):
47
  gr.Markdown("# Transcribe & Summarize Videos from URLs")
48
  with gr.Row():
 
67
  preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
68
  value=False,
69
  visible=True)
70
+
71
+ # Initialize state variables for pagination
72
+ current_page_state = gr.State(value=1)
73
+ total_pages_state = gr.State(value=1)
74
+
75
  with gr.Row():
76
+ # Add pagination controls
77
  preset_prompt = gr.Dropdown(label="Select Preset Prompt",
78
+ choices=[],
79
  visible=False)
80
  with gr.Row():
81
+ prev_page_button = gr.Button("Previous Page", visible=False)
82
+ page_display = gr.Markdown("Page 1 of X", visible=False)
83
+ next_page_button = gr.Button("Next Page", visible=False)
 
84
  with gr.Row():
85
  system_prompt_input = gr.Textbox(label="System Prompt",
86
  value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
 
103
  lines=3,
104
  visible=False,
105
  interactive=True)
106
+ with gr.Row():
107
+ custom_prompt_input = gr.Textbox(label="Custom Prompt",
108
+ placeholder="Enter custom prompt here",
109
+ lines=3,
110
+ visible=False)
111
+
112
  custom_prompt_checkbox.change(
113
+ fn=lambda x: (gr.update(visible=x, interactive=x), gr.update(visible=x, interactive=x)),
114
  inputs=[custom_prompt_checkbox],
115
  outputs=[custom_prompt_input, system_prompt_input]
116
  )
117
+
118
+ def on_preset_prompt_checkbox_change(is_checked):
119
+ if is_checked:
120
+ prompts, total_pages, current_page = list_prompts(page=1, per_page=20)
121
+ page_display_text = f"Page {current_page} of {total_pages}"
122
+ return (
123
+ gr.update(visible=True, interactive=True, choices=prompts), # preset_prompt
124
+ gr.update(visible=True), # prev_page_button
125
+ gr.update(visible=True), # next_page_button
126
+ gr.update(value=page_display_text, visible=True), # page_display
127
+ current_page, # current_page_state
128
+ total_pages # total_pages_state
129
+ )
130
+ else:
131
+ return (
132
+ gr.update(visible=False, interactive=False), # preset_prompt
133
+ gr.update(visible=False), # prev_page_button
134
+ gr.update(visible=False), # next_page_button
135
+ gr.update(visible=False), # page_display
136
+ 1, # current_page_state
137
+ 1 # total_pages_state
138
+ )
139
+
140
  preset_prompt_checkbox.change(
141
+ fn=on_preset_prompt_checkbox_change,
142
  inputs=[preset_prompt_checkbox],
143
+ outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
144
+ )
145
+
146
+ def on_prev_page_click(current_page, total_pages):
147
+ new_page = max(current_page - 1, 1)
148
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
149
+ page_display_text = f"Page {current_page} of {total_pages}"
150
+ return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
151
+
152
+ prev_page_button.click(
153
+ fn=on_prev_page_click,
154
+ inputs=[current_page_state, total_pages_state],
155
+ outputs=[preset_prompt, page_display, current_page_state]
156
+ )
157
+
158
+ def on_next_page_click(current_page, total_pages):
159
+ new_page = min(current_page + 1, total_pages)
160
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
161
+ page_display_text = f"Page {current_page} of {total_pages}"
162
+ return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
163
+
164
+ next_page_button.click(
165
+ fn=on_next_page_click,
166
+ inputs=[current_page_state, total_pages_state],
167
+ outputs=[preset_prompt, page_display, current_page_state]
168
  )
169
 
170
  def update_prompts(preset_name):
171
  prompts = update_user_prompt(preset_name)
172
  return (
173
+ gr.update(value=prompts["user_prompt"], visible=True, interactive=True),
174
+ gr.update(value=prompts["system_prompt"], visible=True, interactive=True)
175
  )
176
 
177
  preset_prompt.change(
 
180
  outputs=[custom_prompt_input, system_prompt_input]
181
  )
182
 
183
+ # Refactored API selection dropdown
184
  api_name_input = gr.Dropdown(
185
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
186
+ value=default_value,
187
+ label="API for Summarization/Analysis (Optional)"
188
+ )
189
  api_key_input = gr.Textbox(label="API Key (Optional - Set in Config.txt)", placeholder="Enter your API key here",
190
  type="password")
191
  keywords_input = gr.Textbox(label="Keywords", placeholder="Enter keywords here (comma-separated)",
 
268
  progress: gr.Progress = gr.Progress()) -> tuple:
269
  try:
270
  # Start overall processing timer
271
+ proc_start_time = datetime.now()
 
272
  logging.info("Entering process_videos_with_error_handling")
273
  logging.info(f"Received inputs: {inputs}")
274
 
 
320
  all_summaries = ""
321
 
322
  # Start timing
323
+ start_proc = datetime.now()
 
324
 
325
  for i in range(0, len(all_inputs), batch_size):
326
  batch = all_inputs[i:i + batch_size]
 
328
 
329
  for input_item in batch:
330
  # Start individual video processing timer
331
+ video_start_time = datetime.now()
332
  try:
333
  start_seconds = convert_to_seconds(start_time)
334
  end_seconds = convert_to_seconds(end_time) if end_time else None
 
381
  input_item, 2, whisper_model,
382
  custom_prompt,
383
  start_seconds, api_name, api_key,
384
+ vad_use, False, False, summarize_recursively, 0.01, None, keywords, None, diarize,
385
  end_time=end_seconds,
386
  include_timestamps=timestamp_option,
387
  metadata=video_metadata,
 
433
  )
434
 
435
  # Calculate processing time
436
+ video_end_time = datetime.now()
437
  processing_time = (video_end_time - video_start_time).total_seconds()
438
  log_histogram(
439
  metric_name="video_processing_time_seconds",
 
541
  total_inputs = len(all_inputs)
542
 
543
  # End overall processing timer
544
+ proc_end_time = datetime.now()
545
  total_processing_time = (proc_end_time - proc_start_time).total_seconds()
546
  log_histogram(
547
  metric_name="total_processing_time_seconds",
 
770
 
771
  # Perform transcription
772
  logging.info("process_url_with_metadata: Starting transcription...")
773
+ logging.info(f"process_url_with_metadata: overwrite existing?: {overwrite_existing}")
774
  audio_file_path, segments = perform_transcription(video_file_path, offset, whisper_model,
775
+ vad_filter, diarize, overwrite_existing)
776
 
777
  if audio_file_path is None or segments is None:
778
  logging.error("process_url_with_metadata: Transcription failed or segments not available.")
 
840
  # API key resolution handled at base of function if none provided
841
  api_key = api_key if api_key else None
842
  logging.info(f"process_url_with_metadata: Starting summarization with {api_name}...")
843
+
844
+ # Perform Chunking if enabled
845
+ # FIXME - Setup a proper prompt for Recursive Summarization
846
+ if use_chunking:
847
+ logging.info("process_url_with_metadata: Chunking enabled. Starting chunking...")
848
+ chunked_texts = improved_chunking_process(full_text_with_metadata, chunk_options)
849
+
850
+ if chunked_texts is None:
851
+ logging.warning("Chunking failed, falling back to full text summarization")
852
+ summary_text = perform_summarization(api_name, full_text_with_metadata, custom_prompt,
853
+ api_key)
854
+ else:
855
+ logging.debug(
856
+ f"process_url_with_metadata: Chunking completed. Processing {len(chunked_texts)} chunks...")
857
+ summaries = []
858
+
859
+ if rolling_summarization:
860
+ # Perform recursive summarization on each chunk
861
+ for chunk in chunked_texts:
862
+ chunk_summary = perform_summarization(api_name, chunk['text'], custom_prompt,
863
+ api_key)
864
+ if chunk_summary:
865
+ summaries.append(
866
+ f"Chunk {chunk['metadata']['chunk_index']}/{chunk['metadata']['total_chunks']}: {chunk_summary}")
867
+ summary_text = "\n\n".join(summaries)
868
+ else:
869
+ logging.error("All chunk summarizations failed")
870
+ summary_text = None
871
+
872
+ for chunk in chunked_texts:
873
+ # Perform Non-recursive summarization on each chunk
874
+ chunk_summary = perform_summarization(api_name, chunk['text'], custom_prompt,
875
+ api_key)
876
+ if chunk_summary:
877
+ summaries.append(
878
+ f"Chunk {chunk['metadata']['chunk_index']}/{chunk['metadata']['total_chunks']}: {chunk_summary}")
879
+
880
+ if summaries:
881
+ summary_text = "\n\n".join(summaries)
882
+ logging.info(f"Successfully summarized {len(summaries)} chunks")
883
+ else:
884
+ logging.error("All chunk summarizations failed")
885
+ summary_text = None
886
+ else:
887
+ # Regular summarization without chunking
888
+ summary_text = perform_summarization(api_name, full_text_with_metadata, custom_prompt,
889
+ api_key) if api_name else None
890
+
891
  if summary_text is None:
892
  logging.error("Summarization failed.")
893
  return None, None, None, None, None, None
 
975
  ],
976
  outputs=[progress_output, error_output, results_output, download_transcription, download_summary, confabulation_output]
977
  )
978
+
979
+ #
980
+ # End of Video_transcription_tab.py
981
+ #######################################################################################################################
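The chunking branch added above follows a recurring pattern in this commit: split the transcript, summarize each chunk, then either join the per-chunk summaries or condense them once more when recursive summarization is requested. Below is a minimal standalone sketch of that flow. It assumes improved_chunking_process(text, options) returns a list of dicts with 'text' and 'metadata' keys and that perform_summarization(api_name, text, prompt, api_key) returns a string or None, as the diff suggests; the import paths are guesses based on module names elsewhere in this commit, not confirmed by it.

# Sketch only - module paths and signatures are assumptions inferred from the diff.
from typing import Optional
from App_Function_Libraries.Chunk_Lib import improved_chunking_process          # assumed path
from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization  # assumed path

def summarize_with_chunking(full_text: str, chunk_options: dict, api_name: str,
                            custom_prompt: str, api_key: Optional[str],
                            recursive: bool = False) -> Optional[str]:
    chunks = improved_chunking_process(full_text, chunk_options)
    if not chunks:
        # Fall back to a single-pass summary when chunking fails.
        return perform_summarization(api_name, full_text, custom_prompt, api_key)

    summaries = []
    for chunk in chunks:
        part = perform_summarization(api_name, chunk['text'], custom_prompt, api_key)
        if part:
            meta = chunk['metadata']
            summaries.append(f"Chunk {meta['chunk_index']}/{meta['total_chunks']}: {part}")

    if not summaries:
        return None

    joined = "\n\n".join(summaries)
    if recursive:
        # One extra pass condenses the per-chunk summaries into a single summary.
        return perform_summarization(api_name, joined, custom_prompt, api_key)
    return joined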
App_Function_Libraries/Gradio_UI/View_DB_Items_tab.py CHANGED
@@ -3,131 +3,26 @@
3
  #
4
  # Imports
5
  import html
 
 
6
  #
7
  # External Imports
8
  import gradio as gr
9
  #
10
  # Local Imports
11
  from App_Function_Libraries.DB.DB_Manager import view_database, get_all_document_versions, \
12
- fetch_paginated_data, fetch_item_details, get_latest_transcription, list_prompts, fetch_prompt_details, \
13
- load_preset_prompts
14
- from App_Function_Libraries.DB.SQLite_DB import get_document_version
 
 
 
 
15
  #
16
  ####################################################################################################
17
  #
18
  # Functions
19
 
20
- def create_prompt_view_tab():
21
- with gr.TabItem("View Prompt Database", visible=True):
22
- gr.Markdown("# View Prompt Database Entries")
23
- with gr.Row():
24
- with gr.Column():
25
- entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
26
- page_number = gr.Number(value=1, label="Page Number", precision=0)
27
- view_button = gr.Button("View Page")
28
- next_page_button = gr.Button("Next Page")
29
- previous_page_button = gr.Button("Previous Page")
30
- pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
31
- prompt_selector = gr.Dropdown(label="Select Prompt to View", choices=[])
32
- with gr.Column():
33
- results_table = gr.HTML()
34
- selected_prompt_display = gr.HTML()
35
-
36
- def view_database(page, entries_per_page):
37
- try:
38
- prompts, total_pages, current_page = list_prompts(page, entries_per_page)
39
-
40
- table_html = "<table style='width:100%; border-collapse: collapse;'>"
41
- table_html += "<tr><th style='border: 1px solid black; padding: 8px;'>Title</th><th style='border: 1px solid black; padding: 8px;'>Author</th></tr>"
42
- prompt_choices = []
43
- for prompt_name in prompts:
44
- details = fetch_prompt_details(prompt_name)
45
- if details:
46
- title, _, _, _, _, _ = details
47
- author = "Unknown" # Assuming author is not stored in the current schema
48
- table_html += f"<tr><td style='border: 1px solid black; padding: 8px;'>{html.escape(title)}</td><td style='border: 1px solid black; padding: 8px;'>{html.escape(author)}</td></tr>"
49
- prompt_choices.append((title, title)) # Using title as both label and value
50
- table_html += "</table>"
51
-
52
- total_prompts = len(load_preset_prompts()) # This might be inefficient for large datasets
53
- pagination = f"Page {current_page} of {total_pages} (Total prompts: {total_prompts})"
54
-
55
- return table_html, pagination, total_pages, prompt_choices
56
- except Exception as e:
57
- return f"<p>Error fetching prompts: {e}</p>", "Error", 0, []
58
-
59
- def update_page(page, entries_per_page):
60
- results, pagination, total_pages, prompt_choices = view_database(page, entries_per_page)
61
- next_disabled = page >= total_pages
62
- prev_disabled = page <= 1
63
- return results, pagination, page, gr.update(interactive=not next_disabled), gr.update(
64
- interactive=not prev_disabled), gr.update(choices=prompt_choices)
65
-
66
- def go_to_next_page(current_page, entries_per_page):
67
- next_page = current_page + 1
68
- return update_page(next_page, entries_per_page)
69
-
70
- def go_to_previous_page(current_page, entries_per_page):
71
- previous_page = max(1, current_page - 1)
72
- return update_page(previous_page, entries_per_page)
73
-
74
- def display_selected_prompt(prompt_name):
75
- details = fetch_prompt_details(prompt_name)
76
- if details:
77
- title, author, description, system_prompt, user_prompt, keywords = details
78
- # Handle None values by converting them to empty strings
79
- description = description or ""
80
- system_prompt = system_prompt or ""
81
- user_prompt = user_prompt or ""
82
- author = author or "Unknown"
83
- keywords = keywords or ""
84
-
85
- html_content = f"""
86
- <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 20px;">
87
- <h3>{html.escape(title)}</h3> <h4>by {html.escape(author)}</h4>
88
- <p><strong>Description:</strong> {html.escape(description)}</p>
89
- <div style="margin-top: 10px;">
90
- <strong>System Prompt:</strong>
91
- <pre style="white-space: pre-wrap; word-wrap: break-word;">{html.escape(system_prompt)}</pre>
92
- </div>
93
- <div style="margin-top: 10px;">
94
- <strong>User Prompt:</strong>
95
- <pre style="white-space: pre-wrap; word-wrap: break-word;">{html.escape(user_prompt)}</pre>
96
- </div>
97
- <p><strong>Keywords:</strong> {html.escape(keywords)}</p>
98
- </div>
99
- """
100
- return html_content
101
- else:
102
- return "<p>Prompt not found.</p>"
103
-
104
- view_button.click(
105
- fn=update_page,
106
- inputs=[page_number, entries_per_page],
107
- outputs=[results_table, pagination_info, page_number, next_page_button, previous_page_button,
108
- prompt_selector]
109
- )
110
-
111
- next_page_button.click(
112
- fn=go_to_next_page,
113
- inputs=[page_number, entries_per_page],
114
- outputs=[results_table, pagination_info, page_number, next_page_button, previous_page_button,
115
- prompt_selector]
116
- )
117
-
118
- previous_page_button.click(
119
- fn=go_to_previous_page,
120
- inputs=[page_number, entries_per_page],
121
- outputs=[results_table, pagination_info, page_number, next_page_button, previous_page_button,
122
- prompt_selector]
123
- )
124
-
125
- prompt_selector.change(
126
- fn=display_selected_prompt,
127
- inputs=[prompt_selector],
128
- outputs=[selected_prompt_display]
129
- )
130
-
131
  def format_as_html(content, title):
132
  escaped_content = html.escape(content)
133
  formatted_content = escaped_content.replace('\n', '<br>')
@@ -149,9 +44,9 @@ def extract_prompt_and_summary(content: str):
149
  return prompt, summary
150
 
151
 
152
- def create_view_all_with_versions_tab():
153
- with gr.TabItem("View All Items", visible=True):
154
- gr.Markdown("# View All Database Entries with Version Selection")
155
  with gr.Row():
156
  with gr.Column(scale=1):
157
  entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
@@ -280,9 +175,143 @@ def create_view_all_with_versions_tab():
280
  )
281
 
282
 
283
- def create_viewing_tab():
284
- with gr.TabItem("View Database Entries", visible=True):
285
- gr.Markdown("# View Database Entries")
 
 
286
  with gr.Row():
287
  with gr.Column():
288
  entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
@@ -327,5 +356,461 @@ def create_viewing_tab():
327
  outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
328
  )
329
 
 
 
330
  #
331
- ####################################################################################################
 
3
  #
4
  # Imports
5
  import html
6
+ import logging
7
+
8
  #
9
  # External Imports
10
  import gradio as gr
11
  #
12
  # Local Imports
13
  from App_Function_Libraries.DB.DB_Manager import view_database, get_all_document_versions, \
14
+ fetch_paginated_data, fetch_item_details, get_latest_transcription, list_prompts, fetch_prompt_details
15
+ from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_keywords_for_note, search_conversations_by_keywords, \
16
+ get_notes_by_keywords, get_keywords_for_conversation, get_db_connection, get_all_conversations, load_chat_history, \
17
+ get_notes
18
+ from App_Function_Libraries.DB.SQLite_DB import get_document_version, fetch_items_by_keyword, fetch_all_keywords
19
+
20
+
21
  #
22
  ####################################################################################################
23
  #
24
  # Functions
25
 
 
 
26
  def format_as_html(content, title):
27
  escaped_content = html.escape(content)
28
  formatted_content = escaped_content.replace('\n', '<br>')
 
44
  return prompt, summary
45
 
46
 
47
+ def create_view_all_mediadb_with_versions_tab():
48
+ with gr.TabItem("View All MediaDB Items", visible=True):
49
+ gr.Markdown("# View All Media Database Entries with Version Selection")
50
  with gr.Row():
51
  with gr.Column(scale=1):
52
  entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
 
175
  )
176
 
177
 
178
+ def create_mediadb_keyword_search_tab():
179
+ with gr.TabItem("Search MediaDB by Keyword", visible=True):
180
+ gr.Markdown("# List Media Database Items by Keyword")
181
+
182
+ with gr.Row():
183
+ with gr.Column(scale=1):
184
+ # Keyword selection dropdown - populated with the available keywords at creation; "Refresh Keywords" reloads the list
185
+ keyword_dropdown = gr.Dropdown(
186
+ label="Select Keyword",
187
+ choices=fetch_all_keywords(), # Initialize with keywords on creation
188
+ value=None
189
+ )
190
+ entries_per_page = gr.Dropdown(
191
+ choices=[10, 20, 50, 100],
192
+ label="Entries per Page",
193
+ value=10
194
+ )
195
+ page_number = gr.Number(
196
+ value=1,
197
+ label="Page Number",
198
+ precision=0
199
+ )
200
+
201
+ # Navigation buttons
202
+ refresh_keywords_button = gr.Button("Refresh Keywords")
203
+ view_button = gr.Button("View Results")
204
+ next_page_button = gr.Button("Next Page")
205
+ previous_page_button = gr.Button("Previous Page")
206
+
207
+ # Pagination information
208
+ pagination_info = gr.Textbox(
209
+ label="Pagination Info",
210
+ interactive=False
211
+ )
212
+
213
+ with gr.Column(scale=2):
214
+ # Results area
215
+ results_table = gr.HTML(
216
+ label="Search Results"
217
+ )
218
+ item_details = gr.HTML(
219
+ label="Item Details",
220
+ visible=True
221
+ )
222
+
223
+ def update_keyword_choices():
224
+ try:
225
+ keywords = fetch_all_keywords()
226
+ return gr.update(choices=keywords)
227
+ except Exception as e:
228
+ return gr.update(choices=[], value=None)
229
+
230
+ def search_items(keyword, page, entries_per_page):
231
+ try:
232
+ # Calculate offset for pagination
233
+ offset = (page - 1) * entries_per_page
234
+
235
+ # Fetch items for the selected keyword
236
+ items = fetch_items_by_keyword(keyword)
237
+ total_items = len(items)
238
+ total_pages = (total_items + entries_per_page - 1) // entries_per_page
239
+
240
+ # Paginate results
241
+ paginated_items = items[offset:offset + entries_per_page]
242
+
243
+ # Generate HTML table for results
244
+ table_html = "<table style='width:100%; border-collapse: collapse;'>"
245
+ table_html += "<tr><th style='border: 1px solid black; padding: 8px;'>Title</th>"
246
+ table_html += "<th style='border: 1px solid black; padding: 8px;'>URL</th></tr>"
247
+
248
+ for item_id, title, url in paginated_items:
249
+ table_html += f"""
250
+ <tr>
251
+ <td style='border: 1px solid black; padding: 8px;'>{html.escape(title)}</td>
252
+ <td style='border: 1px solid black; padding: 8px;'>{html.escape(url)}</td>
253
+ </tr>
254
+ """
255
+ table_html += "</table>"
256
+
257
+ # Update pagination info
258
+ pagination = f"Page {page} of {total_pages} (Total items: {total_items})"
259
+
260
+ # Determine button states
261
+ next_disabled = page >= total_pages
262
+ prev_disabled = page <= 1
263
+
264
+ return (
265
+ table_html,
266
+ pagination,
267
+ gr.update(interactive=not next_disabled),
268
+ gr.update(interactive=not prev_disabled)
269
+ )
270
+ except Exception as e:
271
+ return (
272
+ f"<p>Error: {str(e)}</p>",
273
+ "Error in pagination",
274
+ gr.update(interactive=False),
275
+ gr.update(interactive=False)
276
+ )
277
+
278
+ def go_to_next_page(keyword, current_page, entries_per_page):
279
+ next_page = current_page + 1
280
+ return search_items(keyword, next_page, entries_per_page) + (next_page,)
281
+
282
+ def go_to_previous_page(keyword, current_page, entries_per_page):
283
+ previous_page = max(1, current_page - 1)
284
+ return search_items(keyword, previous_page, entries_per_page) + (previous_page,)
285
+
286
+ # Event handlers
287
+ refresh_keywords_button.click(
288
+ fn=update_keyword_choices,
289
+ inputs=[],
290
+ outputs=[keyword_dropdown]
291
+ )
292
+
293
+ view_button.click(
294
+ fn=search_items,
295
+ inputs=[keyword_dropdown, page_number, entries_per_page],
296
+ outputs=[results_table, pagination_info, next_page_button, previous_page_button]
297
+ )
298
+
299
+ next_page_button.click(
300
+ fn=go_to_next_page,
301
+ inputs=[keyword_dropdown, page_number, entries_per_page],
302
+ outputs=[results_table, pagination_info, next_page_button, previous_page_button, page_number]
303
+ )
304
+
305
+ previous_page_button.click(
306
+ fn=go_to_previous_page,
307
+ inputs=[keyword_dropdown, page_number, entries_per_page],
308
+ outputs=[results_table, pagination_info, next_page_button, previous_page_button, page_number]
309
+ )
310
+
311
+
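The keyword tab above pages through its matches in memory: it fetches every row for the selected keyword once, then slices by offset. The arithmetic is easy to get wrong at the edges, so here is the same idea as a standalone helper; the function name and return shape are illustrative only and are not part of the codebase.

from typing import Iterable, List, Tuple, TypeVar

T = TypeVar("T")

def paginate(items: List[T], page: int, per_page: int) -> Tuple[List[T], int, int, int]:
    """Return (rows_for_page, clamped_page, total_pages, total_items) for an in-memory list."""
    total_items = len(items)
    total_pages = max(1, (total_items + per_page - 1) // per_page)  # ceiling division
    page = min(max(1, page), total_pages)                           # clamp out-of-range pages
    offset = (page - 1) * per_page
    return items[offset:offset + per_page], page, total_pages, total_items

# Example (hypothetical data source): rows, page, total_pages, total = paginate(fetch_items_by_keyword("python"), 3, 10)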
312
+ def create_viewing_mediadb_tab():
313
+ with gr.TabItem("View Media Database Entries", visible=True):
314
+ gr.Markdown("# View Media Database Entries")
315
  with gr.Row():
316
  with gr.Column():
317
  entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
 
356
  outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
357
  )
358
 
359
+ #####################################################################
360
+ #
361
+ # RAG DB Viewing Functions:
362
+
363
+ def create_viewing_ragdb_tab():
364
+ with gr.TabItem("View RAG Database Entries", visible=True):
365
+ gr.Markdown("# View RAG Database Entries")
366
+ with gr.Row():
367
+ with gr.Column():
368
+ entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
369
+ page_number = gr.Number(value=1, label="Page Number", precision=0)
370
+ view_button = gr.Button("View Page")
371
+ next_page_button = gr.Button("Next Page")
372
+ previous_page_button = gr.Button("Previous Page")
373
+ pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
374
+ with gr.Column():
375
+ results_display = gr.HTML()
376
+
377
+ def format_conversations_table(conversations):
378
+ table_html = "<table style='width:100%; border-collapse: collapse;'>"
379
+ table_html += """
380
+ <tr>
381
+ <th style='border: 1px solid black; padding: 8px;'>Title</th>
382
+ <th style='border: 1px solid black; padding: 8px;'>Keywords</th>
383
+ <th style='border: 1px solid black; padding: 8px;'>Notes</th>
384
+ <th style='border: 1px solid black; padding: 8px;'>Rating</th>
385
+ </tr>
386
+ """
387
+
388
+ for conversation in conversations:
389
+ conv_id = conversation['conversation_id']
390
+ title = conversation['title']
391
+ rating = conversation.get('rating', '') # Use get() to handle cases where rating might not exist
392
+
393
+ keywords = get_keywords_for_conversation(conv_id)
394
+ notes = get_notes(conv_id)
395
+
396
+ table_html += f"""
397
+ <tr>
398
+ <td style='border: 1px solid black; padding: 8px;'>{html.escape(str(title))}</td>
399
+ <td style='border: 1px solid black; padding: 8px;'>{html.escape(', '.join(keywords))}</td>
400
+ <td style='border: 1px solid black; padding: 8px;'>{len(notes)} note(s)</td>
401
+ <td style='border: 1px solid black; padding: 8px;'>{html.escape(str(rating))}</td>
402
+ </tr>
403
+ """
404
+ table_html += "</table>"
405
+ return table_html
406
+
407
+ def update_page(page, entries_per_page):
408
+ try:
409
+ conversations, total_pages, total_count = get_all_conversations(page, entries_per_page)
410
+ results_html = format_conversations_table(conversations)
411
+ pagination = f"Page {page} of {total_pages} (Total conversations: {total_count})"
412
+
413
+ next_disabled = page >= total_pages
414
+ prev_disabled = page <= 1
415
+
416
+ return (
417
+ results_html,
418
+ pagination,
419
+ page,
420
+ gr.update(interactive=not next_disabled),
421
+ gr.update(interactive=not prev_disabled)
422
+ )
423
+ except Exception as e:
424
+ return (
425
+ f"<p>Error: {str(e)}</p>",
426
+ "Error in pagination",
427
+ page,
428
+ gr.update(interactive=False),
429
+ gr.update(interactive=False)
430
+ )
431
+
432
+ def go_to_next_page(current_page, entries_per_page):
433
+ return update_page(current_page + 1, entries_per_page)
434
+
435
+ def go_to_previous_page(current_page, entries_per_page):
436
+ return update_page(max(1, current_page - 1), entries_per_page)
437
+
438
+ view_button.click(
439
+ fn=update_page,
440
+ inputs=[page_number, entries_per_page],
441
+ outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
442
+ )
443
+
444
+ next_page_button.click(
445
+ fn=go_to_next_page,
446
+ inputs=[page_number, entries_per_page],
447
+ outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
448
+ )
449
+
450
+ previous_page_button.click(
451
+ fn=go_to_previous_page,
452
+ inputs=[page_number, entries_per_page],
453
+ outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
454
+ )
455
+
456
+
457
+ def create_view_all_rag_notes_tab():
458
+ with gr.TabItem("View All RAG notes/Conversation Items", visible=True):
459
+ gr.Markdown("# View All RAG Notes/Conversation Entries")
460
+ with gr.Row():
461
+ with gr.Column(scale=1):
462
+ entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
463
+ page_number = gr.Number(value=1, label="Page Number", precision=0)
464
+ view_button = gr.Button("View Page")
465
+ next_page_button = gr.Button("Next Page")
466
+ previous_page_button = gr.Button("Previous Page")
467
+ with gr.Column(scale=2):
468
+ items_output = gr.Dropdown(label="Select Conversation to View Details", choices=[])
469
+ conversation_title = gr.Textbox(label="Conversation Title", visible=True)
470
+ with gr.Row():
471
+ with gr.Column(scale=1):
472
+ pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
473
+ with gr.Column(scale=2):
474
+ keywords_output = gr.Textbox(label="Keywords", visible=True)
475
+ chat_history_output = gr.HTML(label="Chat History", visible=True)
476
+ notes_output = gr.HTML(label="Associated Notes", visible=True)
477
+
478
+ item_mapping = gr.State({})
479
+
480
+ def update_page(page, entries_per_page):
481
+ try:
482
+ conversations, total_pages, total_count = get_all_conversations(page, entries_per_page)
483
+ pagination = f"Page {page} of {total_pages} (Total conversations: {total_count})"
484
+
485
+ # Handle the dictionary structure correctly
486
+ choices = [f"{conv['title']} (ID: {conv['conversation_id']})" for conv in conversations]
487
+ new_item_mapping = {
488
+ f"{conv['title']} (ID: {conv['conversation_id']})": conv['conversation_id']
489
+ for conv in conversations
490
+ }
491
+
492
+ next_disabled = page >= total_pages
493
+ prev_disabled = page <= 1
494
+
495
+ return (
496
+ gr.update(choices=choices, value=None),
497
+ pagination,
498
+ page,
499
+ gr.update(interactive=not next_disabled),
500
+ gr.update(interactive=not prev_disabled),
501
+ "", # conversation_title
502
+ "", # keywords_output
503
+ "", # chat_history_output
504
+ "", # notes_output
505
+ new_item_mapping
506
+ )
507
+ except Exception as e:
508
+ logging.error(f"Error in update_page: {str(e)}", exc_info=True)
509
+ return (
510
+ gr.update(choices=[], value=None),
511
+ f"Error: {str(e)}",
512
+ page,
513
+ gr.update(interactive=False),
514
+ gr.update(interactive=False),
515
+ "", "", "", "",
516
+ {}
517
+ )
518
+
519
+ def format_as_html(content, title):
520
+ if content is None:
521
+ content = "No content available."
522
+ escaped_content = html.escape(str(content))
523
+ formatted_content = escaped_content.replace('\n', '<br>')
524
+ return f"""
525
+ <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 10px;">
526
+ <h3>{title}</h3>
527
+ <div style="max-height: 700px; overflow-y: auto;">
528
+ {formatted_content}
529
+ </div>
530
+ </div>
531
+ """
532
+
533
+ def format_chat_history(messages):
534
+ html_content = "<div style='max-height: 500px; overflow-y: auto;'>"
535
+ for role, content in messages:
536
+ role_class = "assistant" if role.lower() == "assistant" else "user"
537
+ html_content += f"""
538
+ <div class='{role_class}-message' style='margin: 10px 0; padding: 10px; border-radius: 5px;
539
+ background-color: {"#f0f0f0" if role_class == "user" else "#e3f2fd"}'>
540
+ <strong>{html.escape(role)}:</strong><br>
541
+ {html.escape(content)}
542
+ </div>
543
+ """
544
+ html_content += "</div>"
545
+ return html_content
546
+
547
+ def display_conversation_details(selected_item, item_mapping):
548
+ if selected_item and item_mapping and selected_item in item_mapping:
549
+ conv_id = item_mapping[selected_item]
550
+
551
+ # Get keywords
552
+ keywords = get_keywords_for_conversation(conv_id)
553
+ keywords_text = ", ".join(keywords) if keywords else "No keywords"
554
+
555
+ # Get chat history
556
+ chat_messages, _, _ = load_chat_history(conv_id)
557
+ chat_html = format_chat_history(chat_messages)
558
+
559
+ # Get associated notes
560
+ notes = get_notes(conv_id)
561
+ notes_html = ""
562
+ for note in notes:
563
+ notes_html += format_as_html(note, "Note")
564
+ if not notes:
565
+ notes_html = "<p>No notes associated with this conversation.</p>"
566
+
567
+ return (
568
+ selected_item.split(" (ID:")[0], # Conversation title
569
+ keywords_text,
570
+ chat_html,
571
+ notes_html
572
+ )
573
+ return "", "", "", ""
574
+
575
+ view_button.click(
576
+ fn=update_page,
577
+ inputs=[page_number, entries_per_page],
578
+ outputs=[
579
+ items_output,
580
+ pagination_info,
581
+ page_number,
582
+ next_page_button,
583
+ previous_page_button,
584
+ conversation_title,
585
+ keywords_output,
586
+ chat_history_output,
587
+ notes_output,
588
+ item_mapping
589
+ ]
590
+ )
591
+
592
+ next_page_button.click(
593
+ fn=lambda page, entries: update_page(page + 1, entries),
594
+ inputs=[page_number, entries_per_page],
595
+ outputs=[items_output, pagination_info, page_number, next_page_button, previous_page_button,
596
+ conversation_title, keywords_output, chat_history_output, notes_output, item_mapping]
597
+ )
598
+
599
+ previous_page_button.click(
600
+ fn=lambda page, entries: update_page(max(1, page - 1), entries),
601
+ inputs=[page_number, entries_per_page],
602
+ outputs=[items_output, pagination_info, page_number, next_page_button, previous_page_button,
603
+ conversation_title, keywords_output, chat_history_output, notes_output, item_mapping]
604
+ )
605
+
606
+ items_output.change(
607
+ fn=display_conversation_details,
608
+ inputs=[items_output, item_mapping],
609
+ outputs=[conversation_title, keywords_output, chat_history_output, notes_output]
610
+ )
611
+
612
+
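Both viewing tabs above build their gr.HTML payloads by hand and run every interpolated field through html.escape before it reaches the browser, so stored titles, keywords, or notes cannot inject markup. A compact, generic sketch of that pattern follows; the column names and row shape are made up for illustration and are not part of the project.

import html
from typing import Iterable, Sequence

def render_table(headers: Sequence[str], rows: Iterable[Sequence[object]]) -> str:
    """Build a simple HTML table, escaping every cell before it is rendered."""
    cell_style = "border: 1px solid black; padding: 8px;"
    out = ["<table style='width:100%; border-collapse: collapse;'>", "<tr>"]
    out += [f"<th style='{cell_style}'>{html.escape(h)}</th>" for h in headers]
    out.append("</tr>")
    for row in rows:
        out.append("<tr>")
        out += [f"<td style='{cell_style}'>{html.escape(str(cell))}</td>" for cell in row]
        out.append("</tr>")
    out.append("</table>")
    return "".join(out)

# Example: render_table(["Title", "Keywords"], [("My talk", "ai, notes")])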
613
+ def create_ragdb_keyword_items_tab():
614
+ with gr.TabItem("View RAG Notes/Conversations by Keyword", visible=True):
615
+ gr.Markdown("# View RAG Notes and Conversations by Keyword")
616
+
617
+ with gr.Row():
618
+ with gr.Column(scale=1):
619
+ # Keyword selection
620
+ keyword_dropdown = gr.Dropdown(
621
+ label="Select Keyword",
622
+ choices=[],
623
+ value=None,
624
+ multiselect=True
625
+ )
626
+ entries_per_page = gr.Dropdown(
627
+ choices=[10, 20, 50, 100],
628
+ label="Entries per Page",
629
+ value=10
630
+ )
631
+ page_number = gr.Number(
632
+ value=1,
633
+ label="Page Number",
634
+ precision=0
635
+ )
636
+
637
+ # Navigation buttons
638
+ refresh_keywords_button = gr.Button("Refresh Keywords")
639
+ view_button = gr.Button("View Items")
640
+ next_page_button = gr.Button("Next Page")
641
+ previous_page_button = gr.Button("Previous Page")
642
+ pagination_info = gr.Textbox(
643
+ label="Pagination Info",
644
+ interactive=False
645
+ )
646
+
647
+ with gr.Column(scale=2):
648
+ # Results tabs for conversations and notes
649
+ with gr.Tabs():
650
+ with gr.Tab("Notes"):
651
+ notes_results = gr.HTML()
652
+ with gr.Tab("Conversations"):
653
+ conversation_results = gr.HTML()
654
+
655
+ def update_keyword_choices():
656
+ """Fetch all available keywords for the dropdown."""
657
+ try:
658
+ query = "SELECT keyword FROM rag_qa_keywords ORDER BY keyword"
659
+ with get_db_connection() as conn:
660
+ cursor = conn.cursor()
661
+ cursor.execute(query)
662
+ keywords = [row[0] for row in cursor.fetchall()]
663
+ return gr.update(choices=keywords)
664
+ except Exception as e:
665
+ return gr.update(choices=[], value=None)
666
+
667
+ def format_conversations_html(conversations_data):
668
+ """Format conversations data as HTML."""
669
+ if not conversations_data:
670
+ return "<p>No conversations found for selected keywords.</p>"
671
+
672
+ html_content = "<div class='results-container'>"
673
+ for conv_id, title in conversations_data:
674
+ html_content += f"""
675
+ <div style='border: 1px solid #ddd; padding: 10px; margin-bottom: 10px;'>
676
+ <h3>{html.escape(title)}</h3>
677
+ <p>Conversation ID: {html.escape(conv_id)}</p>
678
+ <p><strong>Keywords:</strong> {', '.join(html.escape(k) for k in get_keywords_for_conversation(conv_id))}</p>
679
+ </div>
680
+ """
681
+ html_content += "</div>"
682
+ return html_content
683
+
684
+ def format_notes_html(notes_data):
685
+ """Format notes data as HTML."""
686
+ if not notes_data:
687
+ return "<p>No notes found for selected keywords.</p>"
688
+
689
+ html_content = "<div class='results-container'>"
690
+ for note_id, title, content, timestamp in notes_data:
691
+ keywords = get_keywords_for_note(note_id)
692
+ html_content += f"""
693
+ <div style='border: 1px solid #ddd; padding: 10px; margin-bottom: 10px;'>
694
+ <h3>{html.escape(title)}</h3>
695
+ <p><strong>Created:</strong> {timestamp}</p>
696
+ <p><strong>Keywords:</strong> {', '.join(html.escape(k) for k in keywords)}</p>
697
+ <div style='background: #f5f5f5; padding: 10px; margin-top: 10px;'>
698
+ {html.escape(content)}
699
+ </div>
700
+ </div>
701
+ """
702
+ html_content += "</div>"
703
+ return html_content
704
+
705
+ def view_items(keywords, page, entries_per_page):
706
+ if not keywords or (isinstance(keywords, list) and len(keywords) == 0):
707
+ return (
708
+ "<p>Please select at least one keyword.</p>",
709
+ "<p>Please select at least one keyword.</p>",
710
+ "No results",
711
+ gr.update(interactive=False),
712
+ gr.update(interactive=False)
713
+ )
714
+
715
+ try:
716
+ # Ensure keywords is a list
717
+ keywords_list = keywords if isinstance(keywords, list) else [keywords]
718
+
719
+ # Get conversations for selected keywords
720
+ conversations, conv_total_pages, conv_count = search_conversations_by_keywords(
721
+ keywords_list, page, entries_per_page
722
+ )
723
+
724
+ # Get notes for selected keywords
725
+ notes, notes_total_pages, notes_count = get_notes_by_keywords(
726
+ keywords_list, page, entries_per_page
727
+ )
728
+
729
+ # Format results as HTML
730
+ conv_html = format_conversations_html(conversations)
731
+ notes_html = format_notes_html(notes)
732
+
733
+ # Create pagination info
734
+ pagination = f"Page {page} of {max(conv_total_pages, notes_total_pages)} "
735
+ pagination += f"(Conversations: {conv_count}, Notes: {notes_count})"
736
+
737
+ # Determine button states
738
+ max_pages = max(conv_total_pages, notes_total_pages)
739
+ next_disabled = page >= max_pages
740
+ prev_disabled = page <= 1
741
+
742
+ return (
743
+ conv_html,
744
+ notes_html,
745
+ pagination,
746
+ gr.update(interactive=not next_disabled),
747
+ gr.update(interactive=not prev_disabled)
748
+ )
749
+ except Exception as e:
750
+ logging.error(f"Error in view_items: {str(e)}")
751
+ return (
752
+ f"<p>Error: {str(e)}</p>",
753
+ f"<p>Error: {str(e)}</p>",
754
+ "Error in retrieval",
755
+ gr.update(interactive=False),
756
+ gr.update(interactive=False)
757
+ )
758
+
759
+ def go_to_next_page(keywords, current_page, entries_per_page):
760
+ return view_items(keywords, current_page + 1, entries_per_page)
761
+
762
+ def go_to_previous_page(keywords, current_page, entries_per_page):
763
+ return view_items(keywords, max(1, current_page - 1), entries_per_page)
764
+
765
+ # Event handlers
766
+ refresh_keywords_button.click(
767
+ fn=update_keyword_choices,
768
+ inputs=[],
769
+ outputs=[keyword_dropdown]
770
+ )
771
+
772
+ view_button.click(
773
+ fn=view_items,
774
+ inputs=[keyword_dropdown, page_number, entries_per_page],
775
+ outputs=[
776
+ conversation_results,
777
+ notes_results,
778
+ pagination_info,
779
+ next_page_button,
780
+ previous_page_button
781
+ ]
782
+ )
783
+
784
+ next_page_button.click(
785
+ fn=go_to_next_page,
786
+ inputs=[keyword_dropdown, page_number, entries_per_page],
787
+ outputs=[
788
+ conversation_results,
789
+ notes_results,
790
+ pagination_info,
791
+ next_page_button,
792
+ previous_page_button
793
+ ]
794
+ )
795
+
796
+ previous_page_button.click(
797
+ fn=go_to_previous_page,
798
+ inputs=[keyword_dropdown, page_number, entries_per_page],
799
+ outputs=[
800
+ conversation_results,
801
+ notes_results,
802
+ pagination_info,
803
+ next_page_button,
804
+ previous_page_button
805
+ ]
806
+ )
807
+
808
+ # Initialize keyword dropdown on page load
809
+ keyword_dropdown.value = update_keyword_choices()
810
+
811
+ #
812
+ # End of RAG DB Viewing tabs
813
+ ################################################################
814
+
815
  #
816
+ #######################################################################################################################
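Every tab in this file wires pagination the same way: the callback returns a tuple that mixes plain values with gr.update(...) objects, and Gradio maps the tuple positionally onto the outputs list, which is how the Next/Previous buttons get enabled and disabled. The self-contained toy below is a sketch of that wiring, not project code; the hard-coded total_pages stands in for a real database call.

import gradio as gr

def show_page(page, per_page):
    page = int(page)
    total_pages = 5  # placeholder; the real tabs ask the database for this
    rows_html = f"<p>Rendering page {page} ({per_page} rows per page)</p>"
    info = f"Page {page} of {total_pages}"
    return (
        rows_html,                                   # gr.HTML
        info,                                        # gr.Textbox
        page,                                        # write the page back into gr.Number
        gr.update(interactive=page < total_pages),   # enable/disable "Next"
        gr.update(interactive=page > 1),             # enable/disable "Previous"
    )

with gr.Blocks() as demo:
    page_number = gr.Number(value=1, precision=0, label="Page Number")
    per_page = gr.Dropdown(choices=[10, 20, 50], value=10, label="Entries per Page")
    view_btn = gr.Button("View Page")
    next_btn = gr.Button("Next Page")
    prev_btn = gr.Button("Previous Page")
    info_box = gr.Textbox(label="Pagination Info", interactive=False)
    results = gr.HTML()

    outs = [results, info_box, page_number, next_btn, prev_btn]
    view_btn.click(show_page, [page_number, per_page], outs)
    next_btn.click(lambda p, n: show_page(int(p) + 1, n), [page_number, per_page], outs)
    prev_btn.click(lambda p, n: show_page(max(1, int(p) - 1), n), [page_number, per_page], outs)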
App_Function_Libraries/Gradio_UI/Website_scraping_tab.py CHANGED
@@ -1,554 +1,754 @@
1
- # Website_scraping_tab.py
2
- # Gradio UI for scraping websites
3
- #
4
- # Imports
5
- import asyncio
6
- import json
7
- import logging
8
- import os
9
- import random
10
- from concurrent.futures import ThreadPoolExecutor
11
- from typing import Optional, List, Dict, Any
12
- from urllib.parse import urlparse, urljoin
13
-
14
- #
15
- # External Imports
16
- import gradio as gr
17
- from playwright.async_api import TimeoutError, async_playwright
18
- from playwright.sync_api import sync_playwright
19
-
20
- #
21
- # Local Imports
22
- from App_Function_Libraries.Web_Scraping.Article_Extractor_Lib import scrape_from_sitemap, scrape_by_url_level, scrape_article
23
- from App_Function_Libraries.Web_Scraping.Article_Summarization_Lib import scrape_and_summarize_multiple
24
- from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
25
- from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
26
- from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize
27
-
28
-
29
- #
30
- ########################################################################################################################
31
- #
32
- # Functions:
33
-
34
- def get_url_depth(url: str) -> int:
35
- return len(urlparse(url).path.strip('/').split('/'))
36
-
37
-
38
- def sync_recursive_scrape(url_input, max_pages, max_depth, progress_callback, delay=1.0):
39
- def run_async_scrape():
40
- loop = asyncio.new_event_loop()
41
- asyncio.set_event_loop(loop)
42
- return loop.run_until_complete(
43
- recursive_scrape(url_input, max_pages, max_depth, progress_callback, delay)
44
- )
45
-
46
- with ThreadPoolExecutor() as executor:
47
- future = executor.submit(run_async_scrape)
48
- return future.result()
49
-
50
-
51
- async def recursive_scrape(
52
- base_url: str,
53
- max_pages: int,
54
- max_depth: int,
55
- progress_callback: callable,
56
- delay: float = 1.0,
57
- resume_file: str = 'scrape_progress.json',
58
- user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
59
- ) -> List[Dict]:
60
- async def save_progress():
61
- temp_file = resume_file + ".tmp"
62
- with open(temp_file, 'w') as f:
63
- json.dump({
64
- 'visited': list(visited),
65
- 'to_visit': to_visit,
66
- 'scraped_articles': scraped_articles,
67
- 'pages_scraped': pages_scraped
68
- }, f)
69
- os.replace(temp_file, resume_file) # Atomic replace
70
-
71
- def is_valid_url(url: str) -> bool:
72
- return url.startswith("http") and len(url) > 0
73
-
74
- # Load progress if resume file exists
75
- if os.path.exists(resume_file):
76
- with open(resume_file, 'r') as f:
77
- progress_data = json.load(f)
78
- visited = set(progress_data['visited'])
79
- to_visit = progress_data['to_visit']
80
- scraped_articles = progress_data['scraped_articles']
81
- pages_scraped = progress_data['pages_scraped']
82
- else:
83
- visited = set()
84
- to_visit = [(base_url, 0)] # (url, depth)
85
- scraped_articles = []
86
- pages_scraped = 0
87
-
88
- try:
89
- async with async_playwright() as p:
90
- browser = await p.chromium.launch(headless=True)
91
- context = await browser.new_context(user_agent=user_agent)
92
-
93
- try:
94
- while to_visit and pages_scraped < max_pages:
95
- current_url, current_depth = to_visit.pop(0)
96
-
97
- if current_url in visited or current_depth > max_depth:
98
- continue
99
-
100
- visited.add(current_url)
101
-
102
- # Update progress
103
- progress_callback(f"Scraping page {pages_scraped + 1}/{max_pages}: {current_url}")
104
-
105
- try:
106
- await asyncio.sleep(random.uniform(delay * 0.8, delay * 1.2))
107
-
108
- # This function should be implemented to handle asynchronous scraping
109
- article_data = await scrape_article_async(context, current_url)
110
-
111
- if article_data and article_data['extraction_successful']:
112
- scraped_articles.append(article_data)
113
- pages_scraped += 1
114
-
115
- # If we haven't reached max depth, add child links to to_visit
116
- if current_depth < max_depth:
117
- page = await context.new_page()
118
- await page.goto(current_url)
119
- await page.wait_for_load_state("networkidle")
120
-
121
- links = await page.eval_on_selector_all('a[href]',
122
- "(elements) => elements.map(el => el.href)")
123
- for link in links:
124
- child_url = urljoin(base_url, link)
125
- if is_valid_url(child_url) and child_url.startswith(
126
- base_url) and child_url not in visited and should_scrape_url(child_url):
127
- to_visit.append((child_url, current_depth + 1))
128
-
129
- await page.close()
130
-
131
- except Exception as e:
132
- logging.error(f"Error scraping {current_url}: {str(e)}")
133
-
134
- # Save progress periodically (e.g., every 10 pages)
135
- if pages_scraped % 10 == 0:
136
- await save_progress()
137
-
138
- finally:
139
- await browser.close()
140
-
141
- finally:
142
- # These statements are now guaranteed to be reached after the scraping is done
143
- await save_progress()
144
-
145
- # Remove the progress file when scraping is completed successfully
146
- if os.path.exists(resume_file):
147
- os.remove(resume_file)
148
-
149
- # Final progress update
150
- progress_callback(f"Scraping completed. Total pages scraped: {pages_scraped}")
151
-
152
- return scraped_articles
153
-
154
-
155
- async def scrape_article_async(context, url: str) -> Dict[str, Any]:
156
- page = await context.new_page()
157
- try:
158
- await page.goto(url)
159
- await page.wait_for_load_state("networkidle")
160
-
161
- title = await page.title()
162
- content = await page.content()
163
-
164
- return {
165
- 'url': url,
166
- 'title': title,
167
- 'content': content,
168
- 'extraction_successful': True
169
- }
170
- except Exception as e:
171
- logging.error(f"Error scraping article {url}: {str(e)}")
172
- return {
173
- 'url': url,
174
- 'extraction_successful': False,
175
- 'error': str(e)
176
- }
177
- finally:
178
- await page.close()
179
-
180
-
181
- def scrape_article_sync(url: str) -> Dict[str, Any]:
182
- with sync_playwright() as p:
183
- browser = p.chromium.launch(headless=True)
184
- page = browser.new_page()
185
- try:
186
- page.goto(url)
187
- page.wait_for_load_state("networkidle")
188
-
189
- title = page.title()
190
- content = page.content()
191
-
192
- return {
193
- 'url': url,
194
- 'title': title,
195
- 'content': content,
196
- 'extraction_successful': True
197
- }
198
- except Exception as e:
199
- logging.error(f"Error scraping article {url}: {str(e)}")
200
- return {
201
- 'url': url,
202
- 'extraction_successful': False,
203
- 'error': str(e)
204
- }
205
- finally:
206
- browser.close()
207
-
208
-
209
- def should_scrape_url(url: str) -> bool:
210
- parsed_url = urlparse(url)
211
- path = parsed_url.path.lower()
212
-
213
- # List of patterns to exclude
214
- exclude_patterns = [
215
- '/tag/', '/category/', '/author/', '/search/', '/page/',
216
- 'wp-content', 'wp-includes', 'wp-json', 'wp-admin',
217
- 'login', 'register', 'cart', 'checkout', 'account',
218
- '.jpg', '.png', '.gif', '.pdf', '.zip'
219
- ]
220
-
221
- # Check if the URL contains any exclude patterns
222
- if any(pattern in path for pattern in exclude_patterns):
223
- return False
224
-
225
- # Add more sophisticated checks here
226
- # For example, you might want to only include URLs with certain patterns
227
- include_patterns = ['/article/', '/post/', '/blog/']
228
- if any(pattern in path for pattern in include_patterns):
229
- return True
230
-
231
- # By default, return True if no exclusion or inclusion rules matched
232
- return True
233
-
234
-
235
- async def scrape_with_retry(url: str, max_retries: int = 3, retry_delay: float = 5.0):
236
- for attempt in range(max_retries):
237
- try:
238
- return await scrape_article(url)
239
- except TimeoutError:
240
- if attempt < max_retries - 1:
241
- logging.warning(f"Timeout error scraping {url}. Retrying in {retry_delay} seconds...")
242
- await asyncio.sleep(retry_delay)
243
- else:
244
- logging.error(f"Failed to scrape {url} after {max_retries} attempts.")
245
- return None
246
- except Exception as e:
247
- logging.error(f"Error scraping {url}: {str(e)}")
248
- return None
249
-
250
-
251
- def create_website_scraping_tab():
252
- with gr.TabItem("Website Scraping", visible=True):
253
- gr.Markdown("# Scrape Websites & Summarize Articles")
254
- with gr.Row():
255
- with gr.Column():
256
- scrape_method = gr.Radio(
257
- ["Individual URLs", "Sitemap", "URL Level", "Recursive Scraping"],
258
- label="Scraping Method",
259
- value="Individual URLs"
260
- )
261
- url_input = gr.Textbox(
262
- label="Article URLs or Base URL",
263
- placeholder="Enter article URLs here, one per line, or base URL for sitemap/URL level/recursive scraping",
264
- lines=5
265
- )
266
- url_level = gr.Slider(
267
- minimum=1,
268
- maximum=10,
269
- step=1,
270
- label="URL Level (for URL Level scraping)",
271
- value=2,
272
- visible=False
273
- )
274
- max_pages = gr.Slider(
275
- minimum=1,
276
- maximum=100,
277
- step=1,
278
- label="Maximum Pages to Scrape (for Recursive Scraping)",
279
- value=10,
280
- visible=False
281
- )
282
- max_depth = gr.Slider(
283
- minimum=1,
284
- maximum=10,
285
- step=1,
286
- label="Maximum Depth (for Recursive Scraping)",
287
- value=3,
288
- visible=False
289
- )
290
- custom_article_title_input = gr.Textbox(
291
- label="Custom Article Titles (Optional, one per line)",
292
- placeholder="Enter custom titles for the articles, one per line",
293
- lines=5
294
- )
295
- with gr.Row():
296
- summarize_checkbox = gr.Checkbox(label="Summarize Articles", value=False)
297
- custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt", value=False, visible=True)
298
- preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt", value=False, visible=True)
299
- with gr.Row():
300
- temp_slider = gr.Slider(0.1, 2.0, 0.7, label="Temperature")
301
- with gr.Row():
302
- preset_prompt = gr.Dropdown(
303
- label="Select Preset Prompt",
304
- choices=load_preset_prompts(),
305
- visible=False
306
- )
307
- with gr.Row():
308
- website_custom_prompt_input = gr.Textbox(
309
- label="Custom Prompt",
310
- placeholder="Enter custom prompt here",
311
- lines=3,
312
- visible=False
313
- )
314
- with gr.Row():
315
- system_prompt_input = gr.Textbox(
316
- label="System Prompt",
317
- value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
318
- **Bulleted Note Creation Guidelines**
319
-
320
- **Headings**:
321
- - Based on referenced topics, not categories like quotes or terms
322
- - Surrounded by **bold** formatting
323
- - Not listed as bullet points
324
- - No space between headings and list items underneath
325
-
326
- **Emphasis**:
327
- - **Important terms** set in bold font
328
- - **Text ending in a colon**: also bolded
329
-
330
- **Review**:
331
- - Ensure adherence to specified format
332
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
333
- """,
334
- lines=3,
335
- visible=False
336
- )
337
-
338
- api_name_input = gr.Dropdown(
339
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
340
- "OpenRouter",
341
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
342
- "Custom-OpenAI-API"],
343
- value=None,
344
- label="API Name (Mandatory for Summarization)"
345
- )
346
- api_key_input = gr.Textbox(
347
- label="API Key (Mandatory if API Name is specified)",
348
- placeholder="Enter your API key here; Ignore if using Local API or Built-in API",
349
- type="password"
350
- )
351
- keywords_input = gr.Textbox(
352
- label="Keywords",
353
- placeholder="Enter keywords here (comma-separated)",
354
- value="default,no_keyword_set",
355
- visible=True
356
- )
357
-
358
- scrape_button = gr.Button("Scrape and Summarize")
359
- with gr.Column():
360
- progress_output = gr.Textbox(label="Progress", lines=3)
361
- result_output = gr.Textbox(label="Result", lines=20)
362
-
363
- def update_ui_for_scrape_method(method):
364
- url_level_update = gr.update(visible=(method == "URL Level"))
365
- max_pages_update = gr.update(visible=(method == "Recursive Scraping"))
366
- max_depth_update = gr.update(visible=(method == "Recursive Scraping"))
367
- url_input_update = gr.update(
368
- label="Article URLs" if method == "Individual URLs" else "Base URL",
369
- placeholder="Enter article URLs here, one per line" if method == "Individual URLs" else "Enter the base URL for scraping"
370
- )
371
- return url_level_update, max_pages_update, max_depth_update, url_input_update
372
-
373
- scrape_method.change(
374
- fn=update_ui_for_scrape_method,
375
- inputs=[scrape_method],
376
- outputs=[url_level, max_pages, max_depth, url_input]
377
- )
378
-
379
- custom_prompt_checkbox.change(
380
- fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
381
- inputs=[custom_prompt_checkbox],
382
- outputs=[website_custom_prompt_input, system_prompt_input]
383
- )
384
- preset_prompt_checkbox.change(
385
- fn=lambda x: gr.update(visible=x),
386
- inputs=[preset_prompt_checkbox],
387
- outputs=[preset_prompt]
388
- )
389
-
390
- def update_prompts(preset_name):
391
- prompts = update_user_prompt(preset_name)
392
- return (
393
- gr.update(value=prompts["user_prompt"], visible=True),
394
- gr.update(value=prompts["system_prompt"], visible=True)
395
- )
396
-
397
- preset_prompt.change(
398
- update_prompts,
399
- inputs=preset_prompt,
400
- outputs=[website_custom_prompt_input, system_prompt_input]
401
- )
402
-
403
- async def scrape_and_summarize_wrapper(
404
- scrape_method: str,
405
- url_input: str,
406
- url_level: Optional[int],
407
- max_pages: int,
408
- max_depth: int,
409
- summarize_checkbox: bool,
410
- custom_prompt: Optional[str],
411
- api_name: Optional[str],
412
- api_key: Optional[str],
413
- keywords: str,
414
- custom_titles: Optional[str],
415
- system_prompt: Optional[str],
416
- temperature: float = 0.7,
417
- progress: gr.Progress = gr.Progress()
418
- ) -> str:
419
- try:
420
- result: List[Dict[str, Any]] = []
421
-
422
- if scrape_method == "Individual URLs":
423
- result = await scrape_and_summarize_multiple(url_input, custom_prompt, api_name, api_key, keywords,
424
- custom_titles, system_prompt)
425
- elif scrape_method == "Sitemap":
426
- result = await asyncio.to_thread(scrape_from_sitemap, url_input)
427
- elif scrape_method == "URL Level":
428
- if url_level is None:
429
- return convert_json_to_markdown(
430
- json.dumps({"error": "URL level is required for URL Level scraping."}))
431
- result = await asyncio.to_thread(scrape_by_url_level, url_input, url_level)
432
- elif scrape_method == "Recursive Scraping":
433
- result = await recursive_scrape(url_input, max_pages, max_depth, progress.update, delay=1.0)
434
- else:
435
- return convert_json_to_markdown(json.dumps({"error": f"Unknown scraping method: {scrape_method}"}))
436
-
437
- # Ensure result is always a list of dictionaries
438
- if isinstance(result, dict):
439
- result = [result]
440
- elif isinstance(result, list):
441
- if all(isinstance(item, str) for item in result):
442
- # Convert list of strings to list of dictionaries
443
- result = [{"content": item} for item in result]
444
- elif not all(isinstance(item, dict) for item in result):
445
- raise ValueError("Not all items in result are dictionaries or strings")
446
- else:
447
- raise ValueError(f"Unexpected result type: {type(result)}")
448
-
449
- # Ensure all items in result are dictionaries
450
- if not all(isinstance(item, dict) for item in result):
451
- raise ValueError("Not all items in result are dictionaries")
452
-
453
- if summarize_checkbox:
454
- total_articles = len(result)
455
- for i, article in enumerate(result):
456
- progress.update(f"Summarizing article {i + 1}/{total_articles}")
457
- content = article.get('content', '')
458
- if content:
459
- summary = await asyncio.to_thread(summarize, content, custom_prompt, api_name, api_key,
460
- temperature, system_prompt)
461
- article['summary'] = summary
462
- else:
463
- article['summary'] = "No content available to summarize."
464
-
465
- # Concatenate all content
466
- all_content = "\n\n".join(
467
- [f"# {article.get('title', 'Untitled')}\n\n{article.get('content', '')}\n\n" +
468
- (f"Summary: {article.get('summary', '')}" if summarize_checkbox else "")
469
- for article in result])
470
-
471
- # Collect all unique URLs
472
- all_urls = list(set(article.get('url', '') for article in result if article.get('url')))
473
-
474
- # Structure the output for the entire website collection
475
- website_collection = {
476
- "base_url": url_input,
477
- "scrape_method": scrape_method,
478
- "summarization_performed": summarize_checkbox,
479
- "api_used": api_name if summarize_checkbox else None,
480
- "keywords": keywords if summarize_checkbox else None,
481
- "url_level": url_level if scrape_method == "URL Level" else None,
482
- "max_pages": max_pages if scrape_method == "Recursive Scraping" else None,
483
- "max_depth": max_depth if scrape_method == "Recursive Scraping" else None,
484
- "total_articles_scraped": len(result),
485
- "urls_scraped": all_urls,
486
- "content": all_content
487
- }
488
-
489
- # Convert the JSON to markdown and return
490
- return convert_json_to_markdown(json.dumps(website_collection, indent=2))
491
- except Exception as e:
492
- return convert_json_to_markdown(json.dumps({"error": f"An error occurred: {str(e)}"}))
493
-
494
- # Update the scrape_button.click to include the temperature parameter
495
- scrape_button.click(
496
- fn=lambda *args: asyncio.run(scrape_and_summarize_wrapper(*args)),
497
- inputs=[scrape_method, url_input, url_level, max_pages, max_depth, summarize_checkbox,
498
- website_custom_prompt_input, api_name_input, api_key_input, keywords_input,
499
- custom_article_title_input, system_prompt_input, temp_slider],
500
- outputs=[result_output]
501
- )
502
-
503
-
504
- def convert_json_to_markdown(json_str: str) -> str:
505
- """
506
- Converts the JSON output from the scraping process into a markdown format.
507
-
508
- Args:
509
- json_str (str): JSON-formatted string containing the website collection data
510
-
511
- Returns:
512
- str: Markdown-formatted string of the website collection data
513
- """
514
- try:
515
- # Parse the JSON string
516
- data = json.loads(json_str)
517
-
518
- # Check if there's an error in the JSON
519
- if "error" in data:
520
- return f"# Error\n\n{data['error']}"
521
-
522
- # Start building the markdown string
523
- markdown = f"# Website Collection: {data['base_url']}\n\n"
524
-
525
- # Add metadata
526
- markdown += "## Metadata\n\n"
527
- markdown += f"- **Scrape Method:** {data['scrape_method']}\n"
528
- markdown += f"- **API Used:** {data['api_used']}\n"
529
- markdown += f"- **Keywords:** {data['keywords']}\n"
530
- if data['url_level'] is not None:
531
- markdown += f"- **URL Level:** {data['url_level']}\n"
532
- markdown += f"- **Total Articles Scraped:** {data['total_articles_scraped']}\n\n"
533
-
534
- # Add URLs scraped
535
- markdown += "## URLs Scraped\n\n"
536
- for url in data['urls_scraped']:
537
- markdown += f"- {url}\n"
538
- markdown += "\n"
539
-
540
- # Add the content
541
- markdown += "## Content\n\n"
542
- markdown += data['content']
543
-
544
- return markdown
545
-
546
- except json.JSONDecodeError:
547
- return "# Error\n\nInvalid JSON string provided."
548
- except KeyError as e:
549
- return f"# Error\n\nMissing key in JSON data: {str(e)}"
550
- except Exception as e:
551
- return f"# Error\n\nAn unexpected error occurred: {str(e)}"
552
- #
553
- # End of File
554
- ########################################################################################################################
 
 
1
+ # Website_scraping_tab.py
2
+ # Gradio UI for scraping websites
3
+ #
4
+ # Imports
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ import os
9
+ import random
10
+ from concurrent.futures import ThreadPoolExecutor
11
+ from typing import Optional, List, Dict, Any
12
+ from urllib.parse import urlparse, urljoin
13
+
14
+ #
15
+ # External Imports
16
+ import gradio as gr
17
+ from playwright.async_api import TimeoutError, async_playwright
18
+ from playwright.sync_api import sync_playwright
19
+
20
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
21
+ #
22
+ # Local Imports
23
+ from App_Function_Libraries.Web_Scraping.Article_Extractor_Lib import scrape_from_sitemap, scrape_by_url_level, \
24
+ scrape_article, collect_bookmarks, scrape_and_summarize_multiple, collect_urls_from_file
25
+ from App_Function_Libraries.DB.DB_Manager import list_prompts
26
+ from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
27
+ from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize
28
+
29
+
30
+ #
31
+ ########################################################################################################################
32
+ #
33
+ # Functions:
34
+
35
+ def get_url_depth(url: str) -> int:
36
+ return len(urlparse(url).path.strip('/').split('/'))
37
+
38
+
39
+ def sync_recursive_scrape(url_input, max_pages, max_depth, progress_callback, delay=1.0, custom_cookies=None):
40
+ def run_async_scrape():
41
+ loop = asyncio.new_event_loop()
42
+ asyncio.set_event_loop(loop)
43
+ return loop.run_until_complete(
44
+ recursive_scrape(url_input, max_pages, max_depth, progress_callback, delay, custom_cookies=custom_cookies)
45
+ )
46
+
47
+ with ThreadPoolExecutor() as executor:
48
+ future = executor.submit(run_async_scrape)
49
+ return future.result()
50
+
51
+
52
+ async def recursive_scrape(
53
+ base_url: str,
54
+ max_pages: int,
55
+ max_depth: int,
56
+ progress_callback: callable,
57
+ delay: float = 1.0,
58
+ resume_file: str = 'scrape_progress.json',
59
+ user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
60
+ custom_cookies: Optional[List[Dict[str, Any]]] = None
61
+ ) -> List[Dict]:
62
+ async def save_progress():
63
+ temp_file = resume_file + ".tmp"
64
+ with open(temp_file, 'w') as f:
65
+ json.dump({
66
+ 'visited': list(visited),
67
+ 'to_visit': to_visit,
68
+ 'scraped_articles': scraped_articles,
69
+ 'pages_scraped': pages_scraped
70
+ }, f)
71
+ os.replace(temp_file, resume_file) # Atomic replace
72
+
73
+ def is_valid_url(url: str) -> bool:
74
+ return url.startswith("http") and len(url) > 0
75
+
76
+ # Load progress if resume file exists
77
+ if os.path.exists(resume_file):
78
+ with open(resume_file, 'r') as f:
79
+ progress_data = json.load(f)
80
+ visited = set(progress_data['visited'])
81
+ to_visit = progress_data['to_visit']
82
+ scraped_articles = progress_data['scraped_articles']
83
+ pages_scraped = progress_data['pages_scraped']
84
+ else:
85
+ visited = set()
86
+ to_visit = [(base_url, 0)] # (url, depth)
87
+ scraped_articles = []
88
+ pages_scraped = 0
89
+
90
+ try:
91
+ async with async_playwright() as p:
92
+ browser = await p.chromium.launch(headless=True)
93
+ context = await browser.new_context(user_agent=user_agent)
94
+
95
+ # Set custom cookies if provided
96
+ if custom_cookies:
97
+ await context.add_cookies(custom_cookies)
98
+
99
+ try:
100
+ while to_visit and pages_scraped < max_pages:
101
+ current_url, current_depth = to_visit.pop(0)
102
+
103
+ if current_url in visited or current_depth > max_depth:
104
+ continue
105
+
106
+ visited.add(current_url)
107
+
108
+ # Update progress
109
+ progress_callback(f"Scraping page {pages_scraped + 1}/{max_pages}: {current_url}")
110
+
111
+ try:
112
+ await asyncio.sleep(random.uniform(delay * 0.8, delay * 1.2))
113
+
114
+ # Scrape the current page asynchronously using the shared browser context
115
+ article_data = await scrape_article_async(context, current_url)
116
+
117
+ if article_data and article_data['extraction_successful']:
118
+ scraped_articles.append(article_data)
119
+ pages_scraped += 1
120
+
121
+ # If we haven't reached max depth, add child links to to_visit
122
+ if current_depth < max_depth:
123
+ page = await context.new_page()
124
+ await page.goto(current_url)
125
+ await page.wait_for_load_state("networkidle")
126
+
127
+ links = await page.eval_on_selector_all('a[href]',
128
+ "(elements) => elements.map(el => el.href)")
129
+ for link in links:
130
+ child_url = urljoin(base_url, link)
131
+ if is_valid_url(child_url) and child_url.startswith(
132
+ base_url) and child_url not in visited and should_scrape_url(child_url):
133
+ to_visit.append((child_url, current_depth + 1))
134
+
135
+ await page.close()
136
+
137
+ except Exception as e:
138
+ logging.error(f"Error scraping {current_url}: {str(e)}")
139
+
140
+ # Save progress periodically (e.g., every 10 pages)
141
+ if pages_scraped % 10 == 0:
142
+ await save_progress()
143
+
144
+ finally:
145
+ await browser.close()
146
+
147
+ finally:
148
+ # Always executed once scraping ends, whether it finished normally or raised
149
+ await save_progress()
150
+
151
+ # Remove the progress file when scraping is completed successfully
152
+ if os.path.exists(resume_file):
153
+ os.remove(resume_file)
154
+
155
+ # Final progress update
156
+ progress_callback(f"Scraping completed. Total pages scraped: {pages_scraped}")
157
+
158
+ return scraped_articles
159
+
160
+
161
+ async def scrape_article_async(context, url: str) -> Dict[str, Any]:
162
+ page = await context.new_page()
163
+ try:
164
+ await page.goto(url)
165
+ await page.wait_for_load_state("networkidle")
166
+
167
+ title = await page.title()
168
+ content = await page.content()
169
+
170
+ return {
171
+ 'url': url,
172
+ 'title': title,
173
+ 'content': content,
174
+ 'extraction_successful': True
175
+ }
176
+ except Exception as e:
177
+ logging.error(f"Error scraping article {url}: {str(e)}")
178
+ return {
179
+ 'url': url,
180
+ 'extraction_successful': False,
181
+ 'error': str(e)
182
+ }
183
+ finally:
184
+ await page.close()
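+ # On success this coroutine resolves to a dict with "url", "title", "content" and
+ # "extraction_successful": True; note that "content" is the full rendered-page HTML
+ # from page.content(), not extracted article text.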
185
+
186
+
187
+ def scrape_article_sync(url: str) -> Dict[str, Any]:
188
+ with sync_playwright() as p:
189
+ browser = p.chromium.launch(headless=True)
190
+ page = browser.new_page()
191
+ try:
192
+ page.goto(url)
193
+ page.wait_for_load_state("networkidle")
194
+
195
+ title = page.title()
196
+ content = page.content()
197
+
198
+ return {
199
+ 'url': url,
200
+ 'title': title,
201
+ 'content': content,
202
+ 'extraction_successful': True
203
+ }
204
+ except Exception as e:
205
+ logging.error(f"Error scraping article {url}: {str(e)}")
206
+ return {
207
+ 'url': url,
208
+ 'extraction_successful': False,
209
+ 'error': str(e)
210
+ }
211
+ finally:
212
+ browser.close()
213
+
214
+
215
+ def should_scrape_url(url: str) -> bool:
216
+ parsed_url = urlparse(url)
217
+ path = parsed_url.path.lower()
218
+
219
+ # List of patterns to exclude
220
+ exclude_patterns = [
221
+ '/tag/', '/category/', '/author/', '/search/', '/page/',
222
+ 'wp-content', 'wp-includes', 'wp-json', 'wp-admin',
223
+ 'login', 'register', 'cart', 'checkout', 'account',
224
+ '.jpg', '.png', '.gif', '.pdf', '.zip'
225
+ ]
226
+
227
+ # Check if the URL contains any exclude patterns
228
+ if any(pattern in path for pattern in exclude_patterns):
229
+ return False
230
+
231
+ # Add more sophisticated checks here
232
+ # For example, you might want to only include URLs with certain patterns
233
+ include_patterns = ['/article/', '/post/', '/blog/']
234
+ if any(pattern in path for pattern in include_patterns):
235
+ return True
236
+
237
+ # By default, return True if no exclusion or inclusion rules matched
238
+ return True
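+ # Illustrative behaviour: ".../article/how-to-scrape" is accepted via the include list,
+ # ".../tag/python" or ".../image.jpg" is rejected by the exclude list, and a URL that
+ # matches neither list falls through to the default and is accepted.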
239
+
240
+
241
+ async def scrape_with_retry(url: str, max_retries: int = 3, retry_delay: float = 5.0):
242
+ for attempt in range(max_retries):
243
+ try:
244
+ return await scrape_article(url)
245
+ except TimeoutError:
246
+ if attempt < max_retries - 1:
247
+ logging.warning(f"Timeout error scraping {url}. Retrying in {retry_delay} seconds...")
248
+ await asyncio.sleep(retry_delay)
249
+ else:
250
+ logging.error(f"Failed to scrape {url} after {max_retries} attempts.")
251
+ return None
252
+ except Exception as e:
253
+ logging.error(f"Error scraping {url}: {str(e)}")
254
+ return None
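+ # Usage sketch (placeholder URL): article = await scrape_with_retry("https://example.com/article/1")
+ # returns the scrape_article() result on success, or None once the retries are exhausted.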
255
+
256
+
257
+ def create_website_scraping_tab():
258
+ try:
259
+ default_value = None
260
+ if default_api_endpoint:
261
+ if default_api_endpoint in global_api_endpoints:
262
+ default_value = format_api_name(default_api_endpoint)
263
+ else:
264
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
265
+ except Exception as e:
266
+ logging.error(f"Error setting default API endpoint: {str(e)}")
267
+ default_value = None
268
+ with gr.TabItem("Website Scraping", visible=True):
269
+ gr.Markdown("# Scrape Websites & Summarize Articles")
270
+ with gr.Row():
271
+ with gr.Column():
272
+ scrape_method = gr.Radio(
273
+ ["Individual URLs", "Sitemap", "URL Level", "Recursive Scraping"],
274
+ label="Scraping Method",
275
+ value="Individual URLs"
276
+ )
277
+ url_input = gr.Textbox(
278
+ label="Article URLs or Base URL",
279
+ placeholder="Enter article URLs here, one per line, or base URL for sitemap/URL level/recursive scraping",
280
+ lines=5
281
+ )
282
+ url_level = gr.Slider(
283
+ minimum=1,
284
+ maximum=10,
285
+ step=1,
286
+ label="URL Level (for URL Level scraping)",
287
+ value=2,
288
+ visible=False
289
+ )
290
+ max_pages = gr.Slider(
291
+ minimum=1,
292
+ maximum=100,
293
+ step=1,
294
+ label="Maximum Pages to Scrape (for Recursive Scraping)",
295
+ value=10,
296
+ visible=False
297
+ )
298
+ max_depth = gr.Slider(
299
+ minimum=1,
300
+ maximum=10,
301
+ step=1,
302
+ label="Maximum Depth (for Recursive Scraping)",
303
+ value=3,
304
+ visible=False
305
+ )
306
+ custom_article_title_input = gr.Textbox(
307
+ label="Custom Article Titles (Optional, one per line)",
308
+ placeholder="Enter custom titles for the articles, one per line",
309
+ lines=5
310
+ )
311
+ with gr.Row():
312
+ summarize_checkbox = gr.Checkbox(label="Summarize/Analyze Articles", value=False)
313
+ custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt", value=False, visible=True)
314
+ preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt", value=False, visible=True)
315
+ with gr.Row():
316
+ temp_slider = gr.Slider(0.1, 2.0, 0.7, label="Temperature")
317
+
318
+ # Initialize state variables for pagination
319
+ current_page_state = gr.State(value=1)
320
+ total_pages_state = gr.State(value=1)
321
+ with gr.Row():
322
+ # Add pagination controls
323
+ preset_prompt = gr.Dropdown(
324
+ label="Select Preset Prompt",
325
+ choices=[],
326
+ visible=False
327
+ )
328
+ with gr.Row():
329
+ prev_page_button = gr.Button("Previous Page", visible=False)
330
+ page_display = gr.Markdown("Page 1 of X", visible=False)
331
+ next_page_button = gr.Button("Next Page", visible=False)
332
+
333
+ with gr.Row():
334
+ website_custom_prompt_input = gr.Textbox(
335
+ label="Custom Prompt",
336
+ placeholder="Enter custom prompt here",
337
+ lines=3,
338
+ visible=False
339
+ )
340
+ with gr.Row():
341
+ system_prompt_input = gr.Textbox(
342
+ label="System Prompt",
343
+ value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
344
+ **Bulleted Note Creation Guidelines**
345
+
346
+ **Headings**:
347
+ - Based on referenced topics, not categories like quotes or terms
348
+ - Surrounded by **bold** formatting
349
+ - Not listed as bullet points
350
+ - No space between headings and list items underneath
351
+
352
+ **Emphasis**:
353
+ - **Important terms** set in bold font
354
+ - **Text ending in a colon**: also bolded
355
+
356
+ **Review**:
357
+ - Ensure adherence to specified format
358
+ - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
359
+ """,
360
+ lines=3,
361
+ visible=False
362
+ )
363
+
364
+ # Refactored API selection dropdown
365
+ api_name_input = gr.Dropdown(
366
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
367
+ value=default_value,
368
+ label="API for Summarization/Analysis (Optional)"
369
+ )
370
+ api_key_input = gr.Textbox(
371
+ label="API Key (Mandatory if API Name is specified)",
372
+ placeholder="Enter your API key here; Ignore if using Local API or Built-in API",
373
+ type="password"
374
+ )
375
+ custom_cookies_input = gr.Textbox(
376
+ label="Custom Cookies (JSON format)",
377
+ placeholder="Enter custom cookies in JSON format",
378
+ lines=3,
379
+ visible=True
380
+ )
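+ # Example of the cookie JSON this field expects (a sketch; Playwright's add_cookies
+ # takes a list of objects with at least "name", "value", and either "url" or "domain"+"path"):
+ # [{"name": "sessionid", "value": "abc123", "domain": ".example.com", "path": "/"}]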
381
+ keywords_input = gr.Textbox(
382
+ label="Keywords",
383
+ placeholder="Enter keywords here (comma-separated)",
384
+ value="default,no_keyword_set",
385
+ visible=True
386
+ )
387
+ bookmarks_file_input = gr.File(
388
+ label="Upload Bookmarks File/CSV",
389
+ type="filepath",
390
+ file_types=[".json", ".html", ".csv"], # Added .csv
391
+ visible=True
392
+ )
393
+ gr.Markdown("""
394
+ Supported file formats:
395
+ - Chrome/Edge bookmarks (JSON)
396
+ - Firefox bookmarks (HTML)
397
+ - CSV file with 'url' column (optionally 'title' or 'name' column)
398
+ """)
399
+ parsed_urls_output = gr.Textbox(
400
+ label="Parsed URLs",
401
+ placeholder="URLs will be displayed here after uploading a file.",
402
+ lines=10,
403
+ interactive=False,
404
+ visible=False
405
+ )
406
+
407
+ scrape_button = gr.Button("Scrape and Summarize")
408
+
409
+ with gr.Column():
410
+ progress_output = gr.Textbox(label="Progress", lines=3)
411
+ result_output = gr.Textbox(label="Result", lines=20)
412
+
413
+ def update_ui_for_scrape_method(method):
414
+ url_level_update = gr.update(visible=(method == "URL Level"))
415
+ max_pages_update = gr.update(visible=(method == "Recursive Scraping"))
416
+ max_depth_update = gr.update(visible=(method == "Recursive Scraping"))
417
+ url_input_update = gr.update(
418
+ label="Article URLs" if method == "Individual URLs" else "Base URL",
419
+ placeholder="Enter article URLs here, one per line" if method == "Individual URLs" else "Enter the base URL for scraping"
420
+ )
421
+ return url_level_update, max_pages_update, max_depth_update, url_input_update
422
+
423
+ scrape_method.change(
424
+ fn=update_ui_for_scrape_method,
425
+ inputs=[scrape_method],
426
+ outputs=[url_level, max_pages, max_depth, url_input]
427
+ )
428
+
429
+ custom_prompt_checkbox.change(
430
+ fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
431
+ inputs=[custom_prompt_checkbox],
432
+ outputs=[website_custom_prompt_input, system_prompt_input]
433
+ )
434
+
435
+ def on_preset_prompt_checkbox_change(is_checked):
436
+ if is_checked:
437
+ prompts, total_pages, current_page = list_prompts(page=1, per_page=20)
438
+ page_display_text = f"Page {current_page} of {total_pages}"
439
+ return (
440
+ gr.update(visible=True, interactive=True, choices=prompts), # preset_prompt
441
+ gr.update(visible=True), # prev_page_button
442
+ gr.update(visible=True), # next_page_button
443
+ gr.update(value=page_display_text, visible=True), # page_display
444
+ current_page, # current_page_state
445
+ total_pages # total_pages_state
446
+ )
447
+ else:
448
+ return (
449
+ gr.update(visible=False, interactive=False), # preset_prompt
450
+ gr.update(visible=False), # prev_page_button
451
+ gr.update(visible=False), # next_page_button
452
+ gr.update(visible=False), # page_display
453
+ 1, # current_page_state
454
+ 1 # total_pages_state
455
+ )
456
+
457
+ preset_prompt_checkbox.change(
458
+ fn=on_preset_prompt_checkbox_change,
459
+ inputs=[preset_prompt_checkbox],
460
+ outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state]
461
+ )
462
+
463
+ def on_prev_page_click(current_page, total_pages):
464
+ new_page = max(current_page - 1, 1)
465
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
466
+ page_display_text = f"Page {current_page} of {total_pages}"
467
+ return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
468
+
469
+ prev_page_button.click(
470
+ fn=on_prev_page_click,
471
+ inputs=[current_page_state, total_pages_state],
472
+ outputs=[preset_prompt, page_display, current_page_state]
473
+ )
474
+
475
+ def on_next_page_click(current_page, total_pages):
476
+ new_page = min(current_page + 1, total_pages)
477
+ prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20)
478
+ page_display_text = f"Page {current_page} of {total_pages}"
479
+ return gr.update(choices=prompts), gr.update(value=page_display_text), current_page
480
+
481
+ next_page_button.click(
482
+ fn=on_next_page_click,
483
+ inputs=[current_page_state, total_pages_state],
484
+ outputs=[preset_prompt, page_display, current_page_state]
485
+ )
486
+
487
+ def update_prompts(preset_name):
488
+ prompts = update_user_prompt(preset_name)
489
+ return (
490
+ gr.update(value=prompts["user_prompt"], visible=True),
491
+ gr.update(value=prompts["system_prompt"], visible=True)
492
+ )
493
+
494
+ preset_prompt.change(
495
+ update_prompts,
496
+ inputs=[preset_prompt],
497
+ outputs=[website_custom_prompt_input, system_prompt_input]
498
+ )
499
+
500
+ def parse_bookmarks(file_path):
501
+ """
502
+ Parses the uploaded bookmarks file and extracts URLs.
503
+
504
+ Args:
505
+ file_path (str): Path to the uploaded bookmarks file.
506
+
507
+ Returns:
508
+ str: Formatted string of extracted URLs or error message.
509
+ """
510
+ try:
511
+ bookmarks = collect_bookmarks(file_path)
512
+ # Extract URLs
513
+ urls = []
514
+ for value in bookmarks.values():
515
+ if isinstance(value, list):
516
+ urls.extend(value)
517
+ elif isinstance(value, str):
518
+ urls.append(value)
519
+ if not urls:
520
+ return "No URLs found in the bookmarks file."
521
+ # Format URLs for display
522
+ formatted_urls = "\n".join(urls)
523
+ return formatted_urls
524
+ except Exception as e:
525
+ logging.error(f"Error parsing bookmarks file: {str(e)}")
526
+ return f"Error parsing bookmarks file: {str(e)}"
527
+
528
+ def show_parsed_urls(urls_file):
529
+ """
530
+ Determines whether to show the parsed URLs output.
531
+
532
+ Args:
533
+ urls_file: Uploaded file object.
534
+
535
+ Returns:
536
+ Tuple indicating visibility and content of parsed_urls_output.
537
+ """
538
+ if urls_file is None:
539
+ return gr.update(visible=False), ""
540
+
541
+ file_path = urls_file.name
542
+ try:
543
+ # Use the unified collect_urls_from_file function
544
+ parsed_urls = collect_urls_from_file(file_path)
545
+
546
+ # Format the URLs for display
547
+ formatted_urls = []
548
+ for name, urls in parsed_urls.items():
549
+ if isinstance(urls, list):
550
+ for url in urls:
551
+ formatted_urls.append(f"{name}: {url}")
552
+ else:
553
+ formatted_urls.append(f"{name}: {urls}")
554
+
555
+ return gr.update(visible=True), "\n".join(formatted_urls)
556
+ except Exception as e:
557
+ return gr.update(visible=True), f"Error parsing file: {str(e)}"
558
+
559
+ # Connect the parsing function to the file upload event
560
+ bookmarks_file_input.change(
561
+ fn=show_parsed_urls,
562
+ inputs=[bookmarks_file_input],
563
+ outputs=[parsed_urls_output, parsed_urls_output]
564
+ )
565
+
566
+ async def scrape_and_summarize_wrapper(
567
+ scrape_method: str,
568
+ url_input: str,
569
+ url_level: Optional[int],
570
+ max_pages: int,
571
+ max_depth: int,
572
+ summarize_checkbox: bool,
573
+ custom_prompt: Optional[str],
574
+ api_name: Optional[str],
575
+ api_key: Optional[str],
576
+ keywords: str,
577
+ custom_titles: Optional[str],
578
+ system_prompt: Optional[str],
579
+ temperature: float,
580
+ custom_cookies: Optional[str],
581
+ bookmarks_file,
582
+ progress: gr.Progress = gr.Progress()
583
+ ) -> str:
584
+ try:
585
+ result: List[Dict[str, Any]] = []
586
+
587
+ # Handle bookmarks file if provided
588
+ if bookmarks_file is not None:
589
+ bookmarks = collect_bookmarks(bookmarks_file.name)
590
+ # Extract URLs from bookmarks
591
+ urls_from_bookmarks = []
592
+ for value in bookmarks.values():
593
+ if isinstance(value, list):
594
+ urls_from_bookmarks.extend(value)
595
+ elif isinstance(value, str):
596
+ urls_from_bookmarks.append(value)
597
+ if scrape_method == "Individual URLs":
598
+ url_input = "\n".join(urls_from_bookmarks)
599
+ else:
600
+ if urls_from_bookmarks:
601
+ url_input = urls_from_bookmarks[0]
602
+ else:
603
+ return convert_json_to_markdown(json.dumps({"error": "No URLs found in the bookmarks file."}))
604
+
605
+ # Handle custom cookies
606
+ custom_cookies_list = None
607
+ if custom_cookies:
608
+ try:
609
+ custom_cookies_list = json.loads(custom_cookies)
610
+ if not isinstance(custom_cookies_list, list):
611
+ custom_cookies_list = [custom_cookies_list]
612
+ except json.JSONDecodeError as e:
613
+ return convert_json_to_markdown(json.dumps({"error": f"Invalid JSON format for custom cookies: {e}"}))
614
+
615
+ if scrape_method == "Individual URLs":
616
+ result = await scrape_and_summarize_multiple(url_input, custom_prompt, api_name, api_key, keywords,
617
+ custom_titles, system_prompt, summarize_checkbox, custom_cookies=custom_cookies_list)
618
+ elif scrape_method == "Sitemap":
619
+ result = await asyncio.to_thread(scrape_from_sitemap, url_input)
620
+ elif scrape_method == "URL Level":
621
+ if url_level is None:
622
+ return convert_json_to_markdown(
623
+ json.dumps({"error": "URL level is required for URL Level scraping."}))
624
+ result = await asyncio.to_thread(scrape_by_url_level, url_input, url_level)
625
+ elif scrape_method == "Recursive Scraping":
626
+ result = await recursive_scrape(url_input, max_pages, max_depth, progress.update, delay=1.0,
627
+ custom_cookies=custom_cookies_list)
628
+ else:
629
+ return convert_json_to_markdown(json.dumps({"error": f"Unknown scraping method: {scrape_method}"}))
630
+
631
+ # Ensure result is always a list of dictionaries
632
+ if isinstance(result, dict):
633
+ result = [result]
634
+ elif isinstance(result, list):
635
+ if all(isinstance(item, str) for item in result):
636
+ # Convert list of strings to list of dictionaries
637
+ result = [{"content": item} for item in result]
638
+ elif not all(isinstance(item, dict) for item in result):
639
+ raise ValueError("Not all items in result are dictionaries or strings")
640
+ else:
641
+ raise ValueError(f"Unexpected result type: {type(result)}")
642
+
643
+ # Ensure all items in result are dictionaries
644
+ if not all(isinstance(item, dict) for item in result):
645
+ raise ValueError("Not all items in result are dictionaries")
646
+
647
+ if summarize_checkbox:
648
+ total_articles = len(result)
649
+ for i, article in enumerate(result):
650
+ progress.update(f"Summarizing article {i + 1}/{total_articles}")
651
+ content = article.get('content', '')
652
+ if content:
653
+ summary = await asyncio.to_thread(summarize, content, custom_prompt, api_name, api_key,
654
+ temperature, system_prompt)
655
+ article['summary'] = summary
656
+ else:
657
+ article['summary'] = "No content available to summarize."
658
+
659
+ # Concatenate all content
660
+ all_content = "\n\n".join(
661
+ [f"# {article.get('title', 'Untitled')}\n\n{article.get('content', '')}\n\n" +
662
+ (f"Summary: {article.get('summary', '')}" if summarize_checkbox else "")
663
+ for article in result])
664
+
665
+ # Collect all unique URLs
666
+ all_urls = list(set(article.get('url', '') for article in result if article.get('url')))
667
+
668
+ # Structure the output for the entire website collection
669
+ website_collection = {
670
+ "base_url": url_input,
671
+ "scrape_method": scrape_method,
672
+ "summarization_performed": summarize_checkbox,
673
+ "api_used": api_name if summarize_checkbox else None,
674
+ "keywords": keywords if summarize_checkbox else None,
675
+ "url_level": url_level if scrape_method == "URL Level" else None,
676
+ "max_pages": max_pages if scrape_method == "Recursive Scraping" else None,
677
+ "max_depth": max_depth if scrape_method == "Recursive Scraping" else None,
678
+ "total_articles_scraped": len(result),
679
+ "urls_scraped": all_urls,
680
+ "content": all_content
681
+ }
682
+
683
+ # Convert the JSON to markdown and return
684
+ return convert_json_to_markdown(json.dumps(website_collection, indent=2))
685
+ except Exception as e:
686
+ return convert_json_to_markdown(json.dumps({"error": f"An error occurred: {str(e)}"}))
687
+
688
+ # Update the scrape_button.click to include the temperature parameter
689
+ scrape_button.click(
690
+ fn=lambda *args: asyncio.run(scrape_and_summarize_wrapper(*args)),
691
+ inputs=[scrape_method, url_input, url_level, max_pages, max_depth, summarize_checkbox,
692
+ website_custom_prompt_input, api_name_input, api_key_input, keywords_input,
693
+ custom_article_title_input, system_prompt_input, temp_slider,
694
+ custom_cookies_input, bookmarks_file_input],
695
+ outputs=[result_output]
696
+ )
697
+
698
+
699
+ def convert_json_to_markdown(json_str: str) -> str:
700
+ """
701
+ Converts the JSON output from the scraping process into a markdown format.
702
+
703
+ Args:
704
+ json_str (str): JSON-formatted string containing the website collection data
705
+
706
+ Returns:
707
+ str: Markdown-formatted string of the website collection data
708
+ """
709
+ try:
710
+ # Parse the JSON string
711
+ data = json.loads(json_str)
712
+
713
+ # Check if there's an error in the JSON
714
+ if "error" in data:
715
+ return f"# Error\n\n{data['error']}"
716
+
717
+ # Start building the markdown string
718
+ markdown = f"# Website Collection: {data['base_url']}\n\n"
719
+
720
+ # Add metadata
721
+ markdown += "## Metadata\n\n"
722
+ markdown += f"- **Scrape Method:** {data['scrape_method']}\n"
723
+ markdown += f"- **API Used:** {data['api_used']}\n"
724
+ markdown += f"- **Keywords:** {data['keywords']}\n"
725
+ if data.get('url_level') is not None:
726
+ markdown += f"- **URL Level:** {data['url_level']}\n"
727
+ if data.get('max_pages') is not None:
728
+ markdown += f"- **Maximum Pages:** {data['max_pages']}\n"
729
+ if data.get('max_depth') is not None:
730
+ markdown += f"- **Maximum Depth:** {data['max_depth']}\n"
731
+ markdown += f"- **Total Articles Scraped:** {data['total_articles_scraped']}\n\n"
732
+
733
+ # Add URLs Scraped
734
+ markdown += "## URLs Scraped\n\n"
735
+ for url in data['urls_scraped']:
736
+ markdown += f"- {url}\n"
737
+ markdown += "\n"
738
+
739
+ # Add the content
740
+ markdown += "## Content\n\n"
741
+ markdown += data['content']
742
+
743
+ return markdown
744
+
745
+ except json.JSONDecodeError:
746
+ return "# Error\n\nInvalid JSON string provided."
747
+ except KeyError as e:
748
+ return f"# Error\n\nMissing key in JSON data: {str(e)}"
749
+ except Exception as e:
750
+ return f"# Error\n\nAn unexpected error occurred: {str(e)}"
751
+
752
+ #
753
+ # End of File
754
+ ########################################################################################################################
App_Function_Libraries/Gradio_UI/Workflows_tab.py ADDED
@@ -0,0 +1,190 @@
1
+ # Chat_Workflows.py
2
+ # Description: Gradio UI for Chat Workflows
3
+ #
4
+ # Imports
5
+ import json
6
+ import logging
7
+ from pathlib import Path
8
+ #
9
+ # External Imports
10
+ import gradio as gr
11
+ #
12
+ # Local Imports
13
+ from App_Function_Libraries.Gradio_UI.Chat_ui import chat_wrapper, search_conversations, \
14
+ load_conversation
15
+ from App_Function_Libraries.Chat.Chat_Functions import save_chat_history_to_db_wrapper
16
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
17
+ #
18
+ ############################################################################################################
19
+ #
20
+ # Functions:
21
+
22
+ # Load workflows from a JSON file
23
+ json_path = Path('./Helper_Scripts/Workflows/Workflows.json')
24
+ with json_path.open('r') as f:
25
+ workflows = json.load(f)
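+ # Assumed shape of Workflows.json, inferred from how it is read in this tab (sketch):
+ # [{"name": "Example Workflow", "context": "Optional starting context",
+ #   "prompts": ["Step 1 prompt", "Step 2 prompt"]}]
+ # Only "name" and "prompts" are required here; "context" defaults to "".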
26
+
27
+
28
+ def chat_workflows_tab():
29
+ try:
30
+ default_value = None
31
+ if default_api_endpoint:
32
+ if default_api_endpoint in global_api_endpoints:
33
+ default_value = format_api_name(default_api_endpoint)
34
+ else:
35
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
36
+ except Exception as e:
37
+ logging.error(f"Error setting default API endpoint: {str(e)}")
38
+ default_value = None
39
+ with gr.TabItem("Chat Workflows", visible=True):
40
+ gr.Markdown("# Workflows using LLMs")
41
+ chat_history = gr.State([])
42
+ media_content = gr.State({})
43
+ selected_parts = gr.State([])
44
+ conversation_id = gr.State(None)
45
+ workflow_state = gr.State({"current_step": 0, "max_steps": 0, "conversation_id": None})
46
+
47
+ with gr.Row():
48
+ with gr.Column():
49
+ workflow_selector = gr.Dropdown(label="Select Workflow", choices=[wf['name'] for wf in workflows])
50
+ # Refactored API selection dropdown
51
+ api_selector = gr.Dropdown(
52
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
53
+ value=default_value,
54
+ label="API for Interaction (Optional)"
55
+ )
56
+ api_key_input = gr.Textbox(label="API Key (optional)", type="password")
57
+ temperature = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
58
+ save_conversation = gr.Checkbox(label="Save Conversation", value=False)
59
+ with gr.Column():
60
+ gr.Markdown("Placeholder")
61
+ with gr.Row():
62
+ with gr.Column():
63
+ conversation_search = gr.Textbox(label="Search Conversations")
64
+ search_conversations_btn = gr.Button("Search Conversations")
65
+ with gr.Column():
66
+ previous_conversations = gr.Dropdown(label="Select Conversation", choices=[], interactive=True)
67
+ load_conversations_btn = gr.Button("Load Selected Conversation")
68
+ with gr.Row():
69
+ with gr.Column():
70
+ context_input = gr.Textbox(label="Initial Context", lines=5)
71
+ chatbot = gr.Chatbot(label="Workflow Chat")
72
+ msg = gr.Textbox(label="Your Input")
73
+ submit_btn = gr.Button("Submit")
74
+ clear_btn = gr.Button("Clear Chat")
75
+ chat_media_name = gr.Textbox(label="Custom Chat Name (optional)")
76
+ save_btn = gr.Button("Save Chat to Database")
77
+ save_status = gr.Textbox(label="Save Status", interactive=False)
78
+
79
+ def update_workflow_ui(workflow_name):
80
+ if not workflow_name:
81
+ return {"current_step": 0, "max_steps": 0, "conversation_id": None}, "", []
82
+ selected_workflow = next((wf for wf in workflows if wf['name'] == workflow_name), None)
83
+ if selected_workflow:
84
+ num_prompts = len(selected_workflow['prompts'])
85
+ context = selected_workflow.get('context', '')
86
+ first_prompt = selected_workflow['prompts'][0]
87
+ initial_chat = [(None, f"{first_prompt}")]
88
+ logging.info(f"Initializing workflow: {workflow_name} with {num_prompts} steps")
89
+ return {"current_step": 0, "max_steps": num_prompts, "conversation_id": None}, context, initial_chat
90
+ else:
91
+ logging.error(f"Selected workflow not found: {workflow_name}")
92
+ return {"current_step": 0, "max_steps": 0, "conversation_id": None}, "", []
93
+
94
+ def process_workflow_step(message, history, context, workflow_name, api_endpoint, api_key, workflow_state,
95
+ save_conv, temp):
96
+ logging.info(f"Process workflow step called with message: {message}")
97
+ logging.info(f"Current workflow state: {workflow_state}")
98
+ try:
99
+ selected_workflow = next((wf for wf in workflows if wf['name'] == workflow_name), None)
100
+ if not selected_workflow:
101
+ logging.error(f"Selected workflow not found: {workflow_name}")
102
+ return history, workflow_state, gr.update(interactive=True)
103
+
104
+ current_step = workflow_state["current_step"]
105
+ max_steps = workflow_state["max_steps"]
106
+
107
+ logging.info(f"Current step: {current_step}, Max steps: {max_steps}")
108
+
109
+ if current_step >= max_steps:
110
+ logging.info("Workflow completed, disabling input")
111
+ return history, workflow_state, gr.update(interactive=False)
112
+
113
+ prompt = selected_workflow['prompts'][current_step]
114
+ full_message = f"{context}\n\nStep {current_step + 1}: {prompt}\nUser: {message}"
115
+
116
+ logging.info(f"Calling chat_wrapper with full_message: {full_message[:100]}...")
117
+ bot_message, new_history, new_conversation_id = chat_wrapper(
118
+ full_message, history, media_content.value, selected_parts.value,
119
+ api_endpoint, api_key, "", workflow_state["conversation_id"],
120
+ save_conv, temp, "You are a helpful assistant guiding through a workflow."
121
+ )
122
+
123
+ logging.info(f"Received bot_message: {bot_message[:100]}...")
124
+
125
+ next_step = current_step + 1
126
+ new_workflow_state = {
127
+ "current_step": next_step,
128
+ "max_steps": max_steps,
129
+ "conversation_id": new_conversation_id
130
+ }
131
+
132
+ if next_step >= max_steps:
133
+ logging.info("Workflow completed after this step")
134
+ return new_history, new_workflow_state, gr.update(interactive=False)
135
+ else:
136
+ next_prompt = selected_workflow['prompts'][next_step]
137
+ new_history.append((None, f"Step {next_step + 1}: {next_prompt}"))
138
+ logging.info(f"Moving to next step: {next_step}")
139
+ return new_history, new_workflow_state, gr.update(interactive=True)
140
+ except Exception as e:
141
+ logging.error(f"Error in process_workflow_step: {str(e)}")
142
+ return history, workflow_state, gr.update(interactive=True)
143
+
144
+ workflow_selector.change(
145
+ update_workflow_ui,
146
+ inputs=[workflow_selector],
147
+ outputs=[workflow_state, context_input, chatbot]
148
+ )
149
+
150
+ submit_btn.click(
151
+ process_workflow_step,
152
+ inputs=[msg, chatbot, context_input, workflow_selector, api_selector, api_key_input, workflow_state,
153
+ save_conversation, temperature],
154
+ outputs=[chatbot, workflow_state, msg]
155
+ ).then(
156
+ lambda: gr.update(value=""),
157
+ outputs=[msg]
158
+ )
159
+
160
+ clear_btn.click(
161
+ lambda: ([], {"current_step": 0, "max_steps": 0, "conversation_id": None}, ""),
162
+ outputs=[chatbot, workflow_state, context_input]
163
+ )
164
+
165
+ save_btn.click(
166
+ save_chat_history_to_db_wrapper,
167
+ inputs=[chatbot, conversation_id, media_content, chat_media_name],
168
+ outputs=[conversation_id, save_status]
169
+ )
170
+
171
+ search_conversations_btn.click(
172
+ search_conversations,
173
+ inputs=[conversation_search],
174
+ outputs=[previous_conversations]
175
+ )
176
+
177
+ load_conversations_btn.click(
178
+ lambda: ([], {"current_step": 0, "max_steps": 0, "conversation_id": None}, ""),
179
+ outputs=[chatbot, workflow_state, context_input]
180
+ ).then(
181
+ load_conversation,
182
+ inputs=[previous_conversations],
183
+ outputs=[chatbot, conversation_id]
184
+ )
185
+
186
+ return workflow_selector, api_selector, api_key_input, context_input, chatbot, msg, submit_btn, clear_btn, save_btn
187
+
188
+ #
189
+ # End of script
190
+ ############################################################################################################
App_Function_Libraries/Gradio_UI/Writing_tab.py CHANGED
@@ -4,11 +4,16 @@
4
  # Imports
5
  #
6
  # External Imports
 
 
7
  import gradio as gr
8
  import textstat
9
  #
10
  # Local Imports
11
  from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
 
 
 
12
  #
13
  ########################################################################################################################
14
  #
@@ -42,6 +47,16 @@ def grammar_style_check(input_text, custom_prompt, api_name, api_key, system_pro
42
 
43
  def create_grammar_style_check_tab():
44
  with gr.TabItem("Grammar and Style Check", visible=True):
 
 
 
 
 
 
 
 
 
 
45
  with gr.Row():
46
  with gr.Column():
47
  gr.Markdown("# Grammar and Style Check")
@@ -74,11 +89,11 @@ def create_grammar_style_check_tab():
74
  inputs=[custom_prompt_checkbox],
75
  outputs=[custom_prompt_input, system_prompt_input]
76
  )
 
77
  api_name_input = gr.Dropdown(
78
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
79
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM","ollama", "HuggingFace", "Custom-OpenAI-API"],
80
- value=None,
81
- label="API for Grammar Check"
82
  )
83
  api_key_input = gr.Textbox(label="API Key (if not set in Config_Files/config.txt)", placeholder="Enter your API key here",
84
  type="password")
@@ -302,63 +317,63 @@ def create_document_feedback_tab():
302
  with gr.Row():
303
  compare_button = gr.Button("Compare Feedback")
304
 
305
- feedback_history = gr.State([])
306
-
307
- def add_custom_persona(name, description):
308
- updated_choices = persona_dropdown.choices + [name]
309
- persona_prompts[name] = f"As {name}, {description}, provide feedback on the following text:"
310
- return gr.update(choices=updated_choices)
311
-
312
- def update_feedback_history(current_text, persona, feedback):
313
- # Ensure feedback_history.value is initialized and is a list
314
- if feedback_history.value is None:
315
- feedback_history.value = []
316
-
317
- history = feedback_history.value
318
-
319
- # Append the new entry to the history
320
- history.append({"text": current_text, "persona": persona, "feedback": feedback})
321
-
322
- # Keep only the last 5 entries in the history
323
- feedback_history.value = history[-10:]
324
-
325
- # Generate and return the updated HTML
326
- return generate_feedback_history_html(feedback_history.value)
327
-
328
- def compare_feedback(text, selected_personas, api_name, api_key):
329
- results = []
330
- for persona in selected_personas:
331
- feedback = generate_writing_feedback(text, persona, "Overall", api_name, api_key)
332
- results.append(f"### {persona}'s Feedback:\n{feedback}\n\n")
333
- return "\n".join(results)
334
-
335
- add_custom_persona_button.click(
336
- fn=add_custom_persona,
337
- inputs=[custom_persona_name, custom_persona_description],
338
- outputs=persona_dropdown
339
- )
340
-
341
- get_feedback_button.click(
342
- fn=lambda text, persona, aspect, api_name, api_key: (
343
- generate_writing_feedback(text, persona, aspect, api_name, api_key),
344
- calculate_readability(text),
345
- update_feedback_history(text, persona, generate_writing_feedback(text, persona, aspect, api_name, api_key))
346
- ),
347
- inputs=[input_text, persona_dropdown, aspect_dropdown, api_name_input, api_key_input],
348
- outputs=[feedback_output, readability_output, feedback_history_display]
349
- )
350
-
351
- compare_button.click(
352
- fn=compare_feedback,
353
- inputs=[input_text, compare_personas, api_name_input, api_key_input],
354
- outputs=feedback_output
355
- )
356
-
357
- generate_prompt_button.click(
358
- fn=generate_writing_prompt,
359
- inputs=[persona_dropdown, api_name_input, api_key_input],
360
- outputs=input_text
361
- )
362
 
363
  return input_text, feedback_output, readability_output, feedback_history_display
364
 
 
4
  # Imports
5
  #
6
  # External Imports
7
+ import logging
8
+
9
  import gradio as gr
10
  import textstat
11
  #
12
  # Local Imports
13
  from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
14
+ from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name
15
+
16
+
17
  #
18
  ########################################################################################################################
19
  #
 
47
 
48
  def create_grammar_style_check_tab():
49
  with gr.TabItem("Grammar and Style Check", visible=True):
50
+ try:
51
+ default_value = None
52
+ if default_api_endpoint:
53
+ if default_api_endpoint in global_api_endpoints:
54
+ default_value = format_api_name(default_api_endpoint)
55
+ else:
56
+ logging.warning(f"Default API endpoint '{default_api_endpoint}' not found in global_api_endpoints")
57
+ except Exception as e:
58
+ logging.error(f"Error setting default API endpoint: {str(e)}")
59
+ default_value = None
60
  with gr.Row():
61
  with gr.Column():
62
  gr.Markdown("# Grammar and Style Check")
 
89
  inputs=[custom_prompt_checkbox],
90
  outputs=[custom_prompt_input, system_prompt_input]
91
  )
92
+ # Refactored API selection dropdown
93
  api_name_input = gr.Dropdown(
94
+ choices=["None"] + [format_api_name(api) for api in global_api_endpoints],
95
+ value=default_value,
96
+ label="API for Analysis (Optional)"
 
97
  )
98
  api_key_input = gr.Textbox(label="API Key (if not set in Config_Files/config.txt)", placeholder="Enter your API key here",
99
  type="password")
 
317
  with gr.Row():
318
  compare_button = gr.Button("Compare Feedback")
319
 
320
+ feedback_history = gr.State([])
321
+
322
+ def add_custom_persona(name, description):
323
+ updated_choices = persona_dropdown.choices + [name]
324
+ persona_prompts[name] = f"As {name}, {description}, provide feedback on the following text:"
325
+ return gr.update(choices=updated_choices)
326
+
327
+ def update_feedback_history(current_text, persona, feedback):
328
+ # Ensure feedback_history.value is initialized and is a list
329
+ if feedback_history.value is None:
330
+ feedback_history.value = []
331
+
332
+ history = feedback_history.value
333
+
334
+ # Append the new entry to the history
335
+ history.append({"text": current_text, "persona": persona, "feedback": feedback})
336
+
337
+ # Keep only the last 5 entries in the history
338
+ feedback_history.value = history[-10:]
339
+
340
+ # Generate and return the updated HTML
341
+ return generate_feedback_history_html(feedback_history.value)
342
+
343
+ def compare_feedback(text, selected_personas, api_name, api_key):
344
+ results = []
345
+ for persona in selected_personas:
346
+ feedback = generate_writing_feedback(text, persona, "Overall", api_name, api_key)
347
+ results.append(f"### {persona}'s Feedback:\n{feedback}\n\n")
348
+ return "\n".join(results)
349
+
350
+ add_custom_persona_button.click(
351
+ fn=add_custom_persona,
352
+ inputs=[custom_persona_name, custom_persona_description],
353
+ outputs=persona_dropdown
354
+ )
355
+
356
+ get_feedback_button.click(
357
+ fn=lambda text, persona, aspect, api_name, api_key: (
358
+ generate_writing_feedback(text, persona, aspect, api_name, api_key),
359
+ calculate_readability(text),
360
+ update_feedback_history(text, persona, generate_writing_feedback(text, persona, aspect, api_name, api_key))
361
+ ),
362
+ inputs=[input_text, persona_dropdown, aspect_dropdown, api_name_input, api_key_input],
363
+ outputs=[feedback_output, readability_output, feedback_history_display]
364
+ )
365
+
366
+ compare_button.click(
367
+ fn=compare_feedback,
368
+ inputs=[input_text, compare_personas, api_name_input, api_key_input],
369
+ outputs=feedback_output
370
+ )
371
+
372
+ generate_prompt_button.click(
373
+ fn=generate_writing_prompt,
374
+ inputs=[persona_dropdown, api_name_input, api_key_input],
375
+ outputs=input_text
376
+ )
377
 
378
  return input_text, feedback_output, readability_output, feedback_history_display
379