oceansweep committed on
Commit
301516a
1 Parent(s): e2a1cf9

Delete App_Function_Libraries

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. App_Function_Libraries/Audio/Audio_Files.py +0 -786
  2. App_Function_Libraries/Audio/Audio_Transcription_Lib.py +0 -335
  3. App_Function_Libraries/Audio/Diarization_Lib.py +0 -275
  4. App_Function_Libraries/Audio/__init__.py +0 -0
  5. App_Function_Libraries/Benchmarks_Evaluations/Confabulation_check.py +0 -81
  6. App_Function_Libraries/Benchmarks_Evaluations/__init__.py +0 -0
  7. App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py +0 -498
  8. App_Function_Libraries/Books/Book_Ingestion_Lib.py +0 -577
  9. App_Function_Libraries/Books/__init__.py +0 -0
  10. App_Function_Libraries/Character_Chat/Character_Chat_Lib.py +0 -607
  11. App_Function_Libraries/Character_Chat/__init__.py +0 -0
  12. App_Function_Libraries/Chat.py +0 -439
  13. App_Function_Libraries/Chat_related_functions.py +0 -41
  14. App_Function_Libraries/Chunk_Lib.py +0 -1051
  15. App_Function_Libraries/DB/Character_Chat_DB.py +0 -702
  16. App_Function_Libraries/DB/DB_Manager.py +0 -991
  17. App_Function_Libraries/DB/RAG_QA_Chat_DB.py +0 -461
  18. App_Function_Libraries/DB/SQLite_DB.py +0 -0
  19. App_Function_Libraries/DB/Test_SQLite_DB.py +0 -202
  20. App_Function_Libraries/DB/__init__.py +0 -0
  21. App_Function_Libraries/Databases/chroma_db/chroma.sqlite3 +0 -0
  22. App_Function_Libraries/Databases/media_summary.db +0 -3
  23. App_Function_Libraries/Databases/prompts.db +0 -0
  24. App_Function_Libraries/Gradio_Related.py +0 -421
  25. App_Function_Libraries/Gradio_UI/Arxiv_tab.py +0 -230
  26. App_Function_Libraries/Gradio_UI/Audio_ingestion_tab.py +0 -167
  27. App_Function_Libraries/Gradio_UI/Backup_Functionality.py +0 -71
  28. App_Function_Libraries/Gradio_UI/Book_Ingestion_tab.py +0 -100
  29. App_Function_Libraries/Gradio_UI/Character_Chat_tab.py +0 -1848
  30. App_Function_Libraries/Gradio_UI/Character_Interaction_tab.py +0 -837
  31. App_Function_Libraries/Gradio_UI/Character_interaction_tab.py +0 -511
  32. App_Function_Libraries/Gradio_UI/Chat_Workflows.py +0 -178
  33. App_Function_Libraries/Gradio_UI/Chat_ui.py +0 -1185
  34. App_Function_Libraries/Gradio_UI/Config_tab.py +0 -51
  35. App_Function_Libraries/Gradio_UI/Embeddings_tab.py +0 -508
  36. App_Function_Libraries/Gradio_UI/Evaluations_Benchmarks_tab.py +0 -60
  37. App_Function_Libraries/Gradio_UI/Explain_summarize_tab.py +0 -313
  38. App_Function_Libraries/Gradio_UI/Export_Functionality.py +0 -266
  39. App_Function_Libraries/Gradio_UI/Gradio_Shared.py +0 -285
  40. App_Function_Libraries/Gradio_UI/Import_Functionality.py +0 -388
  41. App_Function_Libraries/Gradio_UI/Introduction_tab.py +0 -167
  42. App_Function_Libraries/Gradio_UI/Keywords.py +0 -65
  43. App_Function_Libraries/Gradio_UI/Live_Recording.py +0 -142
  44. App_Function_Libraries/Gradio_UI/Llamafile_tab.py +0 -312
  45. App_Function_Libraries/Gradio_UI/MMLU_Pro_tab.py +0 -115
  46. App_Function_Libraries/Gradio_UI/Media_edit.py +0 -301
  47. App_Function_Libraries/Gradio_UI/Media_wiki_tab.py +0 -340
  48. App_Function_Libraries/Gradio_UI/PDF_ingestion_tab.py +0 -152
  49. App_Function_Libraries/Gradio_UI/Plaintext_tab_import.py +0 -116
  50. App_Function_Libraries/Gradio_UI/Podcast_tab.py +0 -163
App_Function_Libraries/Audio/Audio_Files.py DELETED
@@ -1,786 +0,0 @@
-# Audio_Files.py
-#########################################
-# Audio Processing Library
-# This library is used to download or load audio files from a local directory.
-#
-####
-#
-# Functions:
-#
-# download_audio_file(url, save_path)
-# process_audio(
-# process_audio_file(audio_url, audio_file, whisper_model="small.en", api_name=None, api_key=None)
-#
-#
-#########################################
-# Imports
-import json
-import logging
-import os
-import subprocess
-import tempfile
-import time
-import uuid
-from datetime import datetime
-from pathlib import Path
-#
-# External Imports
-import requests
-import yt_dlp
-#
-# Local Imports
-from App_Function_Libraries.DB.DB_Manager import add_media_with_keywords, \
-    check_media_and_whisper_model
-from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
-from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
-from App_Function_Libraries.Utils.Utils import downloaded_files, \
-    sanitize_filename, generate_unique_id, temp_files
-from App_Function_Libraries.Video_DL_Ingestion_Lib import extract_metadata
-from App_Function_Libraries.Audio.Audio_Transcription_Lib import speech_to_text
-from App_Function_Libraries.Chunk_Lib import improved_chunking_process
-#
-#######################################################################################################################
-# Function Definitions
-#
-
-MAX_FILE_SIZE = 500 * 1024 * 1024
-
-
-def download_audio_file(url, current_whisper_model="", use_cookies=False, cookies=None):
-    try:
-        # Check if media already exists in the database and compare whisper models
-        should_download, reason = check_media_and_whisper_model(
-            url=url,
-            current_whisper_model=current_whisper_model
-        )
-
-        if not should_download:
-            logging.info(f"Skipping audio download: {reason}")
-            return None
-
-        logging.info(f"Proceeding with audio download: {reason}")
-
-        # Set up the request headers
-        headers = {}
-        if use_cookies and cookies:
-            try:
-                cookie_dict = json.loads(cookies)
-                headers['Cookie'] = '; '.join([f'{k}={v}' for k, v in cookie_dict.items()])
-            except json.JSONDecodeError:
-                logging.warning("Invalid cookie format. Proceeding without cookies.")
-
-        # Make the request
-        response = requests.get(url, headers=headers, stream=True)
-        # Raise an exception for bad status codes
-        response.raise_for_status()
-
-        # Get the file size
-        file_size = int(response.headers.get('content-length', 0))
-        if file_size > 500 * 1024 * 1024:  # 500 MB limit
-            raise ValueError("File size exceeds the 500MB limit.")
-
-        # Generate a unique filename
-        file_name = f"audio_{uuid.uuid4().hex[:8]}.mp3"
-        save_path = os.path.join('downloads', file_name)
-
-        # Ensure the downloads directory exists
-        os.makedirs('downloads', exist_ok=True)
-
-
-        # Download the file
-        with open(save_path, 'wb') as f:
-            for chunk in response.iter_content(chunk_size=8192):
-                if chunk:
-                    f.write(chunk)
-
-        logging.info(f"Audio file downloaded successfully: {save_path}")
-        return save_path
-
-    except requests.RequestException as e:
-        logging.error(f"Error downloading audio file: {str(e)}")
-        raise
-    except ValueError as e:
-        logging.error(str(e))
-        raise
-    except Exception as e:
-        logging.error(f"Unexpected error downloading audio file: {str(e)}")
-        raise
-
-def process_audio_files(audio_urls, audio_file, whisper_model, api_name, api_key, use_cookies, cookies, keep_original,
-                        custom_keywords, custom_prompt_input, chunk_method, max_chunk_size, chunk_overlap,
-                        use_adaptive_chunking, use_multi_level_chunking, chunk_language, diarize,
-                        keep_timestamps, custom_title):
-
-    start_time = time.time()  # Start time for processing
-    processed_count = 0
-    failed_count = 0
-    progress = []
-    all_transcriptions = []
-    all_summaries = []
-    #v2
-    def format_transcription_with_timestamps(segments):
-        if keep_timestamps:
-            formatted_segments = []
-            for segment in segments:
-                start = segment.get('Time_Start', 0)
-                end = segment.get('Time_End', 0)
-                text = segment.get('Text', '').strip()  # Ensure text is stripped of leading/trailing spaces
-
-                # Add the formatted timestamp and text to the list, followed by a newline
-                formatted_segments.append(f"[{start:.2f}-{end:.2f}] {text}")
-
-            # Join the segments with a newline to ensure proper formatting
-            return "\n".join(formatted_segments)
-        else:
-            # Join the text without timestamps
-            return "\n".join([segment.get('Text', '').strip() for segment in segments])
-
-    def update_progress(message):
-        progress.append(message)
-        return "\n".join(progress)
-
-    def cleanup_files():
-        for file in temp_files:
-            try:
-                if os.path.exists(file):
-                    os.remove(file)
-                    update_progress(f"Temporary file {file} removed.")
-            except Exception as e:
-                update_progress(f"Failed to remove temporary file {file}: {str(e)}")
-
-    def reencode_mp3(mp3_file_path):
-        try:
-            reencoded_mp3_path = mp3_file_path.replace(".mp3", "_reencoded.mp3")
-            subprocess.run([ffmpeg_cmd, '-i', mp3_file_path, '-codec:a', 'libmp3lame', reencoded_mp3_path], check=True)
-            update_progress(f"Re-encoded {mp3_file_path} to {reencoded_mp3_path}.")
-            return reencoded_mp3_path
-        except subprocess.CalledProcessError as e:
-            update_progress(f"Error re-encoding {mp3_file_path}: {str(e)}")
-            raise
-
-    def convert_mp3_to_wav(mp3_file_path):
-        try:
-            wav_file_path = mp3_file_path.replace(".mp3", ".wav")
-            subprocess.run([ffmpeg_cmd, '-i', mp3_file_path, wav_file_path], check=True)
-            update_progress(f"Converted {mp3_file_path} to {wav_file_path}.")
-            return wav_file_path
-        except subprocess.CalledProcessError as e:
-            update_progress(f"Error converting {mp3_file_path} to WAV: {str(e)}")
-            raise
-
-    try:
-        # Check and set the ffmpeg command
-        global ffmpeg_cmd
-        if os.name == "nt":
-            logging.debug("Running on Windows")
-            ffmpeg_cmd = os.path.join(os.getcwd(), "Bin", "ffmpeg.exe")
-        else:
-            ffmpeg_cmd = 'ffmpeg'  # Assume 'ffmpeg' is in PATH for non-Windows systems
-
-        # Ensure ffmpeg is accessible
-        if not os.path.exists(ffmpeg_cmd) and os.name == "nt":
-            raise FileNotFoundError(f"ffmpeg executable not found at path: {ffmpeg_cmd}")
-
-        # Define chunk options early to avoid undefined errors
-        chunk_options = {
-            'method': chunk_method,
-            'max_size': max_chunk_size,
-            'overlap': chunk_overlap,
-            'adaptive': use_adaptive_chunking,
-            'multi_level': use_multi_level_chunking,
-            'language': chunk_language
-        }
-
-        # Process multiple URLs
-        urls = [url.strip() for url in audio_urls.split('\n') if url.strip()]
-
-        for i, url in enumerate(urls):
-            update_progress(f"Processing URL {i + 1}/{len(urls)}: {url}")
-
-            # Download and process audio file
-            audio_file_path = download_audio_file(url, use_cookies, cookies)
-            if not os.path.exists(audio_file_path):
-                update_progress(f"Downloaded file not found: {audio_file_path}")
-                failed_count += 1
-                log_counter(
-                    metric_name="audio_files_failed_total",
-                    labels={"whisper_model": whisper_model, "api_name": api_name},
-                    value=1
-                )
-                continue
-
-            temp_files.append(audio_file_path)
-            update_progress("Audio file downloaded successfully.")
-
-            # Re-encode MP3 to fix potential issues
-            reencoded_mp3_path = reencode_mp3(audio_file_path)
-            if not os.path.exists(reencoded_mp3_path):
-                update_progress(f"Re-encoded file not found: {reencoded_mp3_path}")
-                failed_count += 1
-                log_counter(
-                    metric_name="audio_files_failed_total",
-                    labels={"whisper_model": whisper_model, "api_name": api_name},
-                    value=1
-                )
-                continue
-
-            temp_files.append(reencoded_mp3_path)
-
-            # Convert re-encoded MP3 to WAV
-            wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
-            if not os.path.exists(wav_file_path):
-                update_progress(f"Converted WAV file not found: {wav_file_path}")
-                failed_count += 1
-                log_counter(
-                    metric_name="audio_files_failed_total",
-                    labels={"whisper_model": whisper_model, "api_name": api_name},
-                    value=1
-                )
-                continue
-
-            temp_files.append(wav_file_path)
-
-            # Initialize transcription
-            transcription = ""
-
-            # Transcribe audio
-            if diarize:
-                segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=True)
-            else:
-                segments = speech_to_text(wav_file_path, whisper_model=whisper_model)
-
-            # Handle segments nested under 'segments' key
-            if isinstance(segments, dict) and 'segments' in segments:
-                segments = segments['segments']
-
-            if isinstance(segments, list):
-                # Log first 5 segments for debugging
-                logging.debug(f"Segments before formatting: {segments[:5]}")
-                transcription = format_transcription_with_timestamps(segments)
-                logging.debug(f"Formatted transcription (first 500 chars): {transcription[:500]}")
-                update_progress("Audio transcribed successfully.")
-            else:
-                update_progress("Unexpected segments format received from speech_to_text.")
-                logging.error(f"Unexpected segments format: {segments}")
-                failed_count += 1
-                log_counter(
-                    metric_name="audio_files_failed_total",
-                    labels={"whisper_model": whisper_model, "api_name": api_name},
-                    value=1
-                )
-                continue
-
-            if not transcription.strip():
-                update_progress("Transcription is empty.")
-                failed_count += 1
-                log_counter(
-                    metric_name="audio_files_failed_total",
-                    labels={"whisper_model": whisper_model, "api_name": api_name},
-                    value=1
-                )
-            else:
-                # Apply chunking
-                chunked_text = improved_chunking_process(transcription, chunk_options)
-
-                # Summarize
-                logging.debug(f"Audio Transcription API Name: {api_name}")
-                if api_name:
-                    try:
-                        summary = perform_summarization(api_name, chunked_text, custom_prompt_input, api_key)
-                        update_progress("Audio summarized successfully.")
-                    except Exception as e:
-                        logging.error(f"Error during summarization: {str(e)}")
-                        summary = "Summary generation failed"
-                        failed_count += 1
-                        log_counter(
-                            metric_name="audio_files_failed_total",
-                            labels={"whisper_model": whisper_model, "api_name": api_name},
-                            value=1
-                        )
-                else:
-                    summary = "No summary available (API not provided)"
-
-                all_transcriptions.append(transcription)
-                all_summaries.append(summary)
-
-                # Use custom_title if provided, otherwise use the original filename
-                title = custom_title if custom_title else os.path.basename(wav_file_path)
-
-                # Add to database
-                add_media_with_keywords(
-                    url=url,
-                    title=title,
-                    media_type='audio',
-                    content=transcription,
-                    keywords=custom_keywords,
-                    prompt=custom_prompt_input,
-                    summary=summary,
-                    transcription_model=whisper_model,
-                    author="Unknown",
-                    ingestion_date=datetime.now().strftime('%Y-%m-%d')
-                )
-                update_progress("Audio file processed and added to database.")
-                processed_count += 1
-                log_counter(
-                    metric_name="audio_files_processed_total",
-                    labels={"whisper_model": whisper_model, "api_name": api_name},
-                    value=1
-                )
-
-        # Process uploaded file if provided
-        if audio_file:
-            url = generate_unique_id()
-            if os.path.getsize(audio_file.name) > MAX_FILE_SIZE:
-                update_progress(
-                    f"Uploaded file size exceeds the maximum limit of {MAX_FILE_SIZE / (1024 * 1024):.2f}MB. Skipping this file.")
-            else:
-                try:
-                    # Re-encode MP3 to fix potential issues
-                    reencoded_mp3_path = reencode_mp3(audio_file.name)
-                    if not os.path.exists(reencoded_mp3_path):
-                        update_progress(f"Re-encoded file not found: {reencoded_mp3_path}")
-                        return update_progress("Processing failed: Re-encoded file not found"), "", ""
-
-                    temp_files.append(reencoded_mp3_path)
-
-                    # Convert re-encoded MP3 to WAV
-                    wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
-                    if not os.path.exists(wav_file_path):
-                        update_progress(f"Converted WAV file not found: {wav_file_path}")
-                        return update_progress("Processing failed: Converted WAV file not found"), "", ""
-
-                    temp_files.append(wav_file_path)
-
-                    # Initialize transcription
-                    transcription = ""
-
-                    if diarize:
-                        segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=True)
-                    else:
-                        segments = speech_to_text(wav_file_path, whisper_model=whisper_model)
-
-                    # Handle segments nested under 'segments' key
-                    if isinstance(segments, dict) and 'segments' in segments:
-                        segments = segments['segments']
-
-                    if isinstance(segments, list):
-                        transcription = format_transcription_with_timestamps(segments)
-                    else:
-                        update_progress("Unexpected segments format received from speech_to_text.")
-                        logging.error(f"Unexpected segments format: {segments}")
-
-                    chunked_text = improved_chunking_process(transcription, chunk_options)
-
-                    logging.debug(f"Audio Transcription API Name: {api_name}")
-                    if api_name:
-                        try:
-                            summary = perform_summarization(api_name, chunked_text, custom_prompt_input, api_key)
-                            update_progress("Audio summarized successfully.")
-                        except Exception as e:
-                            logging.error(f"Error during summarization: {str(e)}")
-                            summary = "Summary generation failed"
-                    else:
-                        summary = "No summary available (API not provided)"
-
-                    all_transcriptions.append(transcription)
-                    all_summaries.append(summary)
-
-                    # Use custom_title if provided, otherwise use the original filename
-                    title = custom_title if custom_title else os.path.basename(wav_file_path)
-
-                    add_media_with_keywords(
-                        url="Uploaded File",
-                        title=title,
-                        media_type='audio',
-                        content=transcription,
-                        keywords=custom_keywords,
-                        prompt=custom_prompt_input,
-                        summary=summary,
-                        transcription_model=whisper_model,
-                        author="Unknown",
-                        ingestion_date=datetime.now().strftime('%Y-%m-%d')
-                    )
-                    update_progress("Uploaded file processed and added to database.")
-                    processed_count += 1
-                    log_counter(
-                        metric_name="audio_files_processed_total",
-                        labels={"whisper_model": whisper_model, "api_name": api_name},
-                        value=1
-                    )
-                except Exception as e:
-                    update_progress(f"Error processing uploaded file: {str(e)}")
-                    logging.error(f"Error processing uploaded file: {str(e)}")
-                    failed_count += 1
-                    log_counter(
-                        metric_name="audio_files_failed_total",
-                        labels={"whisper_model": whisper_model, "api_name": api_name},
-                        value=1
-                    )
-                    return update_progress("Processing failed: Error processing uploaded file"), "", ""
-        # Final cleanup
-        if not keep_original:
-            cleanup_files()
-
-        end_time = time.time()
-        processing_time = end_time - start_time
-        # Log processing time
-        log_histogram(
-            metric_name="audio_processing_time_seconds",
-            value=processing_time,
-            labels={"whisper_model": whisper_model, "api_name": api_name}
-        )
-
-        # Optionally, log total counts
-        log_counter(
-            metric_name="total_audio_files_processed",
-            labels={"whisper_model": whisper_model, "api_name": api_name},
-            value=processed_count
-        )
-
-        log_counter(
-            metric_name="total_audio_files_failed",
-            labels={"whisper_model": whisper_model, "api_name": api_name},
-            value=failed_count
-        )
-
-
-        final_progress = update_progress("All processing complete.")
-        final_transcriptions = "\n\n".join(all_transcriptions)
-        final_summaries = "\n\n".join(all_summaries)
-
-        return final_progress, final_transcriptions, final_summaries
-
-    except Exception as e:
-        logging.error(f"Error processing audio files: {str(e)}")
-        log_counter(
-            metric_name="audio_files_failed_total",
-            labels={"whisper_model": whisper_model, "api_name": api_name},
-            value=1
-        )
-        cleanup_files()
-        return update_progress(f"Processing failed: {str(e)}"), "", ""
-
-
-def format_transcription_with_timestamps(segments, keep_timestamps):
-    """
-    Formats the transcription segments with or without timestamps.
-
-    Parameters:
-        segments (list): List of transcription segments.
-        keep_timestamps (bool): Whether to include timestamps.
-
-    Returns:
-        str: Formatted transcription.
-    """
-    if keep_timestamps:
-        formatted_segments = []
-        for segment in segments:
-            start = segment.get('Time_Start', 0)
-            end = segment.get('Time_End', 0)
-            text = segment.get('Text', '').strip()
-
-            formatted_segments.append(f"[{start:.2f}-{end:.2f}] {text}")
-        return "\n".join(formatted_segments)
-    else:
-        return "\n".join([segment.get('Text', '').strip() for segment in segments])
-
-
-def download_youtube_audio(url):
-    try:
-        # Determine ffmpeg path based on the operating system.
-        ffmpeg_path = './Bin/ffmpeg.exe' if os.name == 'nt' else 'ffmpeg'
-
-        # Create a temporary directory
-        with tempfile.TemporaryDirectory() as temp_dir:
-            # Extract information about the video
-            with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
-                info_dict = ydl.extract_info(url, download=False)
-                sanitized_title = sanitize_filename(info_dict['title'])
-
-            # Setup the temporary filenames
-            temp_video_path = Path(temp_dir) / f"{sanitized_title}_temp.mp4"
-            temp_audio_path = Path(temp_dir) / f"{sanitized_title}.mp3"
-
-            # Initialize yt-dlp with options for downloading
-            ydl_opts = {
-                'format': 'bestaudio[ext=m4a]/best[height<=480]',  # Prefer best audio, or video up to 480p
-                'ffmpeg_location': ffmpeg_path,
-                'outtmpl': str(temp_video_path),
-                'noplaylist': True,
-                'quiet': True
-            }
-
-            # Execute yt-dlp to download the video/audio
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                ydl.download([url])
-
-            # Check if the file exists
-            if not temp_video_path.exists():
-                raise FileNotFoundError(f"Expected file was not found: {temp_video_path}")
-
-            # Use ffmpeg to extract audio
-            ffmpeg_command = [
-                ffmpeg_path,
-                '-i', str(temp_video_path),
-                '-vn',  # No video
-                '-acodec', 'libmp3lame',
-                '-b:a', '192k',
-                str(temp_audio_path)
-            ]
-            subprocess.run(ffmpeg_command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-
-            # Check if the audio file was created
-            if not temp_audio_path.exists():
-                raise FileNotFoundError(f"Expected audio file was not found: {temp_audio_path}")
-
-            # Create a persistent directory for the download if it doesn't exist
-            persistent_dir = Path("downloads")
-            persistent_dir.mkdir(exist_ok=True)
-
-            # Move the file from the temporary directory to the persistent directory
-            persistent_file_path = persistent_dir / f"{sanitized_title}.mp3"
-            os.replace(str(temp_audio_path), str(persistent_file_path))
-
-            # Add the file to the list of downloaded files
-            downloaded_files.append(str(persistent_file_path))
-
-            return str(persistent_file_path), f"Audio downloaded successfully: {sanitized_title}.mp3"
-    except Exception as e:
-        return None, f"Error downloading audio: {str(e)}"
-
-
-def process_podcast(url, title, author, keywords, custom_prompt, api_name, api_key, whisper_model,
-                    keep_original=False, enable_diarization=False, use_cookies=False, cookies=None,
-                    chunk_method=None, max_chunk_size=300, chunk_overlap=0, use_adaptive_chunking=False,
-                    use_multi_level_chunking=False, chunk_language='english', keep_timestamps=True):
-    """
-    Processes a podcast by downloading the audio, transcribing it, summarizing the transcription,
-    and adding the results to the database. Metrics are logged throughout the process.
-
-    Parameters:
-        url (str): URL of the podcast.
-        title (str): Title of the podcast.
-        author (str): Author of the podcast.
-        keywords (str): Comma-separated keywords.
-        custom_prompt (str): Custom prompt for summarization.
-        api_name (str): API name for summarization.
-        api_key (str): API key for summarization.
-        whisper_model (str): Whisper model to use for transcription.
-        keep_original (bool): Whether to keep the original audio file.
-        enable_diarization (bool): Whether to enable speaker diarization.
-        use_cookies (bool): Whether to use cookies for authenticated downloads.
-        cookies (str): JSON-formatted cookies string.
-        chunk_method (str): Method for chunking text.
-        max_chunk_size (int): Maximum size for each text chunk.
-        chunk_overlap (int): Overlap size between chunks.
-        use_adaptive_chunking (bool): Whether to use adaptive chunking.
-        use_multi_level_chunking (bool): Whether to use multi-level chunking.
-        chunk_language (str): Language for chunking.
-        keep_timestamps (bool): Whether to keep timestamps in transcription.
-
-    Returns:
-        tuple: (progress_message, transcription, summary, title, author, keywords, error_message)
-    """
-    start_time = time.time()  # Start time for processing
-    error_message = ""
-    temp_files = []
-
-    # Define labels for metrics
-    labels = {
-        "whisper_model": whisper_model,
-        "api_name": api_name if api_name else "None"
-    }
-
-    def update_progress(message):
-        """
-        Updates the progress messages.
-
-        Parameters:
-            message (str): Progress message to append.
-
-        Returns:
-            str: Combined progress messages.
-        """
-        progress.append(message)
-        return "\n".join(progress)
-
-    def cleanup_files():
-        if not keep_original:
-            for file in temp_files:
-                try:
-                    if os.path.exists(file):
-                        os.remove(file)
-                        update_progress(f"Temporary file {file} removed.")
-                except Exception as e:
-                    update_progress(f"Failed to remove temporary file {file}: {str(e)}")
-
-    progress = []  # Initialize progress messages
-
-    try:
-        # Handle cookies if required
-        if use_cookies:
-            cookies = json.loads(cookies)
-
-        # Download the podcast audio file
-        audio_file = download_audio_file(url, whisper_model, use_cookies, cookies)
-        if not audio_file:
-            raise RuntimeError("Failed to download podcast audio.")
-        temp_files.append(audio_file)
-        update_progress("Podcast downloaded successfully.")
-
-        # Extract metadata from the podcast
-        metadata = extract_metadata(url)
-        title = title or metadata.get('title', 'Unknown Podcast')
-        author = author or metadata.get('uploader', 'Unknown Author')
-
-        # Format metadata for storage
-        metadata_text = f"""
-Metadata:
-Title: {title}
-Author: {author}
-Series: {metadata.get('series', 'N/A')}
-Episode: {metadata.get('episode', 'N/A')}
-Season: {metadata.get('season', 'N/A')}
-Upload Date: {metadata.get('upload_date', 'N/A')}
-Duration: {metadata.get('duration', 'N/A')} seconds
-Description: {metadata.get('description', 'N/A')}
-"""
-
-        # Update keywords with metadata information
-        new_keywords = []
-        if metadata.get('series'):
-            new_keywords.append(f"series:{metadata['series']}")
-        if metadata.get('episode'):
-            new_keywords.append(f"episode:{metadata['episode']}")
-        if metadata.get('season'):
-            new_keywords.append(f"season:{metadata['season']}")
-
-        keywords = f"{keywords},{','.join(new_keywords)}" if keywords else ','.join(new_keywords)
-        update_progress(f"Metadata extracted - Title: {title}, Author: {author}, Keywords: {keywords}")
-
-        # Transcribe the podcast audio
-        try:
-            if enable_diarization:
-                segments = speech_to_text(audio_file, whisper_model=whisper_model, diarize=True)
-            else:
-                segments = speech_to_text(audio_file, whisper_model=whisper_model)
-            # SEems like this could be optimized... FIXME
-            def format_segment(segment):
-                start = segment.get('start', 0)
-                end = segment.get('end', 0)
-                text = segment.get('Text', '')
-
-            if isinstance(segments, dict) and 'segments' in segments:
-                segments = segments['segments']
-
-            if isinstance(segments, list):
-                transcription = format_transcription_with_timestamps(segments, keep_timestamps)
-                update_progress("Podcast transcribed successfully.")
-            else:
-                raise ValueError("Unexpected segments format received from speech_to_text.")
-
-            if not transcription.strip():
-                raise ValueError("Transcription is empty.")
-        except Exception as e:
-            error_message = f"Transcription failed: {str(e)}"
-            raise RuntimeError(error_message)
-
-        # Apply chunking to the transcription
-        chunk_options = {
-            'method': chunk_method,
-            'max_size': max_chunk_size,
-            'overlap': chunk_overlap,
-            'adaptive': use_adaptive_chunking,
-            'multi_level': use_multi_level_chunking,
-            'language': chunk_language
-        }
-        chunked_text = improved_chunking_process(transcription, chunk_options)
-
-        # Combine metadata and transcription
-        full_content = metadata_text + "\n\nTranscription:\n" + transcription
-
-        # Summarize the transcription if API is provided
-        summary = None
-        if api_name:
-            try:
-                summary = perform_summarization(api_name, chunked_text, custom_prompt, api_key)
-                update_progress("Podcast summarized successfully.")
-            except Exception as e:
-                error_message = f"Summarization failed: {str(e)}"
-                raise RuntimeError(error_message)
-        else:
-            summary = "No summary available (API not provided)"
-
-        # Add the processed podcast to the database
-        try:
-            add_media_with_keywords(
-                url=url,
-                title=title,
-                media_type='podcast',
-                content=full_content,
-                keywords=keywords,
-                prompt=custom_prompt,
-                summary=summary or "No summary available",
-                transcription_model=whisper_model,
-                author=author,
-                ingestion_date=datetime.now().strftime('%Y-%m-%d')
-            )
-            update_progress("Podcast added to database successfully.")
-        except Exception as e:
-            error_message = f"Error adding podcast to database: {str(e)}"
-            raise RuntimeError(error_message)
-
-        # Cleanup temporary files if required
-        cleanup_files()
-
-        # Calculate processing time
-        end_time = time.time()
-        processing_time = end_time - start_time
-
-        # Log successful processing
-        log_counter(
-            metric_name="podcasts_processed_total",
-            labels=labels,
-            value=1
-        )
-
-        # Log processing time
-        log_histogram(
-            metric_name="podcast_processing_time_seconds",
-            value=processing_time,
-            labels=labels
-        )
-
-        # Return the final outputs
-        final_progress = update_progress("Processing complete.")
-        return (final_progress, full_content, summary or "No summary generated.",
-                title, author, keywords, error_message)
-
-    except Exception as e:
-        # Calculate processing time up to the point of failure
-        end_time = time.time()
-        processing_time = end_time - start_time
-
-        # Log failed processing
-        log_counter(
-            metric_name="podcasts_failed_total",
-            labels=labels,
-            value=1
-        )
-
-        # Log processing time even on failure
-        log_histogram(
-            metric_name="podcast_processing_time_seconds",
-            value=processing_time,
-            labels=labels
-        )
-
-        logging.error(f"Error processing podcast: {str(e)}")
-        cleanup_files()
-        final_progress = update_progress(f"Processing failed: {str(e)}")
-        return (final_progress, "", "", "", "", "", str(e))
-
-
-#
-#
-#######################################################################################################################
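
Editor's note: for anyone auditing what this deletion removes, the module above exposed download_audio_file, process_audio_files, download_youtube_audio, and process_podcast. A minimal sketch of its URL ingestion path follows; it only runs against a checkout of the parent commit (e2a1cf9) where the package still exists, and the URL and option values are illustrative assumptions, not values from the repository.

# Usage sketch for the deleted module (assumes parent commit e2a1cf9 is checked out).
# The URLs and argument values below are hypothetical.
from App_Function_Libraries.Audio.Audio_Files import download_audio_file, process_podcast

# Download a single audio file; the module skips the download if the database
# already holds a transcript from the same whisper model.
saved_path = download_audio_file(
    "https://example.com/episode.mp3",  # hypothetical URL
    current_whisper_model="small.en",
)
print(saved_path)

# End-to-end podcast ingestion: download -> transcribe -> chunk -> summarize -> DB insert.
# Passing api_name=None skips the summarization step, per the code above.
progress, content, summary, title, author, kw, err = process_podcast(
    url="https://example.com/podcast.mp3",  # hypothetical URL
    title=None, author=None, keywords="podcast",
    custom_prompt="Summarize the key points.",
    api_name=None, api_key=None,
    whisper_model="small.en",
)
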
App_Function_Libraries/Audio/Audio_Transcription_Lib.py DELETED
@@ -1,335 +0,0 @@
-# Audio_Transcription_Lib.py
-#########################################
-# Transcription Library
-# This library is used to perform transcription of audio files.
-# Currently, uses faster_whisper for transcription.
-#
-####################
-# Function List
-#
-# 1. convert_to_wav(video_file_path, offset=0, overwrite=False)
-# 2. speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='small.en', vad_filter=False)
-#
-####################
-#
-# Import necessary libraries to run solo for testing
-import gc
-import json
-import logging
-import multiprocessing
-import os
-import queue
-import sys
-import subprocess
-import tempfile
-import threading
-import time
-# DEBUG Imports
-#from memory_profiler import profile
-import pyaudio
-from faster_whisper import WhisperModel as OriginalWhisperModel
-from typing import Optional, Union, List, Dict, Any
-#
-# Import Local
-from App_Function_Libraries.Utils.Utils import load_comprehensive_config
-from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
-#
-#######################################################################################################################
-# Function Definitions
-#
-
-# Convert video .m4a into .wav using ffmpeg
-# ffmpeg -i "example.mp4" -ar 16000 -ac 1 -c:a pcm_s16le "output.wav"
-# https://www.gyan.dev/ffmpeg/builds/
-#
-
-
-whisper_model_instance = None
-config = load_comprehensive_config()
-processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')
-total_thread_count = multiprocessing.cpu_count()
-
-
-class WhisperModel(OriginalWhisperModel):
-    tldw_dir = os.path.dirname(os.path.dirname(__file__))
-    default_download_root = os.path.join(tldw_dir, 'models', 'Whisper')
-
-    valid_model_sizes = [
-        "tiny.en", "tiny", "base.en", "base", "small.en", "small", "medium.en", "medium",
-        "large-v1", "large-v2", "large-v3", "large", "distil-large-v2", "distil-medium.en",
-        "distil-small.en", "distil-large-v3",
-    ]
-
-    def __init__(
-        self,
-        model_size_or_path: str,
-        device: str = processing_choice,
-        device_index: Union[int, List[int]] = 0,
-        compute_type: str = "default",
-        cpu_threads: int = 0,  # total_thread_count, FIXME - I think this should be 0
-        num_workers: int = 1,
-        download_root: Optional[str] = None,
-        local_files_only: bool = False,
-        files: Optional[Dict[str, Any]] = None,
-        **model_kwargs: Any
-    ):
-        if download_root is None:
-            download_root = self.default_download_root
-
-        os.makedirs(download_root, exist_ok=True)
-
-        # FIXME - validate....
-        # Also write an integration test...
-        # Check if model_size_or_path is a valid model size
-        if model_size_or_path in self.valid_model_sizes:
-            # It's a model size, so we'll use the download_root
-            model_path = os.path.join(download_root, model_size_or_path)
-            if not os.path.isdir(model_path):
-                # If it doesn't exist, we'll let the parent class download it
-                model_size_or_path = model_size_or_path  # Keep the original model size
-            else:
-                # If it exists, use the full path
-                model_size_or_path = model_path
-        else:
-            # It's not a valid model size, so assume it's a path
-            model_size_or_path = os.path.abspath(model_size_or_path)
-
-        super().__init__(
-            model_size_or_path,
-            device=device,
-            device_index=device_index,
-            compute_type=compute_type,
-            cpu_threads=cpu_threads,
-            num_workers=num_workers,
-            download_root=download_root,
-            local_files_only=local_files_only,
-            # Maybe? idk, FIXME
-            # files=files,
-            # **model_kwargs
-        )
-
-def get_whisper_model(model_name, device):
-    global whisper_model_instance
-    if whisper_model_instance is None:
-        logging.info(f"Initializing new WhisperModel with size {model_name} on device {device}")
-        whisper_model_instance = WhisperModel(model_name, device=device)
-    return whisper_model_instance
-
-# os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
-#DEBUG
-#@profile
-def convert_to_wav(video_file_path, offset=0, overwrite=False):
-    log_counter("convert_to_wav_attempt", labels={"file_path": video_file_path})
-    start_time = time.time()
-
-    out_path = os.path.splitext(video_file_path)[0] + ".wav"
-
-    if os.path.exists(out_path) and not overwrite:
-        print(f"File '{out_path}' already exists. Skipping conversion.")
-        logging.info(f"Skipping conversion as file already exists: {out_path}")
-        log_counter("convert_to_wav_skipped", labels={"file_path": video_file_path})
-        return out_path
-
-    print("Starting conversion process of .m4a to .WAV")
-    out_path = os.path.splitext(video_file_path)[0] + ".wav"
-
-    try:
-        if os.name == "nt":
-            logging.debug("ffmpeg being ran on windows")
-
-            if sys.platform.startswith('win'):
-                ffmpeg_cmd = ".\\Bin\\ffmpeg.exe"
-                logging.debug(f"ffmpeg_cmd: {ffmpeg_cmd}")
-            else:
-                ffmpeg_cmd = 'ffmpeg'  # Assume 'ffmpeg' is in PATH for non-Windows systems
-
-            command = [
-                ffmpeg_cmd,  # Assuming the working directory is correctly set where .\Bin exists
-                "-ss", "00:00:00",  # Start at the beginning of the video
-                "-i", video_file_path,
-                "-ar", "16000",  # Audio sample rate
-                "-ac", "1",  # Number of audio channels
-                "-c:a", "pcm_s16le",  # Audio codec
-                out_path
-            ]
-            try:
-                # Redirect stdin from null device to prevent ffmpeg from waiting for input
-                with open(os.devnull, 'rb') as null_file:
-                    result = subprocess.run(command, stdin=null_file, text=True, capture_output=True)
-                if result.returncode == 0:
-                    logging.info("FFmpeg executed successfully")
-                    logging.debug("FFmpeg output: %s", result.stdout)
-                else:
-                    logging.error("Error in running FFmpeg")
-                    logging.error("FFmpeg stderr: %s", result.stderr)
-                    raise RuntimeError(f"FFmpeg error: {result.stderr}")
-            except Exception as e:
-                logging.error("Error occurred - ffmpeg doesn't like windows")
-                raise RuntimeError("ffmpeg failed")
-        elif os.name == "posix":
-            os.system(f'ffmpeg -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
-        else:
-            raise RuntimeError("Unsupported operating system")
-        logging.info("Conversion to WAV completed: %s", out_path)
-        log_counter("convert_to_wav_success", labels={"file_path": video_file_path})
-    except Exception as e:
-        logging.error("speech-to-text: Error transcribing audio: %s", str(e))
-        log_counter("convert_to_wav_error", labels={"file_path": video_file_path, "error": str(e)})
-        return {"error": str(e)}
-
-    conversion_time = time.time() - start_time
-    log_histogram("convert_to_wav_duration", conversion_time, labels={"file_path": video_file_path})
-
-    gc.collect()
-    return out_path
-
-
-# Transcribe .wav into .segments.json
-#DEBUG
-#@profile
-# FIXME - I feel like the `vad_filter` should be enabled by default....
-def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='medium.en', vad_filter=False, diarize=False):
-    log_counter("speech_to_text_attempt", labels={"file_path": audio_file_path, "model": whisper_model})
-    time_start = time.time()
-
-    if audio_file_path is None:
-        log_counter("speech_to_text_error", labels={"error": "No audio file provided"})
-        raise ValueError("speech-to-text: No audio file provided")
-    logging.info("speech-to-text: Audio file path: %s", audio_file_path)
-
-    try:
-        _, file_ending = os.path.splitext(audio_file_path)
-        out_file = audio_file_path.replace(file_ending, "-whisper_model-"+whisper_model+".segments.json")
-        prettified_out_file = audio_file_path.replace(file_ending, "-whisper_model-"+whisper_model+".segments_pretty.json")
-        if os.path.exists(out_file):
-            logging.info("speech-to-text: Segments file already exists: %s", out_file)
-            with open(out_file) as f:
-                global segments
-                segments = json.load(f)
-            return segments
-
-        logging.info('speech-to-text: Starting transcription...')
-        # FIXME - revisit this
-        options = dict(language=selected_source_lang, beam_size=10, best_of=10, vad_filter=vad_filter)
-        transcribe_options = dict(task="transcribe", **options)
-        # use function and config at top of file
-        logging.debug("speech-to-text: Using whisper model: %s", whisper_model)
-        whisper_model_instance = get_whisper_model(whisper_model, processing_choice)
-        # faster_whisper transcription right here - FIXME -test batching - ha
-        segments_raw, info = whisper_model_instance.transcribe(audio_file_path, **transcribe_options)
-
-        segments = []
-        for segment_chunk in segments_raw:
-            chunk = {
-                "Time_Start": segment_chunk.start,
-                "Time_End": segment_chunk.end,
-                "Text": segment_chunk.text
-            }
-            logging.debug("Segment: %s", chunk)
-            segments.append(chunk)
-            # Print to verify its working
-            logging.info(f"{segment_chunk.start:.2f}s - {segment_chunk.end:.2f}s | {segment_chunk.text}")
-
-            # Log it as well.
-            logging.debug(
-                f"Transcribed Segment: {segment_chunk.start:.2f}s - {segment_chunk.end:.2f}s | {segment_chunk.text}")
-
-        if segments:
-            segments[0]["Text"] = f"This text was transcribed using whisper model: {whisper_model}\n\n" + segments[0]["Text"]
-
-        if not segments:
-            log_counter("speech_to_text_error", labels={"error": "No transcription produced"})
-            raise RuntimeError("No transcription produced. The audio file may be invalid or empty.")
-
-        transcription_time = time.time() - time_start
-        logging.info("speech-to-text: Transcription completed in %.2f seconds", transcription_time)
-        log_histogram("speech_to_text_duration", transcription_time, labels={"file_path": audio_file_path, "model": whisper_model})
-        log_counter("speech_to_text_success", labels={"file_path": audio_file_path, "model": whisper_model})
-        # Save the segments to a JSON file - prettified and non-prettified
-        # FIXME refactor so this is an optional flag to save either the prettified json file or the normal one
-        save_json = True
-        if save_json:
-            logging.info("speech-to-text: Saving segments to JSON file")
-            output_data = {'segments': segments}
-            logging.info("speech-to-text: Saving prettified JSON to %s", prettified_out_file)
-            with open(prettified_out_file, 'w') as f:
-                json.dump(output_data, f, indent=2)
-
-            logging.info("speech-to-text: Saving JSON to %s", out_file)
-            with open(out_file, 'w') as f:
-                json.dump(output_data, f)
-
-        logging.debug(f"speech-to-text: returning {segments[:500]}")
-        gc.collect()
-        return segments
-
-    except Exception as e:
-        logging.error("speech-to-text: Error transcribing audio: %s", str(e))
-        log_counter("speech_to_text_error", labels={"file_path": audio_file_path, "model": whisper_model, "error": str(e)})
-        raise RuntimeError("speech-to-text: Error transcribing audio")
-
-
-def record_audio(duration, sample_rate=16000, chunk_size=1024):
-    log_counter("record_audio_attempt", labels={"duration": duration})
-    p = pyaudio.PyAudio()
-    stream = p.open(format=pyaudio.paInt16,
-                    channels=1,
-                    rate=sample_rate,
-                    input=True,
-                    frames_per_buffer=chunk_size)
-
-    print("Recording...")
-    frames = []
-    stop_recording = threading.Event()
-    audio_queue = queue.Queue()
-
-    def audio_callback():
-        for _ in range(0, int(sample_rate / chunk_size * duration)):
-            if stop_recording.is_set():
-                break
-            data = stream.read(chunk_size)
-            audio_queue.put(data)
-
-    audio_thread = threading.Thread(target=audio_callback)
-    audio_thread.start()
-
-    return p, stream, audio_queue, stop_recording, audio_thread
-
-
-def stop_recording(p, stream, audio_queue, stop_recording_event, audio_thread):
-    log_counter("stop_recording_attempt")
-    start_time = time.time()
-    stop_recording_event.set()
-    audio_thread.join()
-
-    frames = []
-    while not audio_queue.empty():
-        frames.append(audio_queue.get())
-
-    print("Recording finished.")
-
-    stream.stop_stream()
-    stream.close()
-    p.terminate()
-
-    stop_time = time.time() - start_time
-    log_histogram("stop_recording_duration", stop_time)
-    log_counter("stop_recording_success")
-    return b''.join(frames)
-
-def save_audio_temp(audio_data, sample_rate=16000):
-    log_counter("save_audio_temp_attempt")
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-        import wave
-        wf = wave.open(temp_file.name, 'wb')
-        wf.setnchannels(1)
-        wf.setsampwidth(2)
-        wf.setframerate(sample_rate)
-        wf.writeframes(audio_data)
-        wf.close()
-    log_counter("save_audio_temp_success")
-    return temp_file.name
-
-#
-#
-#######################################################################################################################
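
Editor's note: the convert-then-transcribe flow this file implemented is summarized in the sketch below, again assuming a checkout where the module still exists; "input.mp4" is a placeholder path, not a file from the repository.

# Sketch of the deleted transcription flow (assumes parent commit e2a1cf9).
from App_Function_Libraries.Audio.Audio_Transcription_Lib import convert_to_wav, speech_to_text

wav_path = convert_to_wav("input.mp4")  # 16 kHz mono PCM via ffmpeg, per the command above
segments = speech_to_text(wav_path, selected_source_lang="en", whisper_model="medium.en")

# Cached runs return the saved JSON dict; fresh runs return a list of segment dicts,
# so callers in this codebase normalize before use.
if isinstance(segments, dict) and 'segments' in segments:
    segments = segments['segments']

# Each segment carries Time_Start / Time_End / Text keys.
for seg in segments[:3]:
    print(f"[{seg['Time_Start']:.2f}-{seg['Time_End']:.2f}] {seg['Text']}")
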
App_Function_Libraries/Audio/Diarization_Lib.py DELETED
@@ -1,275 +0,0 @@
1
- # Diarization_Lib.py
2
- #########################################
3
- # Diarization Library
4
- # This library is used to perform diarization of audio files.
5
- # Currently, uses FIXME for transcription.
6
- #
7
- ####################
8
- ####################
9
- # Function List
10
- #
11
- # 1. speaker_diarize(video_file_path, segments, embedding_model = "pyannote/embedding", embedding_size=512, num_speakers=0)
12
- #
13
- ####################
14
- # Import necessary libraries
15
- import logging
16
- from pathlib import Path
17
- from typing import Dict, List, Any
18
-
19
- #
20
- # Import Local Libraries
21
- from App_Function_Libraries.Audio.Audio_Transcription_Lib import speech_to_text
22
- #
23
- # Import 3rd Party Libraries
24
- from pyannote.audio.pipelines.speaker_diarization import SpeakerDiarization
25
- import yaml
26
- #
27
- #######################################################################################################################
28
- # Function Definitions
29
- #
30
-
31
- def load_pipeline_from_pretrained(path_to_config: str | Path) -> SpeakerDiarization:
32
- path_to_config = Path(path_to_config).resolve()
33
- logging.debug(f"Loading pyannote pipeline from {path_to_config}...")
34
-
35
- if not path_to_config.exists():
36
- raise FileNotFoundError(f"Config file not found: {path_to_config}")
37
-
38
- # Load the YAML configuration
39
- with open(path_to_config, 'r') as config_file:
40
- config = yaml.safe_load(config_file)
41
-
42
- # Debug: print the entire config
43
- logging.debug(f"Loaded config: {config}")
44
-
45
- # Create the SpeakerDiarization pipeline
46
- try:
47
- pipeline = SpeakerDiarization(
48
- segmentation=config['pipeline']['params']['segmentation'],
49
- embedding=config['pipeline']['params']['embedding'],
50
- clustering=config['pipeline']['params']['clustering'],
51
- )
52
- except KeyError as e:
53
- logging.error(f"Error accessing config key: {e}")
54
- raise
55
-
56
- # Set other parameters
57
- try:
58
- pipeline_params = {
59
- "segmentation": {},
60
- "clustering": {},
61
- }
62
-
63
- if 'params' in config and 'segmentation' in config['params']:
64
- if 'min_duration_off' in config['params']['segmentation']:
65
- pipeline_params["segmentation"]["min_duration_off"] = config['params']['segmentation']['min_duration_off']
66
-
67
- if 'params' in config and 'clustering' in config['params']:
68
- if 'method' in config['params']['clustering']:
69
- pipeline_params["clustering"]["method"] = config['params']['clustering']['method']
70
- if 'min_cluster_size' in config['params']['clustering']:
71
- pipeline_params["clustering"]["min_cluster_size"] = config['params']['clustering']['min_cluster_size']
72
- if 'threshold' in config['params']['clustering']:
73
- pipeline_params["clustering"]["threshold"] = config['params']['clustering']['threshold']
74
-
75
- if 'pipeline' in config and 'params' in config['pipeline']:
76
- if 'embedding_batch_size' in config['pipeline']['params']:
77
- pipeline_params["embedding_batch_size"] = config['pipeline']['params']['embedding_batch_size']
78
- if 'embedding_exclude_overlap' in config['pipeline']['params']:
79
- pipeline_params["embedding_exclude_overlap"] = config['pipeline']['params']['embedding_exclude_overlap']
80
- if 'segmentation_batch_size' in config['pipeline']['params']:
81
- pipeline_params["segmentation_batch_size"] = config['pipeline']['params']['segmentation_batch_size']
82
-
83
- logging.debug(f"Pipeline params: {pipeline_params}")
84
- pipeline.instantiate(pipeline_params)
85
- except KeyError as e:
86
- logging.error(f"Error accessing config key: {e}")
87
- raise
88
- except Exception as e:
89
- logging.error(f"Error instantiating pipeline: {e}")
90
- raise
91
-
92
- return pipeline
93
-
94
-
95
- def audio_diarization(audio_file_path: str) -> list:
96
- logging.info('audio-diarization: Loading pyannote pipeline')
97
-
98
- base_dir = Path(__file__).parent.resolve()
99
- config_path = base_dir / 'models' / 'pyannote_diarization_config.yaml'
100
- logging.info(f"audio-diarization: Loading pipeline from {config_path}")
101
-
102
- try:
103
- pipeline = load_pipeline_from_pretrained(config_path)
104
- except Exception as e:
105
- logging.error(f"Failed to load pipeline: {str(e)}")
106
- raise
107
-
108
- logging.info(f"audio-diarization: Audio file path: {audio_file_path}")
109
-
110
- try:
111
- logging.info('audio-diarization: Starting diarization...')
112
- diarization_result = pipeline(audio_file_path)
113
-
114
- segments = []
115
- for turn, _, speaker in diarization_result.itertracks(yield_label=True):
116
- segment = {
117
- "start": turn.start,
118
- "end": turn.end,
119
- "speaker": speaker
120
- }
121
- logging.debug(f"Segment: {segment}")
122
- segments.append(segment)
123
- logging.info("audio-diarization: Diarization completed with pyannote")
124
-
125
- return segments
126
-
127
- except Exception as e:
128
- logging.error(f"audio-diarization: Error performing diarization: {str(e)}")
129
- raise RuntimeError("audio-diarization: Error performing diarization") from e
130
-
131
-
132
- # Old
133
- # def audio_diarization(audio_file_path):
134
- # logging.info('audio-diarization: Loading pyannote pipeline')
135
- #
136
- # #config file loading
137
- # current_dir = os.path.dirname(os.path.abspath(__file__))
138
- # # Construct the path to the config file
139
- # config_path = os.path.join(current_dir, 'Config_Files', 'config.txt')
140
- # # Read the config file
141
- # config = configparser.ConfigParser()
142
- # config.read(config_path)
143
- # processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')
144
- #
145
- # base_dir = Path(__file__).parent.resolve()
146
- # config_path = base_dir / 'models' / 'config.yaml'
147
- # pipeline = load_pipeline_from_pretrained(config_path)
148
- #
149
- # time_start = time.time()
150
- # if audio_file_path is None:
151
- # raise ValueError("audio-diarization: No audio file provided")
152
- # logging.info("audio-diarization: Audio file path: %s", audio_file_path)
153
- #
154
- # try:
155
- # _, file_ending = os.path.splitext(audio_file_path)
156
- # out_file = audio_file_path.replace(file_ending, ".diarization.json")
157
- # prettified_out_file = audio_file_path.replace(file_ending, ".diarization_pretty.json")
158
- # if os.path.exists(out_file):
159
- # logging.info("audio-diarization: Diarization file already exists: %s", out_file)
160
- # with open(out_file) as f:
161
- # global diarization_result
162
- # diarization_result = json.load(f)
163
- # return diarization_result
164
- #
165
- # logging.info('audio-diarization: Starting diarization...')
166
- # diarization_result = pipeline(audio_file_path)
167
- #
168
- # segments = []
169
- # for turn, _, speaker in diarization_result.itertracks(yield_label=True):
170
- # chunk = {
171
- # "Time_Start": turn.start,
172
- # "Time_End": turn.end,
173
- # "Speaker": speaker
174
- # }
175
- # logging.debug("Segment: %s", chunk)
176
- # segments.append(chunk)
177
- # logging.info("audio-diarization: Diarization completed with pyannote")
178
- #
179
- # output_data = {'segments': segments}
180
- #
181
- # logging.info("audio-diarization: Saving prettified JSON to %s", prettified_out_file)
182
- # with open(prettified_out_file, 'w') as f:
183
- # json.dump(output_data, f, indent=2)
184
- #
185
- # logging.info("audio-diarization: Saving JSON to %s", out_file)
186
- # with open(out_file, 'w') as f:
187
- # json.dump(output_data, f)
188
- #
189
- # except Exception as e:
190
- # logging.error("audio-diarization: Error performing diarization: %s", str(e))
191
- # raise RuntimeError("audio-diarization: Error performing diarization")
192
- # return segments
193
-
194
- def combine_transcription_and_diarization(audio_file_path: str) -> List[Dict[str, Any]]:
195
- logging.info('combine-transcription-and-diarization: Starting transcription and diarization...')
196
-
197
- try:
198
- logging.info('Performing speech-to-text...')
199
- transcription_result = speech_to_text(audio_file_path)
200
- logging.info(f"Transcription result type: {type(transcription_result)}")
201
- logging.info(f"Transcription result: {transcription_result[:3] if isinstance(transcription_result, list) and len(transcription_result) > 3 else transcription_result}")
202
-
203
- logging.info('Performing audio diarization...')
204
- diarization_result = audio_diarization(audio_file_path)
205
- logging.info(f"Diarization result type: {type(diarization_result)}")
206
- logging.info(f"Diarization result sample: {diarization_result[:3] if isinstance(diarization_result, list) and len(diarization_result) > 3 else diarization_result}")
207
-
208
- if not transcription_result:
209
- logging.error("Empty result from transcription")
210
- return []
211
-
212
- if not diarization_result:
213
- logging.error("Empty result from diarization")
214
- return []
215
-
216
- # Handle the case where transcription_result is a dict with a 'segments' key
217
- if isinstance(transcription_result, dict) and 'segments' in transcription_result:
218
- transcription_segments = transcription_result['segments']
219
- elif isinstance(transcription_result, list):
220
- transcription_segments = transcription_result
221
- else:
222
- logging.error(f"Unexpected transcription result format: {type(transcription_result)}")
223
- return []
224
-
225
- logging.info(f"Number of transcription segments: {len(transcription_segments)}")
226
- logging.info(f"Transcription segments sample: {transcription_segments[:3] if len(transcription_segments) > 3 else transcription_segments}")
227
-
228
- if not isinstance(diarization_result, list):
229
- logging.error(f"Unexpected diarization result format: {type(diarization_result)}")
230
- return []
231
-
232
- combined_result = []
233
- for transcription_segment in transcription_segments:
234
- if not isinstance(transcription_segment, dict):
235
- logging.warning(f"Unexpected transcription segment format: {transcription_segment}")
236
- continue
237
-
238
- for diarization_segment in diarization_result:
239
- if not isinstance(diarization_segment, dict):
240
- logging.warning(f"Unexpected diarization segment format: {diarization_segment}")
241
- continue
242
-
243
- try:
244
- trans_start = transcription_segment.get('Time_Start', 0)
245
- trans_end = transcription_segment.get('Time_End', 0)
246
- diar_start = diarization_segment.get('start', 0)
247
- diar_end = diarization_segment.get('end', 0)
248
-
249
- if trans_start >= diar_start and trans_end <= diar_end:
250
- combined_segment = {
251
- "Time_Start": trans_start,
252
- "Time_End": trans_end,
253
- "Speaker": diarization_segment.get('speaker', 'Unknown'),
254
- "Text": transcription_segment.get('Text', '')
255
- }
256
- combined_result.append(combined_segment)
257
- break
258
- except Exception as e:
259
- logging.error(f"Error processing segment: {str(e)}")
260
- logging.error(f"Transcription segment: {transcription_segment}")
261
- logging.error(f"Diarization segment: {diarization_segment}")
262
- continue
263
-
264
- logging.info(f"Combined result length: {len(combined_result)}")
265
- logging.info(f"Combined result sample: {combined_result[:3] if len(combined_result) > 3 else combined_result}")
266
- return combined_result
267
-
268
- except Exception as e:
269
- logging.error(f"Error in combine_transcription_and_diarization: {str(e)}", exc_info=True)
270
- return []
271
-
272
-
273
- #
274
- #
275
- #######################################################################################################################
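
For reference, the merge step above pairs each transcription segment with the first diarization turn that fully contains its time span. Below is a minimal standalone sketch of that containment test; the key names ('Time_Start'/'Time_End'/'Text' for transcription, 'start'/'end'/'speaker' for diarization) follow the code above, and merge_segments itself is a hypothetical helper, not part of the deleted module.

# Minimal sketch of the containment-based merge, assuming the key names above.
from typing import Any, Dict, List

def merge_segments(transcription: List[Dict[str, Any]],
                   diarization: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    merged = []
    for t in transcription:
        for d in diarization:
            # A transcript segment is assigned to the first speaker turn
            # that fully contains it.
            if t.get('Time_Start', 0) >= d.get('start', 0) and t.get('Time_End', 0) <= d.get('end', 0):
                merged.append({
                    "Time_Start": t.get('Time_Start', 0),
                    "Time_End": t.get('Time_End', 0),
                    "Speaker": d.get('speaker', 'Unknown'),
                    "Text": t.get('Text', ''),
                })
                break
    return merged

As in the deleted function, a segment that straddles a speaker-turn boundary matches no turn and is silently dropped.
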
App_Function_Libraries/Audio/__init__.py DELETED
File without changes
App_Function_Libraries/Benchmarks_Evaluations/Confabulation_check.py DELETED
@@ -1,81 +0,0 @@
1
- # Confabulation_check.py
2
- #
3
- # This file contains functions used to check a generated summary for confabulation (hallucinated content).
4
- #
5
- #
6
- # Imports
7
- #
8
- # External Imports
9
- #
10
- # Local Imports
11
- #
12
- #
13
- ####################################################################################################
14
- #
15
- # Functions:
16
- from App_Function_Libraries.Chat import chat_api_call
17
- from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import validate_inputs, detailed_api_error
18
-
19
-
20
- def simplified_geval(transcript: str, summary: str, api_name: str, api_key: str, temp: float = 0.7) -> str:
21
- """
22
- Perform a simplified version of G-Eval using a single query to evaluate the summary.
23
-
24
- Args:
25
- transcript (str): The original transcript
26
- summary (str): The summary to be evaluated
27
- api_name (str): The name of the LLM API to use
28
- api_key (str): The API key for the chosen LLM
29
- temp (float, optional): The temperature parameter for the API call. Defaults to 0.7.
30
-
31
- Returns:
32
- str: The evaluation result
33
- """
34
- try:
35
- validate_inputs(transcript, summary, api_name, api_key)
36
- except ValueError as e:
37
- return str(e)
38
-
39
- prompt = f"""You are an AI assistant tasked with evaluating the quality of a summary. You will be given an original transcript and a summary of that transcript. Your task is to evaluate the summary based on the following criteria:
40
-
41
- 1. Coherence (1-5): How well-structured and organized is the summary?
42
- 2. Consistency (1-5): How factually aligned is the summary with the original transcript?
43
- 3. Fluency (1-3): How well-written is the summary in terms of grammar, spelling, and readability?
44
- 4. Relevance (1-5): How well does the summary capture the important information from the transcript?
45
-
46
- Please provide a score for each criterion and a brief explanation for your scoring. Then, give an overall assessment of the summary's quality.
47
-
48
- Original Transcript:
49
- {transcript}
50
-
51
- Summary to Evaluate:
52
- {summary}
53
-
54
- Please provide your evaluation in the following format:
55
- Coherence: [score] - [brief explanation]
56
- Consistency: [score] - [brief explanation]
57
- Fluency: [score] - [brief explanation]
58
- Relevance: [score] - [brief explanation]
59
-
60
- Overall Assessment: [Your overall assessment of the summary's quality]
61
- """
62
-
63
- try:
64
- result = chat_api_call(
65
- api_name,
66
- api_key,
67
- prompt,
68
- "",
69
- temp=temp,
70
- system_message="You are a helpful AI assistant tasked with evaluating summaries."
71
- )
72
- except Exception as e:
73
- return detailed_api_error(api_name, e)
74
-
75
- formatted_result = f"""
76
- Confabulation Check Results:
77
-
78
- {result}
79
- """
80
-
81
- return formatted_result
 
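A hedged usage sketch for simplified_geval follows; the API name, key, and texts are placeholder values, and the call assumes chat_api_call resolves on the import path used above.

# Hypothetical usage; "openai" and the key below are placeholders.
from App_Function_Libraries.Benchmarks_Evaluations.Confabulation_check import simplified_geval

transcript = "Speaker 1: Q3 revenue grew 12%. Speaker 2: Hiring resumes in October."
summary = "Revenue grew 12% in Q3, and hiring resumes in October."

result = simplified_geval(
    transcript=transcript,
    summary=summary,
    api_name="openai",  # any endpoint accepted by chat_api_call
    api_key="sk-...",   # placeholder
    temp=0.3,           # lower temperature for more stable scoring
)
print(result)  # prints the "Confabulation Check Results:" block with per-criterion scores
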
App_Function_Libraries/Benchmarks_Evaluations/__init__.py DELETED
File without changes
App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py DELETED
@@ -1,498 +0,0 @@
1
- #######################################################################################################################
2
- #
3
- # Evaluations_Benchmarks_tab.py
4
- #
5
- # Description: This file contains the code to evaluate the generated text using G-Eval metric.
6
- #
7
- # Scripts taken from https://github.com/microsoft/promptflow/tree/main/examples/flows/evaluation/eval-summarization and modified.
8
- #
9
- import configparser
10
- import inspect
11
- import json
12
- import logging
13
- import os
14
- import re
15
- from typing import Dict, Callable, List, Any
16
-
17
- import gradio as gr
18
- from tenacity import (
19
- RetryError,
20
- Retrying,
21
- after_log,
22
- before_sleep_log,
23
- stop_after_attempt,
24
- wait_random_exponential,
25
- )
26
-
27
- from App_Function_Libraries.Chat import chat_api_call
28
-
29
- #
30
- #######################################################################################################################
31
- #
32
- # Start of G-Eval.py
33
-
34
- logger = logging.getLogger(__name__)
35
-
36
- current_dir = os.path.dirname(os.path.abspath(__file__))
37
- # Construct the path to the config file
38
- config_path = os.path.join(current_dir, 'Config_Files', 'config.txt')
39
- # Read the config file
40
- config = configparser.ConfigParser()
41
- config.read(config_path)
42
-
43
-
44
- def aggregate(
45
- fluency_list: List[float],
46
- consistency_list: List[float],
47
- relevance_list: List[float],
48
- coherence_list: List[float],
49
- ) -> Dict[str, float]:
50
- """
51
- Takes list of scores for 4 dims and outputs average for them.
52
-
53
- Args:
54
- fluency_list (List(float)): list of fluency scores
55
- consistency_list (List(float)): list of consistency scores
56
- relevance_list (List(float)): list of relevance scores
57
- coherence_list (List(float)): list of coherence scores
58
-
59
- Returns:
60
- Dict[str, float]: Returns average scores
61
- """
62
- average_fluency = sum(fluency_list) / len(fluency_list)
63
- average_consistency = sum(consistency_list) / len(consistency_list)
64
- average_relevance = sum(relevance_list) / len(relevance_list)
65
- average_coherence = sum(coherence_list) / len(coherence_list)
66
-
67
- log_metric("average_fluency", average_fluency)
68
- log_metric("average_consistency", average_consistency)
69
- log_metric("average_relevance", average_relevance)
70
- log_metric("average_coherence", average_coherence)
71
-
72
- return {
73
- "average_fluency": average_fluency,
74
- "average_consistency": average_consistency,
75
- "average_relevance": average_relevance,
76
- "average_coherence": average_coherence,
77
- }
78
-
79
- def run_geval(transcript: str, summary: str, api_name: str, api_key: str, save: bool = False):
80
- try:
81
- validate_inputs(transcript, summary, api_name, api_key)
82
- except ValueError as e:
83
- return str(e)
84
-
85
- prompts = {
86
- "coherence": """You will be given one summary written for a source document.
87
-
88
- Your task is to rate the summary on one metric.
89
-
90
- Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
91
-
92
- Evaluation Criteria:
93
-
94
- Coherence (1-5) - the collective quality of all sentences. We align this dimension with the DUC quality question of structure and coherence whereby "the summary should be well-structured and well-organized. The summary should not just be a heap of related information, but should build from sentence to a coherent body of information about a topic."
95
-
96
- Evaluation Steps:
97
-
98
- 1. Read the source document carefully and identify the main topic and key points.
99
- 2. Read the summary and compare it to the source document. Check if the summary covers the main topic and key points of the source document, and if it presents them in a clear and logical order.
100
- 3. Assign a score for coherence on a scale of 1 to 5, where 1 is the lowest and 5 is the highest based on the Evaluation Criteria.
101
-
102
-
103
- Example:
104
-
105
-
106
- Source Document:
107
-
108
- {{Document}}
109
-
110
- Summary:
111
-
112
- {{Summary}}
113
-
114
-
115
- Evaluation Form (scores ONLY):
116
-
117
- - Coherence:""",
118
- "consistency": """You will be given a source document. You will then be given one summary written for this source document.
119
-
120
- Your task is to rate the summary on one metric.
121
-
122
- Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
123
-
124
-
125
- Evaluation Criteria:
126
-
127
- Consistency (1-5) - the factual alignment between the summary and the summarized source. A factually consistent summary contains only statements that are entailed by the source document. Annotators were also asked to penalize summaries that contained hallucinated facts.
128
-
129
- Evaluation Steps:
130
-
131
- 1. Read the source document carefully and identify the main facts and details it presents.
132
- 2. Read the summary and compare it to the source document. Check if the summary contains any factual errors that are not supported by the source document.
133
- 3. Assign a score for consistency based on the Evaluation Criteria.
134
-
135
-
136
- Example:
137
-
138
-
139
- Source Document:
140
-
141
- {{Document}}
142
-
143
- Summary:
144
-
145
- {{Summary}}
146
-
147
-
148
- Evaluation Form (scores ONLY):
149
-
150
- - Consistency:""",
151
- "fluency": """You will be given one summary written for a source document.
152
-
153
- Your task is to rate the summary on one metric.
154
-
155
- Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
156
-
157
-
158
- Evaluation Criteria:
159
-
160
- Fluency (1-3): the quality of the summary in terms of grammar, spelling, punctuation, word choice, and sentence structure.
161
-
162
- - 1: Poor. The summary has many errors that make it hard to understand or sound unnatural.
163
- - 2: Fair. The summary has some errors that affect the clarity or smoothness of the text, but the main points are still comprehensible.
164
- - 3: Good. The summary has few or no errors and is easy to read and follow.
165
-
166
-
167
- Example:
168
-
169
- Summary:
170
-
171
- {{Summary}}
172
-
173
-
174
- Evaluation Form (scores ONLY):
175
-
176
- - Fluency (1-3):""",
177
- "relevance": """You will be given one summary written for a source document.
178
-
179
- Your task is to rate the summary on one metric.
180
-
181
- Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
182
-
183
- Evaluation Criteria:
184
-
185
- Relevance (1-5) - selection of important content from the source. The summary should include only important information from the source document. Annotators were instructed to penalize summaries which contained redundancies and excess information.
186
-
187
- Evaluation Steps:
188
-
189
- 1. Read the summary and the source document carefully.
190
- 2. Compare the summary to the source document and identify the main points of the source document.
191
- 3. Assess how well the summary covers the main points of the source document, and how much irrelevant or redundant information it contains.
192
- 4. Assign a relevance score from 1 to 5.
193
-
194
-
195
- Example:
196
-
197
-
198
- Source Document:
199
-
200
- {{Document}}
201
-
202
- Summary:
203
-
204
- {{Summary}}
205
-
206
-
207
- Evaluation Form (scores ONLY):
208
-
209
- - Relevance:"""
210
- }
211
-
212
- scores = {}
213
- explanations = {}
214
- for metric, prompt in prompts.items():
215
- full_prompt = prompt.replace("{{Document}}", transcript).replace("{{Summary}}", summary)
216
- try:
217
- score = geval_summarization(full_prompt, 5 if metric != "fluency" else 3, api_name, api_key)
218
- scores[metric] = score
219
- explanations[metric] = "Score based on the evaluation criteria."
220
- except Exception as e:
221
- error_message = detailed_api_error(api_name, e)
222
- return error_message
223
-
224
- avg_scores = aggregate([scores['fluency']], [scores['consistency']],
225
- [scores['relevance']], [scores['coherence']])
226
-
227
- results = {
228
- "scores": scores,
229
- "average_scores": avg_scores
230
- }
231
- logging.debug("Results: %s", results)
232
-
233
- if save:
234
- logging.debug("Saving results to geval_results.json")
235
- save_eval_results(results)
236
- logging.debug("Results saved to geval_results.json")
237
-
238
- formatted_result = f"""
239
- G-Eval Summarization Results:
240
-
241
- Coherence: {scores['coherence']:.2f} - {explanations['coherence']}
242
- Consistency: {scores['consistency']:.2f} - {explanations['consistency']}
243
- Fluency: {scores['fluency']:.2f} - {explanations['fluency']}
244
- Relevance: {scores['relevance']:.2f} - {explanations['relevance']}
245
-
246
- Overall Assessment: The summary has been evaluated on four key metrics.
247
- The average scores are:
248
- Fluency: {avg_scores['average_fluency']:.2f}
249
- Consistency: {avg_scores['average_consistency']:.2f}
250
- Relevance: {avg_scores['average_relevance']:.2f}
251
- Coherence: {avg_scores['average_coherence']:.2f}
252
-
253
- These scores indicate the overall quality of the summary in terms of its
254
- coherence, consistency with the original text, fluency of language, and
255
- relevance of content.
256
- """
257
-
258
- return formatted_result
259
-
260
-
261
- def create_geval_tab():
262
- with gr.Tab("G-Eval", id="g-eval"):
263
- gr.Markdown("# G-Eval Summarization Evaluation")
264
- with gr.Row():
265
- with gr.Column():
266
- document_input = gr.Textbox(label="Source Document", lines=10)
267
- summary_input = gr.Textbox(label="Summary", lines=5)
268
- api_name_input = gr.Dropdown(
269
- choices=["OpenAI", "Anthropic", "Cohere", "Groq", "OpenRouter", "DeepSeek", "HuggingFace", "Mistral", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "Local-LLM", "Ollama"],
270
- label="Select API"
271
- )
272
- api_key_input = gr.Textbox(label="API Key (if required)", type="password")
273
- save_value = gr.Checkbox(label="Save Results to a JSON file(geval_results.json)")
274
- evaluate_button = gr.Button("Evaluate Summary")
275
- with gr.Column():
276
- output = gr.Textbox(label="Evaluation Results", lines=10)
277
-
278
- evaluate_button.click(
279
- fn=run_geval,
280
- inputs=[document_input, summary_input, api_name_input, api_key_input, save_value],
281
- outputs=output
282
- )
283
-
284
- return document_input, summary_input, api_name_input, api_key_input, evaluate_button, output
285
-
286
-
287
- def parse_output(output: str, max_score: float) -> float:
288
- """
289
- Function that extracts numerical score from the beginning of string
290
-
291
- Args:
292
- output (str): String to search
293
- max_score (float): Maximum score allowed
294
-
295
- Returns:
296
- float: The extracted score
297
- """
298
- matched: List[str] = re.findall(r"(?<!\S)\d+(?:\.\d+)?", output)
299
- if matched:
300
- if len(matched) == 1:
301
- score = float(matched[0])
302
- if score > max_score:
303
- raise ValueError(f"Parsed number: {score} was larger than max score: {max}")
304
- else:
305
- raise ValueError(f"More than one number detected in input. Input to parser was: {output}")
306
- else:
307
- raise ValueError(f'No number detected in input. Input to parser was "{output}". ')
308
- return score
309
-
310
- def geval_summarization(
311
- prompt_with_src_and_gen: str,
312
- max_score: float,
313
- api_endpoint: str,
314
- api_key: str,
315
- ) -> float:
316
- model = get_model_from_config(api_endpoint)
317
-
318
- try:
319
- for attempt in Retrying(
320
- reraise=True,
321
- before_sleep=before_sleep_log(logger, logging.INFO),
322
- after=after_log(logger, logging.INFO),
323
- wait=wait_random_exponential(multiplier=1, min=1, max=120),
324
- stop=stop_after_attempt(10),
325
- ):
326
- with attempt:
327
- system_message="You are a helpful AI assistant"
328
- # TEMP setting for Confabulation check
329
- temp = 0.7
330
- logging.info(f"Debug - geval_summarization Function - API Endpoint: {api_endpoint}")
331
- try:
332
- response = chat_api_call(api_endpoint, api_key, prompt_with_src_and_gen, "", temp, system_message)
333
- except Exception as e:
334
- raise ValueError(f"Unsupported API endpoint: {api_endpoint}")
335
- except RetryError:
336
- logger.exception(f"geval {api_endpoint} call failed\nInput prompt was: {prompt_with_src_and_gen}")
337
- raise
338
-
339
- try:
340
- score = parse_output(response, max_score)
341
- except ValueError as e:
342
- logger.warning(f"Error parsing output: {e}")
343
- score = 0
344
-
345
- return score
346
-
347
-
348
- def get_model_from_config(api_name: str) -> str:
349
- model = config.get('models', api_name)
350
- if isinstance(model, dict):
351
- # If the model is a dictionary, return a specific key or a default value
352
- return model.get('name', str(model)) # Adjust 'name' to the appropriate key if needed
353
- return str(model) if model is not None else ""
354
-
355
- def aggregate_llm_scores(llm_responses: List[str], max_score: float) -> float:
356
- """Parse and average valid scores from the generated responses of
357
- the G-Eval LLM call.
358
-
359
- Args:
360
- llm_responses (List[str]): List of scores from multiple LLMs
361
- max_score (float): The maximum score allowed.
362
-
363
- Returns:
364
- float: The average of all the valid scores
365
- """
366
- all_scores = []
367
- error_count = 0
368
- for generated in llm_responses:
369
- try:
370
- parsed = parse_output(generated, max_score)
371
- all_scores.append(parsed)
372
- except ValueError as e:
373
- logger.warning(e)
374
- error_count += 1
375
- if error_count:
376
- logger.warning(f"{error_count} out of 20 scores were discarded due to corrupt g-eval generation")
377
- score = sum(all_scores) / len(all_scores) if all_scores else 0.0
378
- return score
379
-
380
-
381
- def validate_inputs(document: str, summary: str, api_name: str, api_key: str) -> None:
382
- """
383
- Validate inputs for the G-Eval function.
384
-
385
- Args:
386
- document (str): The source document
387
- summary (str): The summary to evaluate
388
- api_name (str): The name of the API to use
389
- api_key (str): The API key
390
-
391
- Raises:
392
- ValueError: If any of the inputs are invalid
393
- """
394
- if not document.strip():
395
- raise ValueError("Source document cannot be empty")
396
- if not summary.strip():
397
- raise ValueError("Summary cannot be empty")
398
- if api_name.lower() not in ["openai", "anthropic", "cohere", "groq", "openrouter", "deepseek", "huggingface",
399
- "mistral", "llama.cpp", "kobold", "ooba", "tabbyapi", "vllm", "local-llm", "ollama"]:
400
- raise ValueError(f"Unsupported API: {api_name}")
401
-
402
-
403
- def detailed_api_error(api_name: str, error: Exception) -> str:
404
- """
405
- Generate a detailed error message for API failures.
406
-
407
- Args:
408
- api_name (str): The name of the API that failed
409
- error (Exception): The exception that was raised
410
-
411
- Returns:
412
- str: A detailed error message
413
- """
414
- error_type = type(error).__name__
415
- error_message = str(error)
416
- return f"API Failure: {api_name}\nError Type: {error_type}\nError Message: {error_message}\nPlease check your API key and network connection, and try again."
417
-
418
-
419
- def save_eval_results(results: Dict[str, Any], filename: str = "geval_results.json") -> None:
420
- """
421
- Save evaluation results to a JSON file.
422
-
423
- Args:
424
- results (Dict[str, Any]): The evaluation results
425
- filename (str): The name of the file to save results to
426
- """
427
- with open(filename, 'w') as f:
428
- json.dump(results, f, indent=2)
429
- print(f"Results saved to {filename}")
430
-
431
-
432
-
433
-
434
- #
435
- #
436
- #######################################################################################################################
437
- #
438
- # Taken from: https://github.com/microsoft/promptflow/blob/b5a68f45e4c3818a29e2f79a76f2e73b8ea6be44/src/promptflow-core/promptflow/_core/metric_logger.py
439
-
440
- class MetricLoggerManager:
441
- _instance = None
442
-
443
- def __init__(self):
444
- self._metric_loggers = []
445
-
446
- @staticmethod
447
- def get_instance() -> "MetricLoggerManager":
448
- if MetricLoggerManager._instance is None:
449
- MetricLoggerManager._instance = MetricLoggerManager()
450
- return MetricLoggerManager._instance
451
-
452
- def log_metric(self, key, value, variant_id=None):
453
- for logger in self._metric_loggers:
454
- if len(inspect.signature(logger).parameters) == 2:
455
- logger(key, value) # If the logger only accepts two parameters, we don't pass variant_id
456
- else:
457
- logger(key, value, variant_id)
458
-
459
- def add_metric_logger(self, logger_func: Callable):
460
- existing_logger = next((logger for logger in self._metric_loggers if logger is logger_func), None)
461
- if existing_logger:
462
- return
463
- if not callable(logger_func):
464
- return
465
- sign = inspect.signature(logger_func)
466
- # We accept two kinds of metric loggers:
467
- # def log_metric(k, v)
468
- # def log_metric(k, v, variant_id)
469
- if len(sign.parameters) not in [2, 3]:
470
- return
471
- self._metric_loggers.append(logger_func)
472
-
473
- def remove_metric_logger(self, logger_func: Callable):
474
- self._metric_loggers.remove(logger_func)
475
-
476
-
477
- def log_metric(key, value, variant_id=None):
478
- """Log a metric for current promptflow run.
479
-
480
- :param key: Metric name.
481
- :type key: str
482
- :param value: Metric value.
483
- :type value: float
484
- :param variant_id: Variant id for the metric.
485
- :type variant_id: str
486
- """
487
- MetricLoggerManager.get_instance().log_metric(key, value, variant_id)
488
-
489
-
490
- def add_metric_logger(logger_func: Callable):
491
- MetricLoggerManager.get_instance().add_metric_logger(logger_func)
492
-
493
-
494
- def remove_metric_logger(logger_func: Callable):
495
- MetricLoggerManager.get_instance().remove_metric_logger(logger_func)
496
- #
497
- # End of G-Eval.py
498
- #######################################################################################################################
 
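Two behaviors of the scoring path above are worth illustrating: parse_output accepts exactly one whitespace-delimited number and enforces the cap, while geval_summarization retries the LLM call with exponential backoff (up to 10 attempts) and, if parsing still fails after a successful call, logs a warning and scores the sample 0 instead of raising. Illustrative inputs for parse_output (not from the original source):

# Behavior implied by the regex and checks in parse_output above.
parse_output("4", 5.0)           # -> 4.0
parse_output(" 3.5", 5.0)        # -> 3.5
parse_output("6", 5.0)           # ValueError: parsed number larger than max score
parse_output("4 out of 5", 5.0)  # ValueError: more than one number detected
parse_output("N/A", 5.0)         # ValueError: no number detected
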
App_Function_Libraries/Books/Book_Ingestion_Lib.py DELETED
@@ -1,577 +0,0 @@
1
- # Book_Ingestion_Lib.py
2
- #########################################
3
- # Library to hold functions for ingesting book files.
4
- #
5
- ####################
6
- # Function List
7
- #
8
- # 1. ingest_text_file(file_path, title=None, author=None, keywords=None):
9
- # 2.
10
- #
11
- #
12
- ####################
13
- #
14
- # Imports
15
- import os
16
- import re
17
- import tempfile
18
- import zipfile
19
- from datetime import datetime
20
- import logging
21
- #
22
- # External Imports
23
- import ebooklib
24
- from bs4 import BeautifulSoup
25
- from ebooklib import epub
26
- #
27
- # Import Local
28
- from App_Function_Libraries.DB.DB_Manager import add_media_with_keywords, add_media_to_database
29
- from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
30
- from App_Function_Libraries.Chunk_Lib import chunk_ebook_by_chapters
31
- from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
32
- #
33
- #######################################################################################################################
34
- # Function Definitions
35
- #
36
-
37
- def import_epub(file_path,
38
- title=None,
39
- author=None,
40
- keywords=None,
41
- custom_prompt=None,
42
- system_prompt=None,
43
- summary=None,
44
- auto_summarize=False,
45
- api_name=None,
46
- api_key=None,
47
- chunk_options=None,
48
- custom_chapter_pattern=None
49
- ):
50
- """
51
- Imports an EPUB file, extracts its content, chunks it, optionally summarizes it, and adds it to the database.
52
-
53
- Parameters:
54
- - file_path (str): Path to the EPUB file.
55
- - title (str, optional): Title of the book.
56
- - author (str, optional): Author of the book.
57
- - keywords (str, optional): Comma-separated keywords for the book.
58
- - custom_prompt (str, optional): Custom user prompt for summarization.
59
- - summary (str, optional): Predefined summary of the book.
60
- - auto_summarize (bool, optional): Whether to auto-summarize the chunks.
61
- - api_name (str, optional): API name for summarization.
62
- - api_key (str, optional): API key for summarization.
63
- - chunk_options (dict, optional): Options for chunking.
64
- - custom_chapter_pattern (str, optional): Custom regex pattern for chapter detection.
65
-
66
- Returns:
67
- - str: Status message indicating success or failure.
68
- """
69
- try:
70
- logging.info(f"Importing EPUB file from {file_path}")
71
- log_counter("epub_import_attempt", labels={"file_path": file_path})
72
-
73
- start_time = datetime.now()
74
-
75
- # Convert EPUB to Markdown
76
- markdown_content = epub_to_markdown(file_path)
77
- logging.debug("Converted EPUB to Markdown.")
78
-
79
- # Extract metadata if not provided
80
- if not title or not author:
81
- extracted_title, extracted_author = extract_epub_metadata(markdown_content)
82
- title = title or extracted_title or os.path.splitext(os.path.basename(file_path))[0]
83
- author = author or extracted_author or "Unknown"
84
- logging.debug(f"Extracted metadata - Title: {title}, Author: {author}")
85
-
86
- # Process keywords
87
- keyword_list = [kw.strip() for kw in keywords.split(',')] if keywords else []
88
- logging.debug(f"Keywords: {keyword_list}")
89
-
90
- # Set default chunk options if not provided
91
- if chunk_options is None:
92
- chunk_options = {
93
- 'method': 'chapter',
94
- 'max_size': 500,
95
- 'overlap': 200,
96
- 'custom_chapter_pattern': custom_chapter_pattern
97
- }
98
- else:
99
- # Ensure 'method' is set to 'chapter' when using chapter chunking
100
- chunk_options.setdefault('method', 'chapter')
101
- chunk_options.setdefault('custom_chapter_pattern', custom_chapter_pattern)
102
-
103
- # Chunk the content by chapters
104
- chunks = chunk_ebook_by_chapters(markdown_content, chunk_options)
105
- logging.info(f"Total chunks created: {len(chunks)}")
106
- log_histogram("epub_chunks_created", len(chunks), labels={"file_path": file_path})
107
-
108
- if chunks:
109
- logging.debug(f"Structure of first chunk: {chunks[0].keys()}")
110
-
111
- # Handle summarization if enabled
112
- if auto_summarize and api_name and api_key:
113
- logging.info("Auto-summarization is enabled.")
114
- summarized_chunks = []
115
- for chunk in chunks:
116
- chunk_text = chunk.get('text', '')
117
- if chunk_text:
118
- summary_text = perform_summarization(api_name, chunk_text, custom_prompt, api_key,
119
- recursive_summarization=False, temp=None,
120
- system_message=system_prompt
121
- )
122
- chunk['metadata']['summary'] = summary_text
123
- summarized_chunks.append(chunk)
124
- chunks = summarized_chunks
125
- logging.info("Summarization of chunks completed.")
126
- log_counter("epub_chunks_summarized", value=len(chunks), labels={"file_path": file_path})
127
- else:
128
- # If not summarizing, set a default summary or use provided summary
129
- if summary:
130
- logging.debug("Using provided summary.")
131
- else:
132
- summary = "No summary provided."
133
-
134
- # Create info_dict
135
- info_dict = {
136
- 'title': title,
137
- 'uploader': author,
138
- 'ingestion_date': datetime.now().strftime('%Y-%m-%d')
139
- }
140
-
141
- # Prepare segments for database
142
- segments = [{'Text': chunk.get('text', chunk.get('content', ''))} for chunk in chunks]
143
- logging.debug(f"Prepared segments for database. Number of segments: {len(segments)}")
144
-
145
- # Add to database
146
- result = add_media_to_database(
147
- url=file_path,
148
- info_dict=info_dict,
149
- segments=segments,
150
- summary=summary,
151
- keywords=keyword_list,
152
- custom_prompt_input=custom_prompt,
153
- whisper_model="Imported",
154
- media_type="ebook",
155
- overwrite=False
156
- )
157
-
158
- end_time = datetime.now()
159
- processing_time = (end_time - start_time).total_seconds()
160
- log_histogram("epub_import_duration", processing_time, labels={"file_path": file_path})
161
-
162
- logging.info(f"Ebook '{title}' by {author} imported successfully. Database result: {result}")
163
- log_counter("epub ingested into the DB successfully", labels={"file_path": file_path})
164
- return f"Ebook '{title}' by {author} imported successfully. Database result: {result}"
165
-
166
- except Exception as e:
167
- logging.exception(f"Error importing ebook: {str(e)}")
168
- log_counter("epub_import_error", labels={"file_path": file_path, "error": str(e)})
169
- return f"Error importing ebook: {str(e)}"
170
-
171
-
172
- # FIXME
173
- def process_zip_file(zip_file,
174
- title,
175
- author,
176
- keywords,
177
- custom_prompt,
178
- system_prompt,
179
- summary,
180
- auto_summarize,
181
- api_name,
182
- api_key,
183
- chunk_options
184
- ):
185
- """
186
- Processes a ZIP file containing multiple EPUB files and imports each one.
187
-
188
- Parameters:
189
- - zip_file (file-like object): The ZIP file to process.
190
- - title (str): Title prefix for the books.
191
- - author (str): Author name for the books.
192
- - keywords (str): Comma-separated keywords.
193
- - custom_prompt (str): Custom user prompt for summarization.
194
- - summary (str): Predefined summary (not used in this context).
195
- - auto_summarize (bool): Whether to auto-summarize the chunks.
196
- - api_name (str): API name for summarization.
197
- - api_key (str): API key for summarization.
198
- - chunk_options (dict): Options for chunking.
199
-
200
- Returns:
201
- - str: Combined status messages for all EPUB files in the ZIP.
202
- """
203
- results = []
204
- try:
205
- with tempfile.TemporaryDirectory() as temp_dir:
206
- zip_path = zip_file.name if hasattr(zip_file, 'name') else zip_file.path
207
- logging.info(f"Extracting ZIP file {zip_path} to temporary directory {temp_dir}")
208
- log_counter("zip_processing_attempt", labels={"zip_path": zip_path})
209
-
210
- with zipfile.ZipFile(zip_path, 'r') as zip_ref:
211
- zip_ref.extractall(temp_dir)
212
-
213
- epub_files = [f for f in os.listdir(temp_dir) if f.lower().endswith('.epub')]
214
- log_histogram("epub_files_in_zip", len(epub_files), labels={"zip_path": zip_path})
215
-
216
- for filename in epub_files:
217
- file_path = os.path.join(temp_dir, filename)
218
- logging.info(f"Processing EPUB file {filename} from ZIP.")
219
- result = import_epub(
220
- file_path=file_path,
221
- title=title,
222
- author=author,
223
- keywords=keywords,
224
- custom_prompt=custom_prompt,
225
- summary=summary,
226
- auto_summarize=auto_summarize,
227
- api_name=api_name,
228
- api_key=api_key,
229
- chunk_options=chunk_options,
230
- custom_chapter_pattern=chunk_options.get('custom_chapter_pattern') if chunk_options else None
231
- )
232
- results.append(f"File: {filename} - {result}")
233
-
234
- logging.info("Completed processing all EPUB files in the ZIP.")
235
- log_counter("zip_processing_success", labels={"zip_path": zip_path})
236
- except Exception as e:
237
- logging.exception(f"Error processing ZIP file: {str(e)}")
238
- log_counter("zip_processing_error", labels={"zip_path": zip_path, "error": str(e)})
239
- return f"Error processing ZIP file: {str(e)}"
240
-
241
- return "\n".join(results)
242
-
243
-
244
- def import_file_handler(file,
245
- title,
246
- author,
247
- keywords,
248
- system_prompt,
249
- custom_prompt,
250
- auto_summarize,
251
- api_name,
252
- api_key,
253
- max_chunk_size,
254
- chunk_overlap,
255
- custom_chapter_pattern
256
- ):
257
- try:
258
- log_counter("file_import_attempt", labels={"file_name": file.name})
259
-
260
- # Handle max_chunk_size
261
- if isinstance(max_chunk_size, str):
262
- max_chunk_size = int(max_chunk_size) if max_chunk_size.strip() else 4000
263
- elif not isinstance(max_chunk_size, int):
264
- max_chunk_size = 4000 # Default value if not a string or int
265
-
266
- # Handle chunk_overlap
267
- if isinstance(chunk_overlap, str):
268
- chunk_overlap = int(chunk_overlap) if chunk_overlap.strip() else 0
269
- elif not isinstance(chunk_overlap, int):
270
- chunk_overlap = 0 # Default value if not a string or int
271
-
272
- chunk_options = {
273
- 'method': 'chapter',
274
- 'max_size': max_chunk_size,
275
- 'overlap': chunk_overlap,
276
- 'custom_chapter_pattern': custom_chapter_pattern if custom_chapter_pattern else None
277
- }
278
-
279
- if file is None:
280
- log_counter("file_import_error", labels={"error": "No file uploaded"})
281
- return "No file uploaded."
282
-
283
- file_path = file.name
284
- if not os.path.exists(file_path):
285
- log_counter("file_import_error", labels={"error": "File not found", "file_name": file.name})
286
- return "Uploaded file not found."
287
-
288
- start_time = datetime.now()
289
-
290
- if file_path.lower().endswith('.epub'):
291
- status = import_epub(
292
- file_path,
293
- title,
294
- author,
295
- keywords,
296
- custom_prompt=custom_prompt,
297
- system_prompt=system_prompt,
298
- summary=None,
299
- auto_summarize=auto_summarize,
300
- api_name=api_name,
301
- api_key=api_key,
302
- chunk_options=chunk_options,
303
- custom_chapter_pattern=custom_chapter_pattern
304
- )
305
- log_counter("epub_import_success", labels={"file_name": file.name})
306
- result = f"📚 EPUB Imported Successfully:\n{status}"
307
- elif file.name.lower().endswith('.zip'):
308
- status = process_zip_file(
309
- zip_file=file,
310
- title=title,
311
- author=author,
312
- keywords=keywords,
313
- custom_prompt=custom_prompt,
314
- system_prompt=system_prompt,
315
- summary=None,
316
- auto_summarize=auto_summarize,
317
- api_name=api_name,
318
- api_key=api_key,
319
- chunk_options=chunk_options
320
- )
321
- log_counter("zip_import_success", labels={"file_name": file.name})
322
- result = f"📦 ZIP Processed Successfully:\n{status}"
323
- elif file.name.lower().endswith(('.chm', '.html', '.pdf', '.xml', '.opml')):
324
- file_type = file.name.split('.')[-1].upper()
325
- log_counter("unsupported_file_type", labels={"file_type": file_type})
326
- result = f"{file_type} file import is not yet supported."
327
- else:
328
- log_counter("unsupported_file_type", labels={"file_type": file.name.split('.')[-1]})
329
- result = "❌ Unsupported file type. Please upload an `.epub` file or a `.zip` file containing `.epub` files."
330
-
331
- end_time = datetime.now()
332
- processing_time = (end_time - start_time).total_seconds()
333
- log_histogram("file_import_duration", processing_time, labels={"file_name": file.name})
334
-
335
- return result
336
-
337
- except ValueError as ve:
338
- logging.exception(f"Error parsing input values: {str(ve)}")
339
- log_counter("file_import_error", labels={"error": "Invalid input", "file_name": file.name})
340
- return f"❌ Error: Invalid input for chunk size or overlap. Please enter valid numbers."
341
- except Exception as e:
342
- logging.exception(f"Error during file import: {str(e)}")
343
- log_counter("file_import_error", labels={"error": str(e), "file_name": file.name})
344
- return f"❌ Error during import: {str(e)}"
345
-
346
-
347
- def read_epub(file_path):
348
- """
349
- Reads and extracts text from an EPUB file.
350
-
351
- Parameters:
352
- - file_path (str): Path to the EPUB file.
353
-
354
- Returns:
355
- - str: Extracted text content from the EPUB.
356
- """
357
- try:
358
- logging.info(f"Reading EPUB file from {file_path}")
359
- book = epub.read_epub(file_path)
360
- chapters = []
361
- for item in book.get_items():
362
- if item.get_type() == ebooklib.ITEM_DOCUMENT:
363
- chapters.append(item.get_content())
364
-
365
- text = ""
366
- for html_content in chapters:
367
- soup = BeautifulSoup(html_content, 'html.parser')
368
- text += soup.get_text(separator='\n\n') + "\n\n"
369
- logging.debug("EPUB content extraction completed.")
370
- return text
371
- except Exception as e:
372
- logging.exception(f"Error reading EPUB file: {str(e)}")
373
- raise
374
-
375
-
376
- # Ingest a text file into the database with Title/Author/Keywords
377
- def extract_epub_metadata(content):
378
- title_match = re.search(r'Title:\s*(.*?)\n', content)
379
- author_match = re.search(r'Author:\s*(.*?)\n', content)
380
-
381
- title = title_match.group(1) if title_match else None
382
- author = author_match.group(1) if author_match else None
383
-
384
- return title, author
385
-
386
-
387
- def ingest_text_file(file_path, title=None, author=None, keywords=None):
388
- """
389
- Ingests a plain text file into the database with optional metadata.
390
-
391
- Parameters:
392
- - file_path (str): Path to the text file.
393
- - title (str, optional): Title of the document.
394
- - author (str, optional): Author of the document.
395
- - keywords (str, optional): Comma-separated keywords.
396
-
397
- Returns:
398
- - str: Status message indicating success or failure.
399
- """
400
- try:
401
- with open(file_path, 'r', encoding='utf-8') as file:
402
- content = file.read()
403
-
404
- # Check if it's a converted epub and extract metadata if so
405
- if 'epub_converted' in (keywords or '').lower():
406
- extracted_title, extracted_author = extract_epub_metadata(content)
407
- title = title or extracted_title
408
- author = author or extracted_author
409
- logging.debug(f"Extracted metadata for converted EPUB - Title: {title}, Author: {author}")
410
-
411
- # If title is still not provided, use the filename without extension
412
- if not title:
413
- title = os.path.splitext(os.path.basename(file_path))[0]
414
-
415
- # If author is still not provided, set it to 'Unknown'
416
- if not author:
417
- author = 'Unknown'
418
-
419
- # If keywords are not provided, use a default keyword
420
- if not keywords:
421
- keywords = 'text_file,epub_converted'
422
- else:
423
- keywords = f'text_file,epub_converted,{keywords}'
424
-
425
- # Add the text file to the database
426
- add_media_with_keywords(
427
- url=file_path,
428
- title=title,
429
- media_type='document',
430
- content=content,
431
- keywords=keywords,
432
- prompt='No prompt for text files',
433
- summary='No summary for text files',
434
- transcription_model='None',
435
- author=author,
436
- ingestion_date=datetime.now().strftime('%Y-%m-%d')
437
- )
438
-
439
- logging.info(f"Text file '{title}' by {author} ingested successfully.")
440
- return f"Text file '{title}' by {author} ingested successfully."
441
- except Exception as e:
442
- logging.error(f"Error ingesting text file: {str(e)}")
443
- return f"Error ingesting text file: {str(e)}"
444
-
445
-
446
- def ingest_folder(folder_path, keywords=None):
447
- """
448
- Ingests all text files within a specified folder.
449
-
450
- Parameters:
451
- - folder_path (str): Path to the folder containing text files.
452
- - keywords (str, optional): Comma-separated keywords to add to each file.
453
-
454
- Returns:
455
- - str: Combined status messages for all ingested text files.
456
- """
457
- results = []
458
- try:
459
- logging.info(f"Ingesting all text files from folder {folder_path}")
460
- for filename in os.listdir(folder_path):
461
- if filename.lower().endswith('.txt'):
462
- file_path = os.path.join(folder_path, filename)
463
- result = ingest_text_file(file_path, keywords=keywords)
464
- results.append(result)
465
- logging.info("Completed ingestion of all text files in the folder.")
466
- except Exception as e:
467
- logging.exception(f"Error ingesting folder: {str(e)}")
468
- return f"Error ingesting folder: {str(e)}"
469
-
470
- return "\n".join(results)
471
-
472
-
473
- def epub_to_markdown(epub_path):
474
- """
475
- Converts an EPUB file to Markdown format, including the table of contents and chapter contents.
476
-
477
- Parameters:
478
- - epub_path (str): Path to the EPUB file.
479
-
480
- Returns:
481
- - str: Markdown-formatted content of the EPUB.
482
- """
483
- try:
484
- logging.info(f"Converting EPUB to Markdown from {epub_path}")
485
- book = epub.read_epub(epub_path)
486
- markdown_content = "# Table of Contents\n\n"
487
- chapters = []
488
-
489
- # Extract and format the table of contents
490
- toc = book.toc
491
- for item in toc:
492
- if isinstance(item, tuple):
493
- section, children = item
494
- level = 1
495
- markdown_content += format_toc_item(section, level)
496
- for child in children:
497
- markdown_content += format_toc_item(child, level + 1)
498
- else:
499
- markdown_content += format_toc_item(item, 1)
500
-
501
- markdown_content += "\n---\n\n"
502
-
503
- # Process each chapter
504
- for item in book.get_items():
505
- if item.get_type() == ebooklib.ITEM_DOCUMENT:
506
- chapter_content = item.get_content().decode('utf-8')
507
- soup = BeautifulSoup(chapter_content, 'html.parser')
508
-
509
- # Extract chapter title
510
- title = soup.find(['h1', 'h2', 'h3'])
511
- if title:
512
- chapter_title = title.get_text()
513
- markdown_content += f"# {chapter_title}\n\n"
514
-
515
- # Process chapter content
516
- for elem in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol']):
517
- if elem.name.startswith('h'):
518
- level = int(elem.name[1])
519
- markdown_content += f"{'#' * level} {elem.get_text()}\n\n"
520
- elif elem.name == 'p':
521
- markdown_content += f"{elem.get_text()}\n\n"
522
- elif elem.name in ['ul', 'ol']:
523
- for li in elem.find_all('li'):
524
- prefix = '-' if elem.name == 'ul' else '1.'
525
- markdown_content += f"{prefix} {li.get_text()}\n"
526
- markdown_content += "\n"
527
-
528
- markdown_content += "---\n\n"
529
-
530
- logging.debug("EPUB to Markdown conversion completed.")
531
- return markdown_content
532
-
533
- except Exception as e:
534
- logging.exception(f"Error converting EPUB to Markdown: {str(e)}")
535
- raise
536
-
537
-
538
- def format_toc_item(item, level):
539
- """
540
- Formats a table of contents item into Markdown list format.
541
-
542
- Parameters:
543
- - item (epub.Link or epub.Section): TOC item.
544
- - level (int): Heading level for indentation.
545
-
546
- Returns:
547
- - str: Markdown-formatted TOC item.
548
- """
549
- try:
550
- if isinstance(item, epub.Link):
551
- title = item.title
552
- elif isinstance(item, epub.Section):
553
- title = item.title
554
- else:
555
- title = str(item)
556
-
557
- return f"{' ' * (level - 1)}- [{title}](#{slugify(title)})\n"
558
- except Exception as e:
559
- logging.exception(f"Error formatting TOC item: {str(e)}")
560
- return ""
561
-
562
-
563
- def slugify(text):
564
- """
565
- Converts a string into a slug suitable for Markdown links.
566
-
567
- Parameters:
568
- - text (str): The text to slugify.
569
-
570
- Returns:
571
- - str: Slugified text.
572
- """
573
- return re.sub(r'[\W_]+', '-', text.lower()).strip('-')
574
-
575
- #
576
- # End of Function Definitions
577
- #######################################################################################################################
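A hedged usage sketch for import_epub follows; the file path and chapter regex are placeholders, and the chunking values mirror the defaults in the code above.

# Hypothetical call; 'mybook.epub' and the chapter pattern are placeholders.
from App_Function_Libraries.Books.Book_Ingestion_Lib import import_epub

chunk_options = {
    'method': 'chapter',                          # chapter-based chunking, as above
    'max_size': 500,                              # default max chunk size used above
    'overlap': 200,                               # default overlap used above
    'custom_chapter_pattern': r'^Chapter\s+\d+',  # optional regex for chapter headings
}

status = import_epub(
    file_path="mybook.epub",
    title=None,                  # falls back to extracted metadata, then the filename
    author=None,                 # falls back to extracted metadata, then "Unknown"
    keywords="fiction,epub",
    auto_summarize=False,        # skip per-chunk summarization (no API needed)
    chunk_options=chunk_options,
)
print(status)
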
App_Function_Libraries/Books/__init__.py DELETED
File without changes
App_Function_Libraries/Character_Chat/Character_Chat_Lib.py DELETED
@@ -1,607 +0,0 @@
1
- # Character_Chat_Lib.py
2
- # Description: Functions for character chat cards.
3
- #
4
- # Imports
5
- import json
6
- import logging
7
- import io
8
- import base64
9
- import time
10
- from typing import Dict, Any, Optional, List, Tuple
11
- #
12
- # External Imports
13
- from PIL import Image
14
- #
15
- # Local imports
16
- from App_Function_Libraries.DB.DB_Manager import get_character_card_by_id, get_character_chat_by_id
17
- from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
18
- #
19
- # Constants
20
- ####################################################################################################
21
- #
22
- # Functions
23
-
24
- # Using https://github.com/malfoyslastname/character-card-spec-v2 as the standard for v2 character cards
25
-
26
- #################################################################################
27
- #
28
- # Placeholder functions:
29
-
30
- def replace_placeholders(text: str, char_name: str, user_name: str) -> str:
31
- """
32
- Replace placeholders in the given text with appropriate values.
33
-
34
- Args:
35
- text (str): The text containing placeholders.
36
- char_name (str): The name of the character.
37
- user_name (str): The name of the user.
38
-
39
- Returns:
40
- str: The text with placeholders replaced.
41
- """
42
- replacements = {
43
- '{{char}}': char_name,
44
- '{{user}}': user_name,
45
- '{{random_user}}': user_name # Assuming random_user is the same as user for simplicity
46
- }
47
-
48
- for placeholder, value in replacements.items():
49
- text = text.replace(placeholder, value)
50
-
51
- return text
52
-
53
- def replace_user_placeholder(history, user_name):
54
- """
55
- Replaces all instances of '{{user}}' in the chat history with the actual user name.
56
-
57
- Args:
58
- history (list): The current chat history as a list of tuples (user_message, bot_message).
59
- user_name (str): The name entered by the user.
60
-
61
- Returns:
62
- list: Updated chat history with placeholders replaced.
63
- """
64
- if not user_name:
65
- user_name = "User" # Default name if none provided
66
-
67
- updated_history = []
68
- for user_msg, bot_msg in history:
69
- # Replace in user message
70
- if user_msg:
71
- user_msg = user_msg.replace("{{user}}", user_name)
72
- # Replace in bot message
73
- if bot_msg:
74
- bot_msg = bot_msg.replace("{{user}}", user_name)
75
- updated_history.append((user_msg, bot_msg))
76
- return updated_history
77
-
78
- #
79
- # End of Placeholder functions
80
- #################################################################################
81
-
82
- #################################################################################
83
- #
84
- # Functions for character card processing:
85
-
86
- def extract_character_id(choice: str) -> int:
87
- """Extract the character ID from the dropdown selection string."""
88
- log_counter("extract_character_id_attempt")
89
- try:
90
- character_id = int(choice.split('(ID: ')[1].rstrip(')'))
91
- log_counter("extract_character_id_success")
92
- return character_id
93
- except Exception as e:
94
- log_counter("extract_character_id_error", labels={"error": str(e)})
95
- raise
96
-
97
- def load_character_wrapper(character_id: int, user_name: str) -> Tuple[Dict[str, Any], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
98
- """Wrapper function to load character and image using the extracted ID."""
99
- log_counter("load_character_wrapper_attempt")
100
- start_time = time.time()
101
- try:
102
- char_data, chat_history, img = load_character_and_image(character_id, user_name)
103
- load_duration = time.time() - start_time
104
- log_histogram("load_character_wrapper_duration", load_duration)
105
- log_counter("load_character_wrapper_success")
106
- return char_data, chat_history, img
107
- except Exception as e:
108
- log_counter("load_character_wrapper_error", labels={"error": str(e)})
109
- raise
110
-
111
- def parse_character_book(book_data: Dict[str, Any]) -> Dict[str, Any]:
112
- """
113
- Parse the character book data from a V2 character card.
114
-
115
- Args:
116
- book_data (Dict[str, Any]): The raw character book data from the character card.
117
-
118
- Returns:
119
- Dict[str, Any]: The parsed and structured character book data.
120
- """
121
- parsed_book = {
122
- 'name': book_data.get('name', ''),
123
- 'description': book_data.get('description', ''),
124
- 'scan_depth': book_data.get('scan_depth'),
125
- 'token_budget': book_data.get('token_budget'),
126
- 'recursive_scanning': book_data.get('recursive_scanning', False),
127
- 'extensions': book_data.get('extensions', {}),
128
- 'entries': []
129
- }
130
-
131
- for entry in book_data.get('entries', []):
132
- parsed_entry = {
133
- 'keys': entry['keys'],
134
- 'content': entry['content'],
135
- 'extensions': entry.get('extensions', {}),
136
- 'enabled': entry['enabled'],
137
- 'insertion_order': entry['insertion_order'],
138
- 'case_sensitive': entry.get('case_sensitive', False),
139
- 'name': entry.get('name', ''),
140
- 'priority': entry.get('priority'),
141
- 'id': entry.get('id'),
142
- 'comment': entry.get('comment', ''),
143
- 'selective': entry.get('selective', False),
144
- 'secondary_keys': entry.get('secondary_keys', []),
145
- 'constant': entry.get('constant', False),
146
- 'position': entry.get('position')
147
- }
148
-         parsed_book['entries'].append(parsed_entry)
- 
-     return parsed_book
- 
- def load_character_and_image(character_id: int, user_name: str) -> Tuple[Optional[Dict[str, Any]], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
-     """
-     Load a character and its associated image based on the character ID.
- 
-     Args:
-         character_id (int): The ID of the character to load.
-         user_name (str): The name of the user, used for placeholder replacement.
- 
-     Returns:
-         Tuple[Optional[Dict[str, Any]], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
-             A tuple containing the character data, chat history, and character image (if available).
-     """
-     log_counter("load_character_and_image_attempt")
-     start_time = time.time()
-     try:
-         char_data = get_character_card_by_id(character_id)
-         if not char_data:
-             log_counter("load_character_and_image_no_data")
-             logging.warning(f"No character data found for ID: {character_id}")
-             return None, [], None
- 
-         # Replace placeholders in character data
-         for field in ['first_mes', 'mes_example', 'scenario', 'description', 'personality']:
-             if field in char_data:
-                 char_data[field] = replace_placeholders(char_data[field], char_data['name'], user_name)
- 
-         # Replace placeholders in first_mes
-         first_mes = char_data.get('first_mes', "Hello! I'm ready to chat.")
-         first_mes = replace_placeholders(first_mes, char_data['name'], user_name)
- 
-         chat_history = [(None, first_mes)] if first_mes else []
- 
-         img = None
-         if char_data.get('image'):
-             try:
-                 image_data = base64.b64decode(char_data['image'])
-                 img = Image.open(io.BytesIO(image_data)).convert("RGBA")
-                 log_counter("load_character_image_success")
-             except Exception as e:
-                 log_counter("load_character_image_error", labels={"error": str(e)})
-                 logging.error(f"Error processing image for character '{char_data['name']}': {e}")
- 
-         load_duration = time.time() - start_time
-         log_histogram("load_character_and_image_duration", load_duration)
-         log_counter("load_character_and_image_success")
-         return char_data, chat_history, img
- 
-     except Exception as e:
-         log_counter("load_character_and_image_error", labels={"error": str(e)})
-         logging.error(f"Error in load_character_and_image: {e}")
-         return None, [], None
- 
- def load_chat_and_character(chat_id: int, user_name: str) -> Tuple[Optional[Dict[str, Any]], List[Tuple[str, str]], Optional[Image.Image]]:
-     """
-     Load a chat and its associated character, including the character image and process templates.
- 
-     Args:
-         chat_id (int): The ID of the chat to load.
-         user_name (str): The name of the user.
- 
-     Returns:
-         Tuple[Optional[Dict[str, Any]], List[Tuple[str, str]], Optional[Image.Image]]:
-             A tuple containing the character data, processed chat history, and character image (if available).
-     """
-     log_counter("load_chat_and_character_attempt")
-     start_time = time.time()
-     try:
-         # Load the chat
-         chat = get_character_chat_by_id(chat_id)
-         if not chat:
-             log_counter("load_chat_and_character_no_chat")
-             logging.warning(f"No chat found with ID: {chat_id}")
-             return None, [], None
- 
-         # Load the associated character
-         character_id = chat['character_id']
-         char_data = get_character_card_by_id(character_id)
-         if not char_data:
-             log_counter("load_chat_and_character_no_character")
-             logging.warning(f"No character found for chat ID: {chat_id}")
-             return None, chat['chat_history'], None
- 
-         # Process the chat history
-         processed_history = process_chat_history(chat['chat_history'], char_data['name'], user_name)
- 
-         # Load the character image
-         img = None
-         if char_data.get('image'):
-             try:
-                 image_data = base64.b64decode(char_data['image'])
-                 img = Image.open(io.BytesIO(image_data)).convert("RGBA")
-                 log_counter("load_chat_character_image_success")
-             except Exception as e:
-                 log_counter("load_chat_character_image_error", labels={"error": str(e)})
-                 logging.error(f"Error processing image for character '{char_data['name']}': {e}")
- 
-         # Process character data templates
-         for field in ['first_mes', 'mes_example', 'scenario', 'description', 'personality']:
-             if field in char_data:
-                 char_data[field] = replace_placeholders(char_data[field], char_data['name'], user_name)
- 
-         load_duration = time.time() - start_time
-         log_histogram("load_chat_and_character_duration", load_duration)
-         log_counter("load_chat_and_character_success")
-         return char_data, processed_history, img
- 
-     except Exception as e:
-         log_counter("load_chat_and_character_error", labels={"error": str(e)})
-         logging.error(f"Error in load_chat_and_character: {e}")
-         return None, [], None
- 
- 
- def extract_json_from_image(image_file):
-     logging.debug(f"Attempting to extract JSON from image: {image_file.name}")
-     log_counter("extract_json_from_image_attempt")
-     start_time = time.time()
-     try:
-         with Image.open(image_file) as img:
-             logging.debug("Image opened successfully")
-             metadata = img.info
-             if 'chara' in metadata:
-                 logging.debug("Found 'chara' in image metadata")
-                 chara_content = metadata['chara']
-                 logging.debug(f"Content of 'chara' metadata (first 100 chars): {chara_content[:100]}...")
-                 try:
-                     decoded_content = base64.b64decode(chara_content).decode('utf-8')
-                     logging.debug(f"Decoded content (first 100 chars): {decoded_content[:100]}...")
-                     log_counter("extract_json_from_image_metadata_success")
-                     return decoded_content
-                 except Exception as e:
-                     logging.error(f"Error decoding base64 content: {e}")
-                     log_counter("extract_json_from_image_decode_error", labels={"error": str(e)})
- 
-             logging.warning("'chara' not found in metadata, attempting to find JSON data in image bytes")
-             # Alternative method to extract embedded JSON from image bytes if metadata is not available
-             img_byte_arr = io.BytesIO()
-             img.save(img_byte_arr, format='PNG')
-             img_bytes = img_byte_arr.getvalue()
-             img_str = img_bytes.decode('latin1')
- 
-             # Search for JSON-like structures in the image bytes
-             json_start = img_str.find('{')
-             json_end = img_str.rfind('}')
-             if json_start != -1 and json_end != -1 and json_end > json_start:
-                 possible_json = img_str[json_start:json_end + 1]
-                 try:
-                     json.loads(possible_json)
-                     logging.debug("Found JSON data in image bytes")
-                     log_counter("extract_json_from_image_bytes_success")
-                     return possible_json
-                 except json.JSONDecodeError:
-                     logging.debug("No valid JSON found in image bytes")
-                     log_counter("extract_json_from_image_invalid_json")
- 
-             logging.warning("No JSON data found in the image")
-             log_counter("extract_json_from_image_no_json_found")
-     except Exception as e:
-         log_counter("extract_json_from_image_error", labels={"error": str(e)})
-         logging.error(f"Error extracting JSON from image: {e}")
- 
-     extract_duration = time.time() - start_time
-     log_histogram("extract_json_from_image_duration", extract_duration)
-     return None
- 
- 
- def load_chat_history(file):
-     log_counter("load_chat_history_attempt")
-     start_time = time.time()
-     try:
-         content = file.read().decode('utf-8')
-         chat_data = json.loads(content)
- 
-         # Extract history and character name from the loaded data
-         history = chat_data.get('history') or chat_data.get('messages')
-         character_name = chat_data.get('character') or chat_data.get('character_name')
- 
-         if not history or not character_name:
-             log_counter("load_chat_history_incomplete_data")
-             logging.error("Chat history or character name missing in the imported file.")
-             return None, None
- 
-         load_duration = time.time() - start_time
-         log_histogram("load_chat_history_duration", load_duration)
-         log_counter("load_chat_history_success")
-         return history, character_name
-     except Exception as e:
-         log_counter("load_chat_history_error", labels={"error": str(e)})
-         logging.error(f"Error loading chat history: {e}")
-         return None, None
- 
- 
- def process_chat_history(chat_history: List[Tuple[str, str]], char_name: str, user_name: str) -> List[Tuple[str, str]]:
-     """
-     Process the chat history to replace placeholders in both user and character messages.
- 
-     Args:
-         chat_history (List[Tuple[str, str]]): The chat history.
-         char_name (str): The name of the character.
-         user_name (str): The name of the user.
- 
-     Returns:
-         List[Tuple[str, str]]: The processed chat history.
-     """
-     log_counter("process_chat_history_attempt")
-     start_time = time.time()
-     try:
-         processed_history = []
-         for user_msg, char_msg in chat_history:
-             if user_msg:
-                 user_msg = replace_placeholders(user_msg, char_name, user_name)
-             if char_msg:
-                 char_msg = replace_placeholders(char_msg, char_name, user_name)
-             processed_history.append((user_msg, char_msg))
- 
-         process_duration = time.time() - start_time
-         log_histogram("process_chat_history_duration", process_duration)
-         log_counter("process_chat_history_success", labels={"message_count": len(chat_history)})
-         return processed_history
-     except Exception as e:
-         log_counter("process_chat_history_error", labels={"error": str(e)})
-         logging.error(f"Error processing chat history: {e}")
-         raise
- 
- 
- def validate_character_book(book_data):
-     """
-     Validate the 'character_book' field in the character card.
- 
-     Args:
-         book_data (dict): The character book data.
- 
-     Returns:
-         Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
-     """
-     validation_messages = []
- 
-     # Optional fields with expected types
-     optional_fields = {
-         'name': str,
-         'description': str,
-         'scan_depth': (int, float),
-         'token_budget': (int, float),
-         'recursive_scanning': bool,
-         'extensions': dict,
-         'entries': list
-     }
- 
-     for field, expected_type in optional_fields.items():
-         if field in book_data:
-             if not isinstance(book_data[field], expected_type):
-                 validation_messages.append(f"Field 'character_book.{field}' must be of type '{expected_type}'.")
- 
-     # 'entries' is required
-     if 'entries' not in book_data or not isinstance(book_data['entries'], list):
-         validation_messages.append("Field 'character_book.entries' is required and must be a list.")
-         return False, validation_messages
- 
-     # Validate each entry in 'entries'
-     entries = book_data.get('entries', [])
-     entry_ids = set()
-     for idx, entry in enumerate(entries):
-         is_valid_entry, entry_messages = validate_character_book_entry(entry, idx, entry_ids)
-         if not is_valid_entry:
-             validation_messages.extend(entry_messages)
- 
-     is_valid = len(validation_messages) == 0
-     return is_valid, validation_messages
- 
- 
- def validate_character_book_entry(entry, idx, entry_ids):
-     """
-     Validate an entry in the 'character_book.entries' list.
- 
-     Args:
-         entry (dict): The entry data.
-         idx (int): The index of the entry in the list.
-         entry_ids (set): A set of existing entry IDs for uniqueness checking.
- 
-     Returns:
-         Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
-     """
-     validation_messages = []
-     required_fields = {
-         'keys': list,
-         'content': str,
-         'extensions': dict,
-         'enabled': bool,
-         'insertion_order': (int, float)
-     }
- 
-     for field, expected_type in required_fields.items():
-         if field not in entry:
-             validation_messages.append(f"Entry {idx}: Missing required field '{field}'.")
-         elif not isinstance(entry[field], expected_type):
-             validation_messages.append(f"Entry {idx}: Field '{field}' must be of type '{expected_type}'.")
-         elif field == 'content' and not entry[field].strip():
-             validation_messages.append(f"Entry {idx}: Field 'content' cannot be empty.")
-         elif field == 'keys' and not entry[field]:
-             validation_messages.append(f"Entry {idx}: Field 'keys' cannot be empty.")
- 
-     # Optional fields
-     optional_fields = {
-         'case_sensitive': bool,
-         'name': str,
-         'priority': (int, float),
-         'id': (int, float),
-         'comment': str,
-         'selective': bool,
-         'secondary_keys': list,
-         'constant': bool,
-         'position': str  # Should be 'before_char' or 'after_char'
-     }
- 
-     for field, expected_type in optional_fields.items():
-         if field in entry and not isinstance(entry[field], expected_type):
-             validation_messages.append(f"Entry {idx}: Field '{field}' must be of type '{expected_type}'.")
- 
-     # Validate 'position' value if present
-     if 'position' in entry:
-         if entry['position'] not in ['before_char', 'after_char']:
-             validation_messages.append(f"Entry {idx}: Field 'position' must be 'before_char' or 'after_char'.")
- 
-     # Validate 'secondary_keys' if 'selective' is True
-     if entry.get('selective', False):
-         if 'secondary_keys' not in entry or not isinstance(entry['secondary_keys'], list):
-             validation_messages.append(f"Entry {idx}: 'secondary_keys' must be a list when 'selective' is True.")
-         elif not entry['secondary_keys']:
-             validation_messages.append(f"Entry {idx}: 'secondary_keys' cannot be empty when 'selective' is True.")
- 
-     # Validate 'keys' list elements
-     if 'keys' in entry and isinstance(entry['keys'], list):
-         for i, key in enumerate(entry['keys']):
-             if not isinstance(key, str) or not key.strip():
-                 validation_messages.append(f"Entry {idx}: Element {i} in 'keys' must be a non-empty string.")
- 
-     # Validate 'secondary_keys' list elements
-     if 'secondary_keys' in entry and isinstance(entry['secondary_keys'], list):
-         for i, key in enumerate(entry['secondary_keys']):
-             if not isinstance(key, str) or not key.strip():
-                 validation_messages.append(f"Entry {idx}: Element {i} in 'secondary_keys' must be a non-empty string.")
- 
-     # Validate 'id' uniqueness
-     if 'id' in entry:
-         entry_id = entry['id']
-         if entry_id in entry_ids:
-             validation_messages.append(f"Entry {idx}: Duplicate 'id' value '{entry_id}'. Each entry 'id' must be unique.")
-         else:
-             entry_ids.add(entry_id)
- 
-     # Validate 'extensions' keys are namespaced
-     if 'extensions' in entry and isinstance(entry['extensions'], dict):
-         for key in entry['extensions'].keys():
-             if '/' not in key and '_' not in key:
-                 validation_messages.append(f"Entry {idx}: Extension key '{key}' in 'extensions' should be namespaced to prevent conflicts.")
- 
-     is_valid = len(validation_messages) == 0
-     return is_valid, validation_messages
- 
- 
- def validate_v2_card(card_data):
-     """
-     Validate a character card according to the V2 specification.
- 
-     Args:
-         card_data (dict): The parsed character card data.
- 
-     Returns:
-         Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
-     """
-     validation_messages = []
- 
-     # Check top-level fields
-     if 'spec' not in card_data:
-         validation_messages.append("Missing 'spec' field.")
-     elif card_data['spec'] != 'chara_card_v2':
-         validation_messages.append(f"Invalid 'spec' value: {card_data['spec']}. Expected 'chara_card_v2'.")
- 
-     if 'spec_version' not in card_data:
-         validation_messages.append("Missing 'spec_version' field.")
-     else:
-         # Ensure 'spec_version' is '2.0' or higher
-         try:
-             spec_version = float(card_data['spec_version'])
-             if spec_version < 2.0:
-                 validation_messages.append(f"'spec_version' must be '2.0' or higher. Found '{card_data['spec_version']}'.")
-         except ValueError:
-             validation_messages.append(f"Invalid 'spec_version' format: {card_data['spec_version']}. Must be a number as a string.")
- 
-     if 'data' not in card_data:
-         validation_messages.append("Missing 'data' field.")
-         return False, validation_messages  # Cannot proceed without 'data' field
- 
-     data = card_data['data']
- 
-     # Required fields in 'data'
-     required_fields = ['name', 'description', 'personality', 'scenario', 'first_mes', 'mes_example']
-     for field in required_fields:
-         if field not in data:
-             validation_messages.append(f"Missing required field in 'data': '{field}'.")
-         elif not isinstance(data[field], str):
-             validation_messages.append(f"Field '{field}' must be a string.")
-         elif not data[field].strip():
-             validation_messages.append(f"Field '{field}' cannot be empty.")
- 
-     # Optional fields with expected types
-     optional_fields = {
-         'creator_notes': str,
-         'system_prompt': str,
-         'post_history_instructions': str,
-         'alternate_greetings': list,
-         'tags': list,
-         'creator': str,
-         'character_version': str,
-         'extensions': dict,
-         'character_book': dict  # If present, should be a dict
-     }
- 
-     for field, expected_type in optional_fields.items():
-         if field in data:
-             if not isinstance(data[field], expected_type):
-                 validation_messages.append(f"Field '{field}' must be of type '{expected_type.__name__}'.")
-             elif field == 'extensions':
-                 # Validate that extensions keys are properly namespaced
-                 for key in data[field].keys():
-                     if '/' not in key and '_' not in key:
-                         validation_messages.append(f"Extension key '{key}' in 'extensions' should be namespaced to prevent conflicts.")
- 
-     # If 'alternate_greetings' is present, check that it's a list of non-empty strings
-     if 'alternate_greetings' in data and isinstance(data['alternate_greetings'], list):
-         for idx, greeting in enumerate(data['alternate_greetings']):
-             if not isinstance(greeting, str) or not greeting.strip():
-                 validation_messages.append(f"Element {idx} in 'alternate_greetings' must be a non-empty string.")
- 
-     # If 'tags' is present, check that it's a list of non-empty strings
-     if 'tags' in data and isinstance(data['tags'], list):
-         for idx, tag in enumerate(data['tags']):
-             if not isinstance(tag, str) or not tag.strip():
-                 validation_messages.append(f"Element {idx} in 'tags' must be a non-empty string.")
- 
-     # Validate 'extensions' field
-     if 'extensions' in data and not isinstance(data['extensions'], dict):
-         validation_messages.append("Field 'extensions' must be a dictionary.")
- 
-     # Validate 'character_book' if present
-     if 'character_book' in data:
-         is_valid_book, book_messages = validate_character_book(data['character_book'])
-         if not is_valid_book:
-             validation_messages.extend(book_messages)
- 
-     is_valid = len(validation_messages) == 0
-     return is_valid, validation_messages
- 
- #
- # End of File
- ####################################################################################################
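
For reference, a minimal card that the deleted validate_v2_card accepted: both top-level fields must be present, spec_version must parse as a float of at least 2.0, and all six required strings in data must be non-empty. A quick sketch (the import path assumes this module as it existed before this commit; all field values are illustrative):

    from App_Function_Libraries.Character_Chat.Character_Chat_Lib import validate_v2_card

    minimal_card = {
        "spec": "chara_card_v2",
        "spec_version": "2.0",
        "data": {
            # All six required fields must be non-empty strings.
            "name": "Example Bot",
            "description": "A minimal test character.",
            "personality": "Helpful and terse.",
            "scenario": "A validation smoke test.",
            "first_mes": "Hello, {{user}}!",
            "mes_example": "{{user}}: Hi\n{{char}}: Hello!",
        },
    }

    is_valid, messages = validate_v2_card(minimal_card)
    print(is_valid, messages)  # expected: True []
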
App_Function_Libraries/Character_Chat/__init__.py DELETED
File without changes
App_Function_Libraries/Chat.py DELETED
@@ -1,439 +0,0 @@
- # Chat.py
- # Chat functions for interacting with the LLMs as chatbots
- #
- # Imports
- import base64
- import json
- import logging
- import os
- import re
- import tempfile
- import time
- from datetime import datetime
- from pathlib import Path
- #
- # External Imports
- #
- # Local Imports
- from App_Function_Libraries.DB.DB_Manager import get_conversation_name, save_chat_history_to_database
- from App_Function_Libraries.LLM_API_Calls import chat_with_openai, chat_with_anthropic, chat_with_cohere, \
-     chat_with_groq, chat_with_openrouter, chat_with_deepseek, chat_with_mistral, chat_with_huggingface
- from App_Function_Libraries.LLM_API_Calls_Local import chat_with_aphrodite, chat_with_local_llm, chat_with_ollama, \
-     chat_with_kobold, chat_with_llama, chat_with_oobabooga, chat_with_tabbyapi, chat_with_vllm, chat_with_custom_openai
- from App_Function_Libraries.DB.SQLite_DB import load_media_content
- from App_Function_Libraries.Utils.Utils import generate_unique_filename, load_and_log_configs
- from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
- #
- ####################################################################################################
- #
- # Functions:
- 
- def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message=None):
-     log_counter("chat_api_call_attempt", labels={"api_endpoint": api_endpoint})
-     start_time = time.time()
-     if not api_key:
-         # Normalize empty strings to None so downstream checks are consistent
-         api_key = None
-     model = None
-     try:
-         logging.info(f"Debug - Chat API Call - API Endpoint: {api_endpoint}")
-         # Log only whether a key was supplied; never log the key itself
-         logging.info(f"Debug - Chat API Call - API Key present: {bool(api_key)}")
-         if api_endpoint.lower() == 'openai':
-             response = chat_with_openai(api_key, input_data, prompt, temp, system_message)
- 
-         elif api_endpoint.lower() == 'anthropic':
-             # Retrieve the model from config
-             loaded_config_data = load_and_log_configs()
-             model = loaded_config_data['models']['anthropic'] if loaded_config_data else None
-             response = chat_with_anthropic(
-                 api_key=api_key,
-                 input_data=input_data,
-                 model=model,
-                 custom_prompt_arg=prompt,
-                 system_prompt=system_message
-             )
- 
-         elif api_endpoint.lower() == "cohere":
-             # NOTE: model is still None here; chat_with_cohere is left to fall back to its own default
-             response = chat_with_cohere(
-                 api_key,
-                 input_data,
-                 model=model,
-                 custom_prompt_arg=prompt,
-                 system_prompt=system_message,
-                 temp=temp
-             )
- 
-         elif api_endpoint.lower() == "groq":
-             response = chat_with_groq(api_key, input_data, prompt, temp, system_message)
- 
-         elif api_endpoint.lower() == "openrouter":
-             response = chat_with_openrouter(api_key, input_data, prompt, temp, system_message)
- 
-         elif api_endpoint.lower() == "deepseek":
-             response = chat_with_deepseek(api_key, input_data, prompt, temp, system_message)
- 
-         elif api_endpoint.lower() == "mistral":
-             response = chat_with_mistral(api_key, input_data, prompt, temp, system_message)
- 
-         elif api_endpoint.lower() == "llama.cpp":
-             response = chat_with_llama(input_data, prompt, temp, None, api_key, system_message)
- 
-         elif api_endpoint.lower() == "kobold":
-             response = chat_with_kobold(input_data, api_key, prompt, temp, system_message)
- 
-         elif api_endpoint.lower() == "ooba":
-             response = chat_with_oobabooga(input_data, api_key, prompt, temp, system_message)
- 
-         elif api_endpoint.lower() == "tabbyapi":
-             response = chat_with_tabbyapi(input_data, prompt, temp, system_message)
- 
-         elif api_endpoint.lower() == "vllm":
-             response = chat_with_vllm(input_data, prompt, system_message)
- 
-         elif api_endpoint.lower() == "local-llm":
-             response = chat_with_local_llm(input_data, prompt, temp, system_message)
- 
-         elif api_endpoint.lower() == "huggingface":
-             response = chat_with_huggingface(api_key, input_data, prompt, temp)  # , system_message)
- 
-         elif api_endpoint.lower() == "ollama":
-             response = chat_with_ollama(input_data, prompt, None, api_key, temp, system_message)
- 
-         elif api_endpoint.lower() == "aphrodite":
-             response = chat_with_aphrodite(input_data, prompt, temp, system_message)
- 
-         elif api_endpoint.lower() == "custom-openai-api":
-             response = chat_with_custom_openai(api_key, input_data, prompt, temp, system_message)
- 
-         else:
-             raise ValueError(f"Unsupported API endpoint: {api_endpoint}")
- 
-         call_duration = time.time() - start_time
-         log_histogram("chat_api_call_duration", call_duration, labels={"api_endpoint": api_endpoint})
-         log_counter("chat_api_call_success", labels={"api_endpoint": api_endpoint})
-         return response
- 
-     except Exception as e:
-         log_counter("chat_api_call_error", labels={"api_endpoint": api_endpoint, "error": str(e)})
-         logging.error(f"Error in chat_api_call: {str(e)}")
-         return f"An error occurred: {str(e)}"
- 
- 
- def chat(message, history, media_content, selected_parts, api_endpoint, api_key, prompt, temperature,
-          system_message=None):
-     log_counter("chat_attempt", labels={"api_endpoint": api_endpoint})
-     start_time = time.time()
-     try:
-         logging.info(f"Debug - Chat Function - Message: {message}")
-         logging.info(f"Debug - Chat Function - Media Content: {media_content}")
-         logging.info(f"Debug - Chat Function - Selected Parts: {selected_parts}")
-         logging.info(f"Debug - Chat Function - API Endpoint: {api_endpoint}")
-         # logging.info(f"Debug - Chat Function - Prompt: {prompt}")
- 
-         # Ensure selected_parts is a list
-         if not isinstance(selected_parts, (list, tuple)):
-             selected_parts = [selected_parts] if selected_parts else []
- 
-         # logging.debug(f"Debug - Chat Function - Selected Parts (after check): {selected_parts}")
- 
-         # Combine the selected parts of the media content
-         combined_content = "\n\n".join(
-             [f"{part.capitalize()}: {media_content.get(part, '')}" for part in selected_parts if part in media_content])
-         # Print first 500 chars
-         # logging.debug(f"Debug - Chat Function - Combined Content: {combined_content[:500]}...")
- 
-         # Prepare the input for the API
-         input_data = f"{combined_content}\n\n" if combined_content else ""
-         for old_message, old_response in history:
-             input_data += f"{old_message}\nAssistant: {old_response}\n\n"
-         input_data += f"{message}\n"
- 
-         if system_message:
-             print(f"System message: {system_message}")
-             logging.debug(f"Debug - Chat Function - System Message: {system_message}")
-         temperature = float(temperature) if temperature else 0.7
-         temp = temperature
- 
-         logging.debug(f"Debug - Chat Function - Temperature: {temperature}")
-         # Guard the slice: api_key may be None at this point
-         logging.debug(f"Debug - Chat Function - API Key: {api_key[:10] if api_key else 'None'}")
-         logging.debug(f"Debug - Chat Function - Prompt: {prompt}")
- 
-         # Use the existing API request code based on the selected endpoint
-         response = chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message)
- 
-         chat_duration = time.time() - start_time
-         log_histogram("chat_duration", chat_duration, labels={"api_endpoint": api_endpoint})
-         log_counter("chat_success", labels={"api_endpoint": api_endpoint})
-         return response
-     except Exception as e:
-         log_counter("chat_error", labels={"api_endpoint": api_endpoint, "error": str(e)})
-         logging.error(f"Error in chat function: {str(e)}")
-         return f"An error occurred: {str(e)}"
- 
- 
- def save_chat_history_to_db_wrapper(chatbot, conversation_id, media_content, media_name=None):
-     log_counter("save_chat_history_to_db_attempt")
-     start_time = time.time()
-     logging.info(f"Attempting to save chat history. Media content type: {type(media_content)}")
-     try:
-         # Extract the media_id and media_name from the media_content
-         media_id = None
-         if isinstance(media_content, dict):
-             logging.debug(f"Media content keys: {media_content.keys()}")
-             if 'content' in media_content:
-                 try:
-                     content = media_content['content']
-                     if isinstance(content, str):
-                         content_json = json.loads(content)
-                     elif isinstance(content, dict):
-                         content_json = content
-                     else:
-                         raise ValueError(f"Unexpected content type: {type(content)}")
- 
-                     # Use the webpage_url as the media_id
-                     media_id = content_json.get('webpage_url')
-                     # Use the title as the media_name
-                     media_name = content_json.get('title')
- 
-                     logging.info(f"Extracted media_id: {media_id}, media_name: {media_name}")
-                 except json.JSONDecodeError:
-                     logging.error("Failed to decode JSON from media_content['content']")
-                 except Exception as e:
-                     logging.error(f"Error processing media_content: {str(e)}")
-             else:
-                 logging.warning("'content' key not found in media_content")
-         else:
-             logging.warning(f"media_content is not a dictionary. Type: {type(media_content)}")
- 
-         if media_id is None:
-             # If we couldn't find a media_id, we'll use a placeholder
-             media_id = "unknown_media"
-             logging.warning(f"Unable to extract media_id from media_content. Using placeholder: {media_id}")
- 
-         if media_name is None:
-             media_name = "Unnamed Media"
-             logging.warning(f"Unable to extract media_name from media_content. Using placeholder: {media_name}")
- 
-         # Generate a unique conversation name using media_id and current timestamp
-         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-         conversation_name = f"{media_name}_{timestamp}"
- 
-         new_conversation_id = save_chat_history_to_database(chatbot, conversation_id, media_id, media_name,
-                                                             conversation_name)
-         save_duration = time.time() - start_time
-         log_histogram("save_chat_history_to_db_duration", save_duration)
-         log_counter("save_chat_history_to_db_success")
-         return new_conversation_id, f"Chat history saved successfully as {conversation_name}!"
-     except Exception as e:
-         log_counter("save_chat_history_to_db_error", labels={"error": str(e)})
-         error_message = f"Failed to save chat history: {str(e)}"
-         logging.error(error_message, exc_info=True)
-         return conversation_id, error_message
- 
- 
- def save_chat_history(history, conversation_id, media_content):
-     log_counter("save_chat_history_attempt")
-     start_time = time.time()
-     try:
-         content, conversation_name = generate_chat_history_content(history, conversation_id, media_content)
- 
-         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-         safe_conversation_name = re.sub(r'[^a-zA-Z0-9_-]', '_', conversation_name)
-         base_filename = f"{safe_conversation_name}_{timestamp}.json"
- 
-         # Create a temporary file
-         with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as temp_file:
-             temp_file.write(content)
-             temp_file_path = temp_file.name
- 
-         # Generate a unique filename
-         unique_filename = generate_unique_filename(os.path.dirname(temp_file_path), base_filename)
-         final_path = os.path.join(os.path.dirname(temp_file_path), unique_filename)
- 
-         # Rename the temporary file to the unique filename
-         os.rename(temp_file_path, final_path)
- 
-         save_duration = time.time() - start_time
-         log_histogram("save_chat_history_duration", save_duration)
-         log_counter("save_chat_history_success")
-         return final_path
-     except Exception as e:
-         log_counter("save_chat_history_error", labels={"error": str(e)})
-         logging.error(f"Error saving chat history: {str(e)}")
-         return None
- 
- 
- def generate_chat_history_content(history, conversation_id, media_content):
-     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
- 
-     conversation_name = get_conversation_name(conversation_id)
- 
-     if not conversation_name:
-         media_name = extract_media_name(media_content)
-         if media_name:
-             conversation_name = f"{media_name}-chat"
-         else:
-             conversation_name = f"chat-{timestamp}"  # Fallback name
- 
-     # NOTE: when history items are (user, bot) tuples, only the first element is serialized here
-     chat_data = {
-         "conversation_id": conversation_id,
-         "conversation_name": conversation_name,
-         "timestamp": timestamp,
-         "history": [
-             {
-                 "role": "user" if i % 2 == 0 else "bot",
-                 "content": msg[0] if isinstance(msg, tuple) else msg
-             }
-             for i, msg in enumerate(history)
-         ]
-     }
- 
-     return json.dumps(chat_data, indent=2), conversation_name
- 
- 
- def extract_media_name(media_content):
-     if isinstance(media_content, dict):
-         content = media_content.get('content', {})
-         if isinstance(content, str):
-             try:
-                 content = json.loads(content)
-             except json.JSONDecodeError:
-                 logging.warning("Failed to parse media_content JSON string")
-                 return None
- 
-         # Try to extract title from the content
-         if isinstance(content, dict):
-             return content.get('title') or content.get('name')
- 
-     logging.warning(f"Unexpected media_content format: {type(media_content)}")
-     return None
- 
- 
- def update_chat_content(selected_item, use_content, use_summary, use_prompt, item_mapping):
-     log_counter("update_chat_content_attempt")
-     start_time = time.time()
-     logging.debug(f"Debug - Update Chat Content - Selected Item: {selected_item}")
-     logging.debug(f"Debug - Update Chat Content - Use Content: {use_content}")
-     logging.debug(f"Debug - Update Chat Content - Use Summary: {use_summary}")
-     logging.debug(f"Debug - Update Chat Content - Use Prompt: {use_prompt}")
-     logging.debug(f"Debug - Update Chat Content - Item Mapping: {item_mapping}")
- 
-     if selected_item and selected_item in item_mapping:
-         media_id = item_mapping[selected_item]
-         content = load_media_content(media_id)
-         selected_parts = []
-         if use_content and "content" in content:
-             selected_parts.append("content")
-         if use_summary and "summary" in content:
-             selected_parts.append("summary")
-         if use_prompt and "prompt" in content:
-             selected_parts.append("prompt")
- 
-         # Modified debug print
-         if isinstance(content, dict):
-             print(f"Debug - Update Chat Content - Content keys: {list(content.keys())}")
-             for key, value in content.items():
-                 print(f"Debug - Update Chat Content - {key} (first 500 chars): {str(value)[:500]}")
-         else:
-             print(f"Debug - Update Chat Content - Content (first 500 chars): {str(content)[:500]}")
- 
-         print(f"Debug - Update Chat Content - Selected Parts: {selected_parts}")
-         update_duration = time.time() - start_time
-         log_histogram("update_chat_content_duration", update_duration)
-         log_counter("update_chat_content_success")
-         return content, selected_parts
-     else:
-         log_counter("update_chat_content_error", labels={"error": "No item selected or item not in mapping"})
-         print("Debug - Update Chat Content - No item selected or item not in mapping")
-         return {}, []
- 
- #
- # End of Chat functions
- #######################################################################################################################
- 
- 
- #######################################################################################################################
- #
- # Character Card Functions
- 
- # NOTE: the helpers below build their own path relative to this file rather than using this constant
- CHARACTERS_FILE = Path('.', 'Helper_Scripts', 'Character_Cards', 'Characters.json')
- 
- 
- def save_character(character_data):
-     log_counter("save_character_attempt")
-     start_time = time.time()
-     characters_file = os.path.join(os.path.dirname(__file__), '..', 'Helper_Scripts', 'Character_Cards', 'Characters.json')
-     characters_dir = os.path.dirname(characters_file)
- 
-     try:
-         if os.path.exists(characters_file):
-             with open(characters_file, 'r') as f:
-                 characters = json.load(f)
-         else:
-             characters = {}
- 
-         char_name = character_data['name']
- 
-         # Save the image separately if it exists
-         if 'image' in character_data:
-             img_data = base64.b64decode(character_data['image'])
-             img_filename = f"{char_name.replace(' ', '_')}.png"
-             img_path = os.path.join(characters_dir, img_filename)
-             with open(img_path, 'wb') as f:
-                 f.write(img_data)
-             character_data['image_path'] = os.path.abspath(img_path)
-             del character_data['image']  # Remove the base64 image data from the JSON
- 
-         characters[char_name] = character_data
- 
-         with open(characters_file, 'w') as f:
-             json.dump(characters, f, indent=2)
- 
-         save_duration = time.time() - start_time
-         log_histogram("save_character_duration", save_duration)
-         log_counter("save_character_success")
-         logging.info(f"Character '{char_name}' saved successfully.")
-     except Exception as e:
-         log_counter("save_character_error", labels={"error": str(e)})
-         logging.error(f"Error saving character: {str(e)}")
- 
- 
- def load_characters():
-     log_counter("load_characters_attempt")
-     start_time = time.time()
-     try:
-         characters_file = os.path.join(os.path.dirname(__file__), '..', 'Helper_Scripts', 'Character_Cards', 'Characters.json')
-         if os.path.exists(characters_file):
-             with open(characters_file, 'r') as f:
-                 characters = json.load(f)
-             logging.debug(f"Loaded {len(characters)} characters from {characters_file}")
-             load_duration = time.time() - start_time
-             log_histogram("load_characters_duration", load_duration)
-             log_counter("load_characters_success", labels={"character_count": len(characters)})
-             return characters
-         else:
-             logging.warning(f"Characters file not found: {characters_file}")
-             return {}
-     except Exception as e:
-         log_counter("load_characters_error", labels={"error": str(e)})
-         logging.error(f"Error loading characters: {str(e)}")
-         return {}
- 
- 
- def get_character_names():
-     log_counter("get_character_names_attempt")
-     start_time = time.time()
-     try:
-         characters = load_characters()
-         names = list(characters.keys())
-         get_names_duration = time.time() - start_time
-         log_histogram("get_character_names_duration", get_names_duration)
-         log_counter("get_character_names_success", labels={"name_count": len(names)})
-         return names
-     except Exception as e:
-         log_counter("get_character_names_error", labels={"error": str(e)})
-         logging.error(f"Error getting character names: {str(e)}")
-         return []
- 
- #
- # End of Chat.py
- ##########################################################################################################################
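
For reference, a minimal sketch of how the deleted chat_api_call was driven. The import path assumes the module as it existed before this commit; the endpoint name, environment variable, and strings are placeholders:

    import os
    from App_Function_Libraries.Chat import chat_api_call

    response = chat_api_call(
        api_endpoint="openai",                     # any endpoint name handled in the dispatch above
        api_key=os.environ.get("OPENAI_API_KEY"),  # empty/None keys are normalized to None internally
        input_data="Summarize: The quick brown fox jumps over the lazy dog.",
        prompt="Give a one-sentence summary.",
        temp=0.7,
        system_message="You are a helpful assistant.",
    )
    # On failure the function returns an "An error occurred: ..." string rather than raising.
    print(response)
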
App_Function_Libraries/Chat_related_functions.py DELETED
@@ -1,41 +0,0 @@
- # Chat_related_functions.py
- # Contains functions related to chat
- # WIP.
- #
- # Importing required libraries
- import json
- import os
- from pathlib import Path
- #
- ########################################################################################################################
- # Set globals
- CHARACTERS_FILE = Path('.', 'Helper_Scripts', 'Character_Cards', 'Characters.json')
- 
- 
- def save_character(character_data):
-     if CHARACTERS_FILE.exists():
-         with CHARACTERS_FILE.open('r') as f:
-             characters = json.load(f)
-     else:
-         characters = {}
- 
-     characters[character_data['name']] = character_data
- 
-     with CHARACTERS_FILE.open('w') as f:
-         json.dump(characters, f, indent=2)
- 
- 
- def load_characters():
-     if os.path.exists(CHARACTERS_FILE):
-         with open(CHARACTERS_FILE, 'r') as f:
-             return json.load(f)
-     return {}
- 
- 
- def get_character_names():
-     characters = load_characters()
-     return list(characters.keys())
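
A usage sketch for the WIP helpers above, assuming the Helper_Scripts/Character_Cards directory exists relative to the working directory (Characters.json is created on the first save; the field values are illustrative):

    from App_Function_Libraries.Chat_related_functions import (
        save_character, load_characters, get_character_names)

    save_character({"name": "Example Bot", "description": "A test character."})
    print(get_character_names())              # e.g. ['Example Bot']
    print(load_characters()["Example Bot"])   # {'name': 'Example Bot', ...}
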
App_Function_Libraries/Chunk_Lib.py DELETED
@@ -1,1051 +0,0 @@
- # Chunk_Lib.py
- #########################################
- # Chunking Library
- # This library is used to perform chunking of input files.
- # Currently, uses naive approaches. Nothing fancy.
- #
- ####
- # Import necessary libraries
- import hashlib
- import json
- import logging
- import re
- from typing import Any, Dict, List, Optional, Tuple
- #
- # Import 3rd party
- from openai import OpenAI
- from tqdm import tqdm
- from langdetect import detect
- from transformers import GPT2Tokenizer
- import nltk
- from nltk.tokenize import sent_tokenize, word_tokenize
- from sklearn.feature_extraction.text import TfidfVectorizer
- from sklearn.metrics.pairwise import cosine_similarity
- #
- # Import Local
- from App_Function_Libraries.Tokenization_Methods_Lib import openai_tokenize
- from App_Function_Libraries.Utils.Utils import load_comprehensive_config
- #
- #######################################################################################################################
- # Config Settings
- #
- #
- # FIXME - Make sure it only downloads if the data doesn't already exist, i.e. does a check first.
- # Ensure NLTK data is downloaded
- def ensure_nltk_data():
-     try:
-         nltk.data.find('tokenizers/punkt')
-     except LookupError:
-         nltk.download('punkt')
- # ensure_nltk_data()
- 
- #
- # Load GPT2 tokenizer
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
- #
- # Load configuration
- config = load_comprehensive_config()
- # Embedding Chunking options
- chunk_options = {
-     'method': config.get('Chunking', 'method', fallback='words'),
-     'max_size': config.getint('Chunking', 'max_size', fallback=400),
-     'overlap': config.getint('Chunking', 'overlap', fallback=200),
-     'adaptive': config.getboolean('Chunking', 'adaptive', fallback=False),
-     'multi_level': config.getboolean('Chunking', 'multi_level', fallback=False),
-     'language': config.get('Chunking', 'language', fallback='english')
- }
- 
- openai_api_key = config.get('API', 'openai_api_key')
- #
- # End of settings
- #######################################################################################################################
- #
- # Functions:
- 
- # Create a chunking class for refactoring FIXME
- # class Chunker:
- #     def __init__(self, tokenizer: GPT2Tokenizer):
- #         self.tokenizer = tokenizer
- #
- #     def detect_language(self, text: str) -> str:
- #         try:
- #             return detect(text)
- #         except Exception:
- #             return 'en'
- #
- #     def chunk_text(self, text: str, method: str, max_size: int, overlap: int, language: str = None) -> List[str]:
- #         if language is None:
- #             language = self.detect_language(text)
- #
- #         if method == 'words':
- #             return self.chunk_text_by_words(text, max_size, overlap, language)
- #         elif method == 'sentences':
- #             return self.chunk_text_by_sentences(text, max_size, overlap, language)
- #         elif method == 'paragraphs':
- #             return self.chunk_text_by_paragraphs(text, max_size, overlap)
- #         elif method == 'tokens':
- #             return self.chunk_text_by_tokens(text, max_size, overlap, language)
- #         elif method == 'semantic':
- #             return self.semantic_chunking(text, max_size)
- #         else:
- #             return [text]
- 
- def detect_language(text: str) -> str:
-     try:
-         return detect(text)
-     except Exception:
-         # Default to English if detection fails
-         return 'en'
- 
- 
- def load_document(file_path: str) -> str:
-     with open(file_path, 'r', encoding='utf-8') as file:
-         text = file.read()
-     return re.sub(r'\s+', ' ', text).strip()
- 
- 
- def improved_chunking_process(text: str, chunk_options: Dict[str, Any] = None) -> List[Dict[str, Any]]:
-     logging.debug("Improved chunking process started...")
- 
-     # Extract JSON metadata if present
-     json_content = {}
-     try:
-         json_end = text.index("}\n") + 1
-         json_content = json.loads(text[:json_end])
-         text = text[json_end:].strip()
-         logging.debug(f"Extracted JSON metadata: {json_content}")
-     except (ValueError, json.JSONDecodeError):
-         logging.debug("No JSON metadata found at the beginning of the text")
- 
-     # Extract any additional header text
-     header_match = re.match(r"(This text was transcribed using.*?)\n\n", text, re.DOTALL)
-     header_text = ""
-     if header_match:
-         header_text = header_match.group(1)
-         text = text[len(header_text):].strip()
-         logging.debug(f"Extracted header text: {header_text}")
- 
-     # NOTE: the parameter shadows the module-level chunk_options defaults;
-     # the redundant self-update of the copied dict has been dropped.
-     options = chunk_options.copy() if chunk_options else {}
- 
-     chunk_method = options.get('method', 'words')
-     max_size = options.get('max_size', 2000)
-     overlap = options.get('overlap', 0)
-     language = options.get('language', None)
- 
-     if language is None:
-         language = detect_language(text)
- 
-     if chunk_method == 'json':
-         chunks = chunk_text_by_json(text, max_size=max_size, overlap=overlap)
-     else:
-         chunks = chunk_text(text, chunk_method, max_size, overlap, language)
- 
-     chunks_with_metadata = []
-     total_chunks = len(chunks)
-     for i, chunk in enumerate(chunks):
-         metadata = {
-             'chunk_index': i + 1,
-             'total_chunks': total_chunks,
-             'chunk_method': chunk_method,
-             'max_size': max_size,
-             'overlap': overlap,
-             'language': language,
-             'relative_position': (i + 1) / total_chunks
-         }
-         metadata.update(json_content)  # Add the extracted JSON content to metadata
-         metadata['header_text'] = header_text  # Add the header text to metadata
- 
-         if chunk_method == 'json':
-             chunk_text_content = json.dumps(chunk['json'], ensure_ascii=False)
-         else:
-             chunk_text_content = chunk
- 
-         chunks_with_metadata.append({
-             'text': chunk_text_content,
-             'metadata': metadata
-         })
- 
-     return chunks_with_metadata
- 
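
Each element returned by improved_chunking_process pairs the chunk text with its metadata. A sketch of the shape (field values illustrative):

    chunks = improved_chunking_process(
        "Some long document text ...",
        {'method': 'words', 'max_size': 100, 'overlap': 10})
    # Each entry looks like:
    # {'text': '...chunk content...',
    #  'metadata': {'chunk_index': 1, 'total_chunks': 5,
    #               'chunk_method': 'words', 'max_size': 100, 'overlap': 10,
    #               'language': 'en', 'relative_position': 0.2,
    #               'header_text': ''}}   # plus any JSON metadata found at the head of the text
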
- 
- def multi_level_chunking(text: str, method: str, max_size: int, overlap: int, language: str) -> List[str]:
-     logging.debug("Multi-level chunking process started...")
-     # First level: chunk by paragraphs
-     paragraphs = chunk_text_by_paragraphs(text, max_size * 2, overlap)
- 
-     # Second level: chunk each paragraph further
-     chunks = []
-     for para in paragraphs:
-         if method == 'words':
-             chunks.extend(chunk_text_by_words(para, max_words=max_size, overlap=overlap, language=language))
-         elif method == 'sentences':
-             chunks.extend(chunk_text_by_sentences(para, max_sentences=max_size, overlap=overlap, language=language))
-         else:
-             chunks.append(para)
- 
-     return chunks
- 
- 
- # FIXME - ensure language detection occurs in each chunk function
- def chunk_text(text: str, method: str, max_size: int, overlap: int, language: str = None) -> List[str]:
-     if method == 'words':
-         logging.debug("Chunking by words...")
-         return chunk_text_by_words(text, max_words=max_size, overlap=overlap, language=language)
-     elif method == 'sentences':
-         logging.debug("Chunking by sentences...")
-         return chunk_text_by_sentences(text, max_sentences=max_size, overlap=overlap, language=language)
-     elif method == 'paragraphs':
-         logging.debug("Chunking by paragraphs...")
-         return chunk_text_by_paragraphs(text, max_paragraphs=max_size, overlap=overlap)
-     elif method == 'tokens':
-         logging.debug("Chunking by tokens...")
-         return chunk_text_by_tokens(text, max_tokens=max_size, overlap=overlap)
-     elif method == 'semantic':
-         logging.debug("Chunking by semantic similarity...")
-         return semantic_chunking(text, max_chunk_size=max_size)
-     else:
-         logging.warning(f"Unknown chunking method '{method}'. Returning full text as a single chunk.")
-         return [text]
- 
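
A note on the word/sentence/paragraph chunkers that follow: they advance by max_size minus overlap, so consecutive chunks share exactly `overlap` units. A worked example for the word chunker (passing language='en' to skip detection; the input string is illustrative):

    chunks = chunk_text_by_words("one two three four five six seven eight",
                                 max_words=4, overlap=1, language='en')
    # The step is 4 - 1 = 3, so the result is:
    # ['one two three four', 'four five six seven', 'seven eight']
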
212
- def determine_chunk_position(relative_position: float) -> str:
213
- if relative_position < 0.33:
214
- return "This chunk is from the beginning of the document"
215
- elif relative_position < 0.66:
216
- return "This chunk is from the middle of the document"
217
- else:
218
- return "This chunk is from the end of the document"
219
-
220
-
221
- def chunk_text_by_words(text: str, max_words: int = 300, overlap: int = 0, language: str = None) -> List[str]:
222
- logging.debug("chunk_text_by_words...")
223
- if language is None:
224
- language = detect_language(text)
225
-
226
- if language.startswith('zh'): # Chinese
227
- import jieba
228
- words = list(jieba.cut(text))
229
- elif language == 'ja': # Japanese
230
- import fugashi
231
- tagger = fugashi.Tagger()
232
- words = [word.surface for word in tagger(text)]
233
- else: # Default to simple splitting for other languages
234
- words = text.split()
235
-
236
- chunks = []
237
- for i in range(0, len(words), max_words - overlap):
238
- chunk = ' '.join(words[i:i + max_words])
239
- chunks.append(chunk)
240
- return post_process_chunks(chunks)
241
-
242
-
243
- def chunk_text_by_sentences(text: str, max_sentences: int = 10, overlap: int = 0, language: str = None) -> List[str]:
244
- logging.debug("chunk_text_by_sentences...")
245
- if language is None:
246
- language = detect_language(text)
247
-
248
- if language.startswith('zh'): # Chinese
249
- import jieba
250
- # Use jieba to perform sentence segmentation
251
- # jieba does not support sentence segmentation out of the box
252
- # Use punctuation as delimiters
253
- sentences = re.split(r'[。!?;]', text)
254
- sentences = [s.strip() for s in sentences if s.strip()]
255
- elif language == 'ja': # Japanese
256
- import fugashi
257
- tagger = fugashi.Tagger()
258
- # Simple sentence segmentation based on punctuation
259
- sentences = re.split(r'[。!?]', text)
260
- sentences = [s.strip() for s in sentences if s.strip()]
261
- else: # Default to NLTK for other languages
262
- try:
263
- sentences = sent_tokenize(text, language=language)
264
- except LookupError:
265
- logging.warning(f"Punkt tokenizer not found for language '{language}'. Using default 'english'.")
266
- sentences = sent_tokenize(text, language='english')
267
-
268
- chunks = []
269
- previous_overlap = []
270
-
271
- for i in range(0, len(sentences), max_sentences - overlap):
272
- current_sentences = sentences[i:i + max_sentences]
273
- if overlap > 0 and previous_overlap:
274
- current_sentences = previous_overlap + current_sentences
275
- chunk = ' '.join(current_sentences)
276
- chunks.append(chunk)
277
- previous_overlap = sentences[i + max_sentences - overlap:i + max_sentences] if overlap > 0 else []
278
-
279
- return post_process_chunks(chunks)
280
-
281
-
282
- def chunk_text_by_paragraphs(text: str, max_paragraphs: int = 5, overlap: int = 0) -> List[str]:
283
- logging.debug("chunk_text_by_paragraphs...")
284
- paragraphs = re.split(r'\n\s*\n', text)
285
- chunks = []
286
- for i in range(0, len(paragraphs), max_paragraphs - overlap):
287
- chunk = '\n\n'.join(paragraphs[i:i + max_paragraphs])
288
- chunks.append(chunk)
289
- return post_process_chunks(chunks)
290
-
291
-
292
- def chunk_text_by_tokens(text: str, max_tokens: int = 1000, overlap: int = 0) -> List[str]:
293
- logging.debug("chunk_text_by_tokens...")
294
- # This is a simplified token-based chunking. For more accurate tokenization,
295
- # consider using a proper tokenizer like GPT-2 TokenizerFast
296
- words = text.split()
297
- chunks = []
298
- current_chunk = []
299
- current_token_count = 0
300
-
301
- for word in words:
302
- word_token_count = len(word) // 4 + 1 # Rough estimate of token count
303
- if current_token_count + word_token_count > max_tokens and current_chunk:
304
- chunks.append(' '.join(current_chunk))
305
- current_chunk = current_chunk[-overlap:] if overlap > 0 else []
306
- current_token_count = sum(len(w) // 4 + 1 for w in current_chunk)
307
-
308
- current_chunk.append(word)
309
- current_token_count += word_token_count
310
-
311
- if current_chunk:
312
- chunks.append(' '.join(current_chunk))
313
-
314
- return post_process_chunks(chunks)
315
- # def chunk_text_by_tokens(text: str, max_tokens: int = 1000, overlap: int = 0) -> List[str]:
316
- # logging.debug("chunk_text_by_tokens...")
317
- # # Use GPT2 tokenizer for tokenization
318
- # tokens = tokenizer.encode(text)
319
- # chunks = []
320
- # for i in range(0, len(tokens), max_tokens - overlap):
321
- # chunk_tokens = tokens[i:i + max_tokens]
322
- # chunk = tokenizer.decode(chunk_tokens)
323
- # chunks.append(chunk)
324
- # return post_process_chunks(chunks)
325
-
326
-
327
- def post_process_chunks(chunks: List[str]) -> List[str]:
328
- return [chunk.strip() for chunk in chunks if chunk.strip()]
329
-
330
-
331
- # FIXME - F
332
- def get_chunk_metadata(chunk: str, full_text: str, chunk_type: str = "generic",
333
- chapter_number: Optional[int] = None,
334
- chapter_pattern: Optional[str] = None,
335
- language: str = None) -> Dict[str, Any]:
336
- """
337
- Generate metadata for a chunk based on its position in the full text.
338
- """
339
- chunk_length = len(chunk)
340
- start_index = full_text.find(chunk)
341
- end_index = start_index + chunk_length if start_index != -1 else None
342
-
343
- # Calculate a hash for the chunk
344
- chunk_hash = hashlib.md5(chunk.encode()).hexdigest()
345
-
346
- metadata = {
347
- 'start_index': start_index,
348
- 'end_index': end_index,
349
- 'word_count': len(chunk.split()),
350
- 'char_count': chunk_length,
351
- 'chunk_type': chunk_type,
352
- 'language': language,
353
- 'chunk_hash': chunk_hash,
354
- 'relative_position': start_index / len(full_text) if len(full_text) > 0 and start_index != -1 else 0
355
- }
356
-
357
- if chunk_type == "chapter":
358
- metadata['chapter_number'] = chapter_number
359
- metadata['chapter_pattern'] = chapter_pattern
360
-
361
- return metadata
362
-
363
-
364
- def process_document_with_metadata(text: str, chunk_options: Dict[str, Any],
365
- document_metadata: Dict[str, Any]) -> Dict[str, Any]:
366
- chunks = improved_chunking_process(text, chunk_options)
367
-
368
- return {
369
- 'document_metadata': document_metadata,
370
- 'chunks': chunks
371
- }
372
-
373
-
374
- # Hybrid approach, chunk each sentence while ensuring total token size does not exceed a maximum number
375
- def chunk_text_hybrid(text: str, max_tokens: int = 1000, overlap: int = 0) -> List[str]:
376
- logging.debug("chunk_text_hybrid...")
377
- sentences = sent_tokenize(text)
378
- chunks = []
379
- current_chunk = []
380
- current_length = 0
381
-
382
- for sentence in sentences:
383
- tokens = tokenizer.encode(sentence)
384
- if current_length + len(tokens) > max_tokens and current_chunk:
385
- chunks.append(' '.join(current_chunk))
386
- # Handle overlap
387
- if overlap > 0:
388
- overlap_tokens = tokenizer.encode(' '.join(current_chunk[-overlap:]))
389
- current_chunk = current_chunk[-overlap:]
390
- current_length = len(overlap_tokens)
391
- else:
392
- current_chunk = []
393
- current_length = 0
394
-
395
- current_chunk.append(sentence)
396
- current_length += len(tokens)
397
-
398
- if current_chunk:
399
- chunks.append(' '.join(current_chunk))
400
-
401
- return post_process_chunks(chunks)
402
-
403
-
404
- # Thanks openai
405
- def chunk_on_delimiter(input_string: str,
406
- max_tokens: int,
407
- delimiter: str) -> List[str]:
408
- logging.debug("chunk_on_delimiter...")
409
- chunks = input_string.split(delimiter)
410
- combined_chunks, _, dropped_chunk_count = combine_chunks_with_no_minimum(
411
- chunks, max_tokens, chunk_delimiter=delimiter, add_ellipsis_for_overflow=True)
412
- if dropped_chunk_count > 0:
413
- logging.warning(f"Warning: {dropped_chunk_count} chunks were dropped due to exceeding the token limit.")
414
- combined_chunks = [f"{chunk}{delimiter}" for chunk in combined_chunks]
415
- return combined_chunks
416
-
417
-
418
-
419
-
420
- # FIXME
421
- def recursive_summarize_chunks(chunks: List[str], summarize_func, custom_prompt: Optional[str] = None,
422
- temp: Optional[float] = None, system_prompt: Optional[str] = None) -> List[str]:
423
- logging.debug("recursive_summarize_chunks...")
424
- summarized_chunks = []
425
- current_summary = ""
426
-
427
- logging.debug(f"Summarizing {len(chunks)} chunks recursively...")
428
- logging.debug(f"Temperature is set to {temp}")
429
- for i, chunk in enumerate(chunks):
430
- if i == 0:
431
- current_summary = summarize_func(chunk, custom_prompt, temp, system_prompt)
432
- else:
433
- combined_text = current_summary + "\n\n" + chunk
434
- current_summary = summarize_func(combined_text, custom_prompt, temp, system_prompt)
435
-
436
- summarized_chunks.append(current_summary)
437
-
438
- return summarized_chunks
439
-
440
-
441
- # Sample text for testing
442
- sample_text = """
443
- Natural language processing (NLP) is a subfield of linguistics, computer science, and artificial intelligence
444
- concerned with the interactions between computers and human language, in particular how to program computers
445
- to process and analyze large amounts of natural language data. The result is a computer capable of "understanding"
446
- the contents of documents, including the contextual nuances of the language within them. The technology can then
447
- accurately extract information and insights contained in the documents as well as categorize and organize the documents themselves.
448
-
449
- Challenges in natural language processing frequently involve speech recognition, natural language understanding,
450
- and natural language generation.
451
-
452
- Natural language processing has its roots in the 1950s. Already in 1950, Alan Turing published an article titled
453
- "Computing Machinery and Intelligence" which proposed what is now called the Turing test as a criterion of intelligence.
454
- """
455
-
456
- # Example usage of different chunking methods
457
- # print("Chunking by words:")
458
- # print(chunk_text_by_words(sample_text, max_words=50))
459
- #
460
- # print("\nChunking by sentences:")
461
- # print(chunk_text_by_sentences(sample_text, max_sentences=2))
462
- #
463
- # print("\nChunking by paragraphs:")
464
- # print(chunk_text_by_paragraphs(sample_text, max_paragraphs=1))
465
- #
466
- # print("\nChunking by tokens:")
467
- # print(chunk_text_by_tokens(sample_text, max_tokens=50))
468
- #
469
- # print("\nHybrid chunking:")
470
- # print(chunk_text_hybrid(sample_text, max_tokens=50))
471
-
472
-
473
-
474
- #######################################################################################################################
475
- #
476
- # Experimental Semantic Chunking
477
- #
478
-
479
- # Chunk text into segments based on semantic similarity
480
- def count_units(text: str, unit: str = 'words') -> int:
481
- if unit == 'words':
482
- return len(text.split())
483
- elif unit == 'tokens':
484
- return len(tokenizer.encode(text))
485
- elif unit == 'characters':
486
- return len(text)
487
- else:
488
- raise ValueError("Invalid unit. Choose 'words', 'tokens', or 'characters'.")
489
-
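- # Quick sanity checks (illustrative, not from the original file); the 'tokens'
- # result depends on the module-level tokenizer.
- # count_units("two words", unit='words')   # -> 2
- # count_units("abc", unit='characters')    # -> 3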
490
-
491
-
492
- def semantic_chunking(text: str, max_chunk_size: int = 2000, unit: str = 'words') -> List[str]:
493
- logging.debug("semantic_chunking...")
494
- sentences = sent_tokenize(text)
495
- vectorizer = TfidfVectorizer()
496
- sentence_vectors = vectorizer.fit_transform(sentences)
497
-
498
- chunks = []
499
- current_chunk = []
500
- current_size = 0
501
-
502
- for i, sentence in enumerate(sentences):
503
- sentence_size = count_units(sentence, unit)
504
- if current_size + sentence_size > max_chunk_size and current_chunk:
505
- chunks.append(' '.join(current_chunk))
506
- # Use last 3 sentences for overlap
507
- current_chunk = current_chunk[-3:]
508
- current_size = count_units(' '.join(current_chunk), unit)
509
-
510
- current_chunk.append(sentence)
511
- current_size += sentence_size
512
-
513
- if i + 1 < len(sentences):
514
- current_vector = sentence_vectors[i]
515
- next_vector = sentence_vectors[i + 1]
516
- similarity = cosine_similarity(current_vector, next_vector)[0][0]
517
- if similarity < 0.5 and current_size >= max_chunk_size // 2:
518
- chunks.append(' '.join(current_chunk))
519
- current_chunk = current_chunk[-3:]
520
- current_size = count_units(' '.join(current_chunk), unit)
521
-
522
- if current_chunk:
523
- chunks.append(' '.join(current_chunk))
524
-
525
- return chunks
526
-
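- # Illustrative usage (not from the original file): chunks are cut either when
- # the size budget is reached or when TF-IDF cosine similarity between adjacent
- # sentences drops below 0.5, carrying a 3-sentence overlap into the next chunk.
- # for chunk in semantic_chunking(sample_text, max_chunk_size=60, unit='words'):
- #     print(len(chunk.split()), chunk[:60])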
527
-
528
- def semantic_chunk_long_file(file_path: str, max_chunk_size: int = 1000, overlap: int = 100, unit: str = 'words') -> Optional[List[str]]:
529
- logging.debug("semantic_chunk_long_file...")
530
- try:
531
- with open(file_path, 'r', encoding='utf-8') as file:
532
- content = file.read()
533
-
534
- chunks = semantic_chunking(content, max_chunk_size, unit)
535
- return chunks
536
- except Exception as e:
537
- logging.error(f"Error chunking text file: {str(e)}")
538
- return None
539
-
540
- #
541
- #
542
- #######################################################################################################################
543
-
544
-
545
- #######################################################################################################################
546
- #
547
- # Embedding Chunking
548
-
549
- def chunk_for_embedding(text: str, file_name: str, custom_chunk_options: Dict[str, Any] = None) -> List[Dict[str, Any]]:
550
- options = chunk_options.copy()
551
- if custom_chunk_options:
552
- options.update(custom_chunk_options)
553
-
554
- logging.info(f"Chunking options: {options}")
555
- chunks = improved_chunking_process(text, options)
556
- total_chunks = len(chunks)
557
- logging.info(f"Total chunks created: {total_chunks}")
558
-
559
- chunked_text_with_headers = []
560
- for i, chunk in enumerate(chunks, 1):
561
- chunk_text = chunk['text']
562
- chunk_position = determine_chunk_position(chunk['metadata']['relative_position'])
563
- chunk_header = f"""
564
- Original Document: {file_name}
565
- Chunk: {i} of {total_chunks}
566
- Position: {chunk_position}
567
-
568
- --- Chunk Content ---
569
- """
570
-
571
- full_chunk_text = chunk_header + chunk_text
572
- chunk['text'] = full_chunk_text
573
- chunk['metadata']['file_name'] = file_name
574
- chunked_text_with_headers.append(chunk)
575
-
576
- return chunked_text_with_headers
577
-
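- # Illustrative usage (not from the original file), assuming the module-level
- # chunk_options defaults: each chunk's text gains a provenance header before
- # it is embedded.
- # for chunk in chunk_for_embedding(sample_text, "nlp_overview.txt"):
- #     print(chunk['metadata']['file_name'], len(chunk['text']))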
578
- #
579
- # End of Embedding Chunking
580
- #######################################################################################################################
581
-
582
-
583
- #######################################################################################################################
584
- #
585
- # JSON Chunking
586
-
587
- # FIXME
588
- def chunk_text_by_json(text: str, max_size: int = 1000, overlap: int = 0) -> List[Dict[str, Any]]:
589
- """
590
- Chunk JSON-formatted text into smaller JSON chunks while preserving structure.
591
-
592
- Parameters:
593
- - text (str): The JSON-formatted text to be chunked.
594
- - max_size (int): Maximum number of items or characters per chunk.
595
- - overlap (int): Number of items or characters to overlap between chunks.
596
-
597
- Returns:
598
- - List[Dict[str, Any]]: A list of chunks with their metadata.
599
- """
600
- logging.debug("chunk_text_by_json started...")
601
- try:
602
- json_data = json.loads(text)
603
- except json.JSONDecodeError as e:
604
- logging.error(f"Invalid JSON data: {e}")
605
- raise ValueError(f"Invalid JSON data: {e}")
606
-
607
- # Determine if JSON data is a list or a dict
608
- if isinstance(json_data, list):
609
- return chunk_json_list(json_data, max_size, overlap)
610
- elif isinstance(json_data, dict):
611
- return chunk_json_dict(json_data, max_size, overlap)
612
- else:
613
- logging.error("Unsupported JSON structure. Only JSON objects and arrays are supported.")
614
- raise ValueError("Unsupported JSON structure. Only JSON objects and arrays are supported.")
615
-
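- # Illustrative usage (not from the original file): a JSON array of 10 items,
- # 4 per chunk with 1 item of overlap, so the window advances by 3 each step.
- # sample = json.dumps([{"idx": i} for i in range(10)])
- # for piece in chunk_text_by_json(sample, max_size=4, overlap=1):
- #     print(piece['metadata']['chunk_index'], [d['idx'] for d in piece['json']])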
616
-
617
- def chunk_json_list(json_list: List[Any], max_size: int, overlap: int) -> List[Dict[str, Any]]:
618
- """
619
- Chunk a JSON array into smaller chunks.
620
-
621
- Parameters:
622
- - json_list (List[Any]): The JSON array to be chunked.
623
- - max_size (int): Maximum number of items per chunk.
624
- - overlap (int): Number of items to overlap between chunks.
625
-
626
- Returns:
627
- - List[Dict[str, Any]]: A list of JSON chunks with metadata.
628
- """
629
- logging.debug("chunk_json_list started...")
630
- chunks = []
631
- total_items = len(json_list)
632
- step = max_size - overlap
633
- if step <= 0:
634
- raise ValueError("max_size must be greater than overlap.")
635
-
636
- for i in range(0, total_items, step):
637
- chunk = json_list[i:i + max_size]
638
- metadata = {
639
- 'chunk_index': i // step + 1,
640
- 'total_chunks': (total_items + step - 1) // step,
641
- 'chunk_method': 'json_list',
642
- 'max_size': max_size,
643
- 'overlap': overlap,
644
- 'relative_position': i / total_items
645
- }
646
- chunks.append({
647
- 'json': chunk,
648
- 'metadata': metadata
649
- })
650
-
651
- logging.debug(f"chunk_json_list created {len(chunks)} chunks.")
652
- return chunks
653
-
654
-
655
-
656
- def chunk_json_dict(json_dict: Dict[str, Any], max_size: int, overlap: int) -> List[Dict[str, Any]]:
657
- """
658
- Chunk a JSON object into smaller chunks based on its 'data' key while preserving other keys like 'metadata'.
659
-
660
- Parameters:
661
- - json_dict (Dict[str, Any]): The JSON object to be chunked.
662
- - max_size (int): Maximum number of key-value pairs per chunk in the 'data' section.
663
- - overlap (int): Number of key-value pairs to overlap between chunks.
664
-
665
- Returns:
666
- - List[Dict[str, Any]]: A list of JSON chunks with metadata.
667
- """
668
- logging.debug("chunk_json_dict started...")
669
-
670
- # Preserve non-chunked sections
671
- preserved_keys = ['metadata']
672
- preserved_data = {key: value for key, value in json_dict.items() if key in preserved_keys}
673
-
674
- # Identify the chunkable section
675
- chunkable_key = 'data'
676
- if chunkable_key not in json_dict or not isinstance(json_dict[chunkable_key], dict):
677
- logging.error("No chunkable 'data' section found in JSON dictionary.")
678
- raise ValueError("No chunkable 'data' section found in JSON dictionary.")
679
-
680
- chunkable_data = json_dict[chunkable_key]
681
- data_keys = list(chunkable_data.keys())
682
- total_keys = len(data_keys)
683
- chunks = []
684
- step = max_size - overlap
685
- if step <= 0:
686
- raise ValueError("max_size must be greater than overlap.")
687
-
688
- # Adjust the loop to prevent creating an extra chunk
689
- for i in range(0, total_keys, step):
690
- chunk_keys = data_keys[i:i + max_size]
691
-
692
- # Handle overlap
693
- if i != 0 and overlap > 0:
694
- overlap_keys = data_keys[i - overlap:i]
695
- chunk_keys = overlap_keys + chunk_keys
696
-
697
- # Remove duplicate keys caused by overlap
698
- unique_chunk_keys = []
699
- seen_keys = set()
700
- for key in chunk_keys:
701
- if key not in seen_keys:
702
- unique_chunk_keys.append(key)
703
- seen_keys.add(key)
704
-
705
- chunk_data = {key: chunkable_data[key] for key in unique_chunk_keys}
706
-
707
- metadata = {
708
- 'chunk_index': (i // step) + 1,
709
- 'total_chunks': (total_keys + step - 1) // step,
710
- 'chunk_method': 'json_dict',
711
- 'max_size': max_size,
712
- 'overlap': overlap,
713
- 'language': 'english', # Assuming English; modify as needed
714
- 'relative_position': (i // step + 1) / ((total_keys + step - 1) // step)
715
- }
716
-
717
- # Merge preserved data into metadata
718
- metadata.update(preserved_data.get('metadata', {}))
719
-
720
- # Create the chunk with preserved data
721
- chunk = {
722
- 'metadata': preserved_data,
723
- 'data': chunk_data
724
- }
725
-
726
- chunks.append({
727
- 'json': chunk,
728
- 'metadata': metadata
729
- })
730
-
731
- logging.debug(f"chunk_json_dict created {len(chunks)} chunks.")
732
- return chunks
733
-
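- # Illustrative usage (not from the original file): the top-level 'metadata'
- # key is carried into every chunk while the 'data' mapping is windowed.
- # doc = json.dumps({"metadata": {"source": "example"},
- #                   "data": {f"k{i}": i for i in range(6)}})
- # for piece in chunk_text_by_json(doc, max_size=3, overlap=1):
- #     print(piece['metadata']['chunk_index'], sorted(piece['json']['data']))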
734
-
735
- #
736
- # End of JSON Chunking
737
- #######################################################################################################################
738
-
739
- #######################################################################################################################
740
- #
741
- # OpenAI Rolling Summarization
742
- #
743
-
744
- client = OpenAI(api_key=openai_api_key)
745
- def get_chat_completion(messages, model='gpt-4-turbo'):
746
- response = client.chat.completions.create(
747
- model=model,
748
- messages=messages,
749
- temperature=0,
750
- )
751
- return response.choices[0].message.content
752
-
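- # Illustrative usage (not from the original file); requires a valid OpenAI key
- # and performs a live API call.
- # reply = get_chat_completion([
- #     {"role": "system", "content": "You are terse."},
- #     {"role": "user", "content": "Say hi."}])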
753
-
754
- # This function combines text chunks into larger blocks without exceeding a specified token count.
755
- # It returns the combined chunks, their original indices, and the number of dropped chunks due to overflow.
756
- def combine_chunks_with_no_minimum(
757
- chunks: List[str],
758
- max_tokens: int,
759
- chunk_delimiter: str = "\n\n",
760
- header: Optional[str] = None,
761
- add_ellipsis_for_overflow: bool = False,
762
- ) -> Tuple[List[str], List[List[int]], int]:
763
- dropped_chunk_count = 0
764
- output = [] # list to hold the final combined chunks
765
- output_indices = [] # list to hold the indices of the final combined chunks
766
- candidate = [header] if header else [] # list to hold the current combined chunk candidate
767
- candidate_indices = []
768
- for chunk_i, chunk in enumerate(chunks):
769
- chunk_with_header = [chunk] if not header else [header, chunk]
770
- combined_text = chunk_delimiter.join(candidate + chunk_with_header)
771
- token_count = len(tokenizer.encode(combined_text))
772
- if token_count > max_tokens:
773
- if add_ellipsis_for_overflow and len(candidate) > 0:
774
- ellipsis_text = chunk_delimiter.join(candidate + ["..."])
775
- if len(tokenizer.encode(ellipsis_text)) <= max_tokens:
776
- candidate = candidate + ["..."]
777
- # The current chunk re-seeds the candidate below, so it is not
- # dropped here and must not be counted in dropped_chunk_count.
778
- if len(candidate) > 0:
779
- output.append(chunk_delimiter.join(candidate))
780
- output_indices.append(candidate_indices)
781
- candidate = chunk_with_header
782
- candidate_indices = [chunk_i]
783
- else:
784
- logging.warning(f"Single chunk at index {chunk_i} exceeds max_tokens and will be dropped.")
785
- dropped_chunk_count += 1
786
- else:
787
- candidate.extend(chunk_with_header)
788
- candidate_indices.append(chunk_i)
789
-
790
- if candidate:
791
- output.append(chunk_delimiter.join(candidate))
792
- output_indices.append(candidate_indices)
793
- return output, output_indices, dropped_chunk_count
794
-
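- # Illustrative usage (not from the original file): three short strings fit one
- # 512-token block, so all indices land together and nothing is dropped.
- # blocks, indices, dropped = combine_chunks_with_no_minimum(
- #     ["alpha", "beta", "gamma"], max_tokens=512, chunk_delimiter="\n\n")
- # print(blocks, indices, dropped)  # -> ['alpha\n\nbeta\n\ngamma'], [[0, 1, 2]], 0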
795
-
796
- def rolling_summarize(text: str,
797
- detail: float = 0,
798
- model: str = 'gpt-4o',
799
- additional_instructions: Optional[str] = None,
800
- minimum_chunk_size: Optional[int] = 500,
801
- chunk_delimiter: str = ".",
802
- summarize_recursively: bool = False,
803
- verbose: bool = False) -> str:
804
- """
805
- Summarizes a given text by splitting it into chunks, each of which is summarized individually.
806
- The level of detail in the summary can be adjusted, and the process can optionally be made recursive.
807
-
808
- Parameters:
809
- - text (str): The text to be summarized.
810
- - detail (float, optional): A value between 0 and 1 indicating the desired level of detail in the summary.
- - model (str, optional): The chat model used to summarize each chunk.
811
- - additional_instructions (Optional[str], optional): Additional instructions for the model.
812
- - minimum_chunk_size (Optional[int], optional): The minimum size for text chunks.
813
- - chunk_delimiter (str, optional): The delimiter used to split the text into chunks.
814
- - summarize_recursively (bool, optional): If True, summaries are generated recursively.
815
- - verbose (bool, optional): If True, prints detailed information about the chunking process.
816
-
817
- Returns:
818
- - str: The final compiled summary of the text.
819
-
820
- The function first determines the number of chunks by interpolating between a minimum and a maximum chunk count
821
- based on the `detail` parameter. It then splits the text into chunks and summarizes each chunk. If
822
- `summarize_recursively` is True, each summary is based on the previous summaries, adding more context to the
823
- summarization process. The function returns a compiled summary of all chunks.
824
- """
825
-
826
- # Check detail is set correctly
827
- assert 0 <= detail <= 1, "Detail must be between 0 and 1."
828
-
829
- # Interpolate the number of chunks based on the detail parameter
830
- text_length = len(tokenizer.encode(text))
831
- max_chunks = text_length // minimum_chunk_size if minimum_chunk_size else 10
832
- min_chunks = 1
833
- num_chunks = int(min_chunks + detail * (max_chunks - min_chunks))
834
-
835
- # Adjust chunk_size based on interpolated number of chunks
836
- chunk_size = max(minimum_chunk_size, text_length // num_chunks) if num_chunks else text_length
837
- text_chunks = chunk_on_delimiter(text, chunk_size, chunk_delimiter)
838
- if verbose:
839
- print(f"Splitting the text into {len(text_chunks)} chunks to be summarized.")
840
- print(f"Chunk lengths are {[len(tokenizer.encode(x)) for x in text_chunks]} tokens.")
841
-
842
- # Set system message
843
- system_message_content = "Rewrite this text in summarized form."
844
- if additional_instructions:
845
- system_message_content += f"\n\n{additional_instructions}"
846
-
847
- accumulated_summaries = []
848
- for i, chunk in enumerate(tqdm(text_chunks, desc="Summarizing chunks")):
849
- if summarize_recursively and accumulated_summaries:
850
- # Combine previous summary with current chunk for recursive summarization
851
- combined_text = accumulated_summaries[-1] + "\n\n" + chunk
852
- user_message_content = f"Previous summary and new content to summarize:\n\n{combined_text}"
853
- else:
854
- user_message_content = chunk
855
-
856
- messages = [
857
- {"role": "system", "content": system_message_content},
858
- {"role": "user", "content": user_message_content}
859
- ]
860
-
861
- response = get_chat_completion(messages, model=model)
862
- accumulated_summaries.append(response)
863
-
864
- final_summary = '\n\n'.join(accumulated_summaries)
865
- return final_summary
866
-
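- # Illustrative usage (not from the original file); makes one OpenAI call per
- # chunk. detail=0 collapses toward a single summary, detail=1 keeps roughly
- # one chunk per minimum_chunk_size tokens.
- # summary = rolling_summarize(sample_text, detail=0.3,
- #                             additional_instructions="Use bullet points.",
- #                             summarize_recursively=True, verbose=True)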
867
- #
868
- #
869
- #######################################################################################################################
870
- #
871
- # Ebook Chapter Chunking
872
-
873
-
874
- def chunk_ebook_by_chapters(text: str, chunk_options: Dict[str, Any]) -> List[Dict[str, Any]]:
875
- logging.debug("chunk_ebook_by_chapters")
876
- max_chunk_size = int(chunk_options.get('max_size', 300))
877
- overlap = int(chunk_options.get('overlap', 0))
878
- custom_pattern = chunk_options.get('custom_chapter_pattern', None)
879
-
880
- # List of chapter heading patterns to try, in order
881
- chapter_patterns = [
882
- custom_pattern,
883
- r'^#{1,2}\s+', # Markdown style: '# ' or '## '
884
- r'^Chapter\s+\d+', # 'Chapter ' followed by numbers
885
- r'^\d+\.\s+', # Numbered chapters: '1. ', '2. ', etc.
886
- r'^[A-Z\s]+$' # All caps headings
887
- ]
888
-
889
- chapter_positions = []
890
- used_pattern = None
891
-
892
- for pattern in chapter_patterns:
893
- if pattern is None:
894
- continue
895
- chapter_regex = re.compile(pattern, re.MULTILINE | re.IGNORECASE)
896
- chapter_positions = [match.start() for match in chapter_regex.finditer(text)]
897
- if chapter_positions:
898
- used_pattern = pattern
899
- break
900
-
901
- # If no chapters found, return the entire content as one chunk
902
- if not chapter_positions:
903
- metadata = get_chunk_metadata(
904
- chunk=text,
905
- full_text=text,
906
- chunk_type="whole_document",
907
- language=chunk_options.get('language', 'english')
908
- )
909
- return [{'text': text, 'metadata': metadata}]
910
-
911
- # Split content into chapters
912
- chunks = []
913
- for i in range(len(chapter_positions)):
914
- start = chapter_positions[i]
915
- end = chapter_positions[i + 1] if i + 1 < len(chapter_positions) else None
916
- chapter = text[start:end]
917
-
918
- # Apply overlap if specified
919
- if overlap > 0 and i > 0:
920
- overlap_start = max(0, chapter_positions[i] - overlap)
921
- chapter = text[overlap_start:end]
922
-
923
- chunks.append(chapter)
924
-
925
- # Post-process chunks
926
- processed_chunks = post_process_chunks(chunks)
927
-
928
- # Add metadata to chunks
929
- chunks_with_metadata = []
930
- for i, chunk in enumerate(processed_chunks):
931
- metadata = get_chunk_metadata(
932
- chunk=chunk,
933
- full_text=text,
934
- chunk_type="chapter",
935
- chapter_number=i + 1,
936
- chapter_pattern=used_pattern,
937
- language=chunk_options.get('language', 'english')
938
- )
939
- chunks_with_metadata.append({'text': chunk, 'metadata': metadata})
940
-
941
- return chunks_with_metadata
942
-
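- # Illustrative usage (not from the original file): markdown-style headings are
- # the first pattern tried, so this text splits into two chapter chunks
- # (assuming get_chunk_metadata records the chapter_number passed to it).
- # book = "# Chapter One\nIntro text.\n# Chapter Two\nMore text."
- # for ch in chunk_ebook_by_chapters(book, {'max_size': 300, 'overlap': 0}):
- #     print(ch['metadata']['chapter_number'], ch['text'][:20])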
943
- #
944
- # End of ebook chapter chunking
945
- #######################################################################################################################
946
-
947
- #######################################################################################################################
948
- #
949
- # Functions for adaptive chunking:
950
-
951
- # FIXME - punkt
952
-
953
- def adaptive_chunk_size(text: str, base_size: int = 1000, min_size: int = 500, max_size: int = 2000) -> int:
954
- # Tokenize the text into sentences
955
- sentences = sent_tokenize(text)
956
-
957
- if not sentences:
958
- return base_size
959
-
960
- # Calculate average sentence length
961
- avg_sentence_length = sum(len(s.split()) for s in sentences) / len(sentences)
962
-
963
- # Adjust chunk size based on average sentence length
964
- if avg_sentence_length < 10:
965
- size_factor = 1.2 # Increase chunk size for short sentences
966
- elif avg_sentence_length > 20:
967
- size_factor = 0.8 # Decrease chunk size for long sentences
968
- else:
969
- size_factor = 1.0
970
-
971
- # Calculate adaptive chunk size
972
- adaptive_size = int(base_size * size_factor)
973
-
974
- # Ensure chunk size is within bounds
975
- return max(min_size, min(adaptive_size, max_size))
976
-
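- # Worked example (illustrative, not from the original file): three sentences
- # averaging one word each fall below the 10-word threshold, so base_size is
- # scaled by 1.2: adaptive_chunk_size("Short. Tiny. Wee.", base_size=1000)
- # returns max(500, min(int(1000 * 1.2), 2000)) == 1200.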
977
-
978
- def adaptive_chunk_size_non_punkt(text: str, base_size: int, min_size: int = 100, max_size: int = 2000) -> int:
979
- # Adaptive logic: adjust chunk size based on text complexity
980
- words = text.split()
981
- if not words:
982
- return base_size # Return base_size if text is empty
983
-
984
- avg_word_length = sum(len(word) for word in words) / len(words)
985
-
986
- if avg_word_length > 6: # Threshold for "complex" text
987
- adjusted_size = int(base_size * 0.8) # Reduce chunk size for complex text
988
- elif avg_word_length < 4: # Threshold for "simple" text
989
- adjusted_size = int(base_size * 1.2) # Increase chunk size for simple text
990
- else:
991
- adjusted_size = base_size
992
-
993
- # Ensure the chunk size is within the specified range
994
- return max(min_size, min(adjusted_size, max_size))
995
-
996
-
997
- def adaptive_chunking(text: str, base_size: int = 1000, min_size: int = 500, max_size: int = 2000) -> List[str]:
998
- logging.debug("adaptive_chunking...")
999
- chunk_size = adaptive_chunk_size(text, base_size, min_size, max_size)
1000
- words = text.split()
1001
- chunks = []
1002
- current_chunk = []
1003
- current_length = 0
1004
-
1005
- for word in words:
1006
- if current_length + len(word) > chunk_size and current_chunk:
1007
- chunks.append(' '.join(current_chunk))
1008
- current_chunk = []
1009
- current_length = 0
1010
- current_chunk.append(word)
1011
- current_length += len(word) + 1 # +1 for space
1012
-
1013
- if current_chunk:
1014
- chunks.append(' '.join(current_chunk))
1015
-
1016
- return chunks
1017
-
1018
- # FIXME - usage example
1019
- # chunk_options = {
1020
- # 'method': 'words', # or any other method
1021
- # 'base_size': 1000,
1022
- # 'min_size': 100,
1023
- # 'max_size': 2000,
1024
- # 'adaptive': True,
1025
- # 'language': 'en'
1026
- # }
1027
- #chunks = improved_chunking_process(your_text, chunk_options)
1028
-
1029
-
1030
- # Example of chunking a document with metadata
1031
- # document_metadata = {
1032
- # 'title': 'Example Document',
1033
- # 'author': 'John Doe',
1034
- # 'creation_date': '2023-06-14',
1035
- # 'source': 'https://example.com/document',
1036
- # 'document_type': 'article'
1037
- # }
1038
- #
1039
- # chunk_options = {
1040
- # 'method': 'sentences',
1041
- # 'base_size': 1000,
1042
- # 'adaptive': True,
1043
- # 'language': 'en'
1044
- # }
1045
- #
1046
- # processed_document = process_document_with_metadata(your_text, chunk_options, document_metadata)
1047
-
1048
-
1049
- #
1050
- # End of Chunking Library
1051
- #######################################################################################################################

App_Function_Libraries/DB/Character_Chat_DB.py DELETED
@@ -1,702 +0,0 @@
1
- # character_chat_db.py
2
- # Database functions for managing character cards and chat histories.
3
- #
4
- # Imports
5
- import configparser
6
- import sqlite3
7
- import json
8
- import logging
9
- import os
10
- import sys
11
- from typing import List, Dict, Optional, Tuple, Any, Union
12
-
13
- from App_Function_Libraries.Utils.Utils import get_database_dir, get_project_relative_path, get_database_path
14
- #from Tests.Chat_APIs.Chat_APIs_Integration_test import logging
15
-
16
- #
17
- #######################################################################################################################
18
- #
19
- #
20
-
21
- def ensure_database_directory():
22
- os.makedirs(get_database_dir(), exist_ok=True)
23
-
24
- ensure_database_directory()
25
-
26
-
27
- # Construct the path to the config file
28
- config_path = get_project_relative_path('Config_Files/config.txt')
29
-
30
- # Read the config file
31
- config = configparser.ConfigParser()
32
- config.read(config_path)
33
-
34
- # Get the chat db path from the config, or use the default if not specified
35
- chat_DB_PATH = config.get('Database', 'chatDB_path', fallback=get_database_path('chatDB.db'))
36
- print(f"Chat Database path: {chat_DB_PATH}")
37
-
38
- ########################################################################################################
39
- #
40
- # Functions
41
-
42
- # FIXME - Setup properly and test/add documentation for its existence...
43
- def initialize_database():
44
- """Initialize the SQLite database with required tables and FTS5 virtual tables."""
45
- conn = None
46
- try:
47
- conn = sqlite3.connect(chat_DB_PATH)
48
- cursor = conn.cursor()
49
-
50
- # Enable foreign key constraints
51
- cursor.execute("PRAGMA foreign_keys = ON;")
52
-
53
- # Create CharacterCards table with V2 fields
54
- cursor.execute("""
55
- CREATE TABLE IF NOT EXISTS CharacterCards (
56
- id INTEGER PRIMARY KEY AUTOINCREMENT,
57
- name TEXT UNIQUE NOT NULL,
58
- description TEXT,
59
- personality TEXT,
60
- scenario TEXT,
61
- image BLOB,
62
- post_history_instructions TEXT,
63
- first_mes TEXT,
64
- mes_example TEXT,
65
- creator_notes TEXT,
66
- system_prompt TEXT,
67
- alternate_greetings TEXT,
68
- tags TEXT,
69
- creator TEXT,
70
- character_version TEXT,
71
- extensions TEXT,
72
- created_at DATETIME DEFAULT CURRENT_TIMESTAMP
73
- );
74
- """)
75
-
76
- # Create CharacterChats table
77
- cursor.execute("""
78
- CREATE TABLE IF NOT EXISTS CharacterChats (
79
- id INTEGER PRIMARY KEY AUTOINCREMENT,
80
- character_id INTEGER NOT NULL,
81
- conversation_name TEXT,
82
- chat_history TEXT,
83
- is_snapshot BOOLEAN DEFAULT FALSE,
84
- created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
85
- FOREIGN KEY (character_id) REFERENCES CharacterCards(id) ON DELETE CASCADE
86
- );
87
- """)
88
-
89
- # Create FTS5 virtual table for CharacterChats
90
- cursor.execute("""
91
- CREATE VIRTUAL TABLE IF NOT EXISTS CharacterChats_fts USING fts5(
92
- conversation_name,
93
- chat_history,
94
- content='CharacterChats',
95
- content_rowid='id'
96
- );
97
- """)
98
-
99
- # Create triggers to keep FTS5 table in sync with CharacterChats
100
- cursor.executescript("""
101
- CREATE TRIGGER IF NOT EXISTS CharacterChats_ai AFTER INSERT ON CharacterChats BEGIN
102
- INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
103
- VALUES (new.id, new.conversation_name, new.chat_history);
104
- END;
105
-
106
- CREATE TRIGGER IF NOT EXISTS CharacterChats_ad AFTER DELETE ON CharacterChats BEGIN
107
- DELETE FROM CharacterChats_fts WHERE rowid = old.id;
108
- END;
109
-
110
- CREATE TRIGGER IF NOT EXISTS CharacterChats_au AFTER UPDATE ON CharacterChats BEGIN
111
- UPDATE CharacterChats_fts SET conversation_name = new.conversation_name, chat_history = new.chat_history
112
- WHERE rowid = new.id;
113
- END;
114
- """)
115
-
116
- # Create ChatKeywords table
117
- cursor.execute("""
118
- CREATE TABLE IF NOT EXISTS ChatKeywords (
119
- chat_id INTEGER NOT NULL,
120
- keyword TEXT NOT NULL,
121
- FOREIGN KEY (chat_id) REFERENCES CharacterChats(id) ON DELETE CASCADE
122
- );
123
- """)
124
-
125
- # Create indexes for faster searches
126
- cursor.execute("""
127
- CREATE INDEX IF NOT EXISTS idx_chatkeywords_keyword ON ChatKeywords(keyword);
128
- """)
129
- cursor.execute("""
130
- CREATE INDEX IF NOT EXISTS idx_chatkeywords_chat_id ON ChatKeywords(chat_id);
131
- """)
132
-
133
- conn.commit()
134
- logging.info("Database initialized successfully.")
135
- except sqlite3.Error as e:
136
- logging.error(f"SQLite error occurred during database initialization: {e}")
137
- if conn:
138
- conn.rollback()
139
- raise
140
- except Exception as e:
141
- logging.error(f"Unexpected error occurred during database initialization: {e}")
142
- if conn:
143
- conn.rollback()
144
- raise
145
- finally:
146
- if conn:
147
- conn.close()
148
-
149
- # Call initialize_database() at the start of your application
150
- def setup_chat_database():
151
- try:
152
- initialize_database()
153
- except Exception as e:
154
- logging.critical(f"Failed to initialize database: {e}")
155
- sys.exit(1)
156
-
157
- setup_chat_database()
158
-
159
- ########################################################################################################
160
- #
161
- # Character Card handling
162
-
163
- def parse_character_card(card_data: Dict[str, Any]) -> Dict[str, Any]:
164
- """Parse and validate a character card according to V2 specification."""
165
- v2_data = {
166
- 'name': card_data.get('name', ''),
167
- 'description': card_data.get('description', ''),
168
- 'personality': card_data.get('personality', ''),
169
- 'scenario': card_data.get('scenario', ''),
170
- 'first_mes': card_data.get('first_mes', ''),
171
- 'mes_example': card_data.get('mes_example', ''),
172
- 'creator_notes': card_data.get('creator_notes', ''),
173
- 'system_prompt': card_data.get('system_prompt', ''),
174
- 'post_history_instructions': card_data.get('post_history_instructions', ''),
175
- 'alternate_greetings': json.dumps(card_data.get('alternate_greetings', [])),
176
- 'tags': json.dumps(card_data.get('tags', [])),
177
- 'creator': card_data.get('creator', ''),
178
- 'character_version': card_data.get('character_version', ''),
179
- 'extensions': json.dumps(card_data.get('extensions', {}))
180
- }
181
-
182
- # Handle 'image' separately as it might be binary data
183
- v2_data['image'] = card_data.get('image')  # default to None so the INSERT/UPDATE queries below never raise KeyError
185
-
186
- return v2_data
187
-
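- # Illustrative usage (not from the original file): list- and dict-valued V2
- # fields come back JSON-encoded, ready for the TEXT columns defined above.
- # card = parse_character_card({"name": "Ada", "tags": ["mentor", "history"]})
- # card['tags']  # -> '["mentor", "history"]'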
188
-
189
- def add_character_card(card_data: Dict[str, Any]) -> Optional[int]:
190
- """Add or update a character card in the database."""
191
- conn = sqlite3.connect(chat_DB_PATH)
192
- cursor = conn.cursor()
193
- try:
194
- parsed_card = parse_character_card(card_data)
195
-
196
- # Check if character already exists
197
- cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (parsed_card['name'],))
198
- row = cursor.fetchone()
199
-
200
- if row:
201
- # Update existing character
202
- character_id = row[0]
203
- update_query = """
204
- UPDATE CharacterCards
205
- SET description = ?, personality = ?, scenario = ?, image = ?,
206
- post_history_instructions = ?, first_mes = ?, mes_example = ?,
207
- creator_notes = ?, system_prompt = ?, alternate_greetings = ?,
208
- tags = ?, creator = ?, character_version = ?, extensions = ?
209
- WHERE id = ?
210
- """
211
- cursor.execute(update_query, (
212
- parsed_card['description'], parsed_card['personality'], parsed_card['scenario'],
213
- parsed_card['image'], parsed_card['post_history_instructions'], parsed_card['first_mes'],
214
- parsed_card['mes_example'], parsed_card['creator_notes'], parsed_card['system_prompt'],
215
- parsed_card['alternate_greetings'], parsed_card['tags'], parsed_card['creator'],
216
- parsed_card['character_version'], parsed_card['extensions'], character_id
217
- ))
218
- else:
219
- # Insert new character
220
- insert_query = """
221
- INSERT INTO CharacterCards (name, description, personality, scenario, image,
222
- post_history_instructions, first_mes, mes_example, creator_notes, system_prompt,
223
- alternate_greetings, tags, creator, character_version, extensions)
224
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
225
- """
226
- cursor.execute(insert_query, (
227
- parsed_card['name'], parsed_card['description'], parsed_card['personality'],
228
- parsed_card['scenario'], parsed_card['image'], parsed_card['post_history_instructions'],
229
- parsed_card['first_mes'], parsed_card['mes_example'], parsed_card['creator_notes'],
230
- parsed_card['system_prompt'], parsed_card['alternate_greetings'], parsed_card['tags'],
231
- parsed_card['creator'], parsed_card['character_version'], parsed_card['extensions']
232
- ))
233
- character_id = cursor.lastrowid
234
-
235
- conn.commit()
236
- return character_id
237
- except sqlite3.IntegrityError as e:
238
- logging.error(f"Error adding character card: {e}")
239
- return None
240
- except Exception as e:
241
- logging.error(f"Unexpected error adding character card: {e}")
242
- return None
243
- finally:
244
- conn.close()
245
-
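- # Illustrative usage (not from the original file): one call both inserts and
- # updates, keyed on the unique character name.
- # char_id = add_character_card({"name": "Ada", "description": "A mentor.",
- #                               "first_mes": "Hello there."})
- # if char_id is None:
- #     logging.error("Card could not be saved.")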
246
- # def add_character_card(card_data: Dict) -> Optional[int]:
247
- # """Add or update a character card in the database.
248
- #
249
- # Returns the ID of the inserted character or None if failed.
250
- # """
251
- # conn = sqlite3.connect(chat_DB_PATH)
252
- # cursor = conn.cursor()
253
- # try:
254
- # # Ensure all required fields are present
255
- # required_fields = ['name', 'description', 'personality', 'scenario', 'image', 'post_history_instructions', 'first_message']
256
- # for field in required_fields:
257
- # if field not in card_data:
258
- # card_data[field] = '' # Assign empty string if field is missing
259
- #
260
- # # Check if character already exists
261
- # cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (card_data['name'],))
262
- # row = cursor.fetchone()
263
- #
264
- # if row:
265
- # # Update existing character
266
- # character_id = row[0]
267
- # cursor.execute("""
268
- # UPDATE CharacterCards
269
- # SET description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
270
- # WHERE id = ?
271
- # """, (
272
- # card_data['description'],
273
- # card_data['personality'],
274
- # card_data['scenario'],
275
- # card_data['image'],
276
- # card_data['post_history_instructions'],
277
- # card_data['first_message'],
278
- # character_id
279
- # ))
280
- # else:
281
- # # Insert new character
282
- # cursor.execute("""
283
- # INSERT INTO CharacterCards (name, description, personality, scenario, image, post_history_instructions, first_message)
284
- # VALUES (?, ?, ?, ?, ?, ?, ?)
285
- # """, (
286
- # card_data['name'],
287
- # card_data['description'],
288
- # card_data['personality'],
289
- # card_data['scenario'],
290
- # card_data['image'],
291
- # card_data['post_history_instructions'],
292
- # card_data['first_message']
293
- # ))
294
- # character_id = cursor.lastrowid
295
- #
296
- # conn.commit()
297
- # return cursor.lastrowid
298
- # except sqlite3.IntegrityError as e:
299
- # logging.error(f"Error adding character card: {e}")
300
- # return None
301
- # except Exception as e:
302
- # logging.error(f"Unexpected error adding character card: {e}")
303
- # return None
304
- # finally:
305
- # conn.close()
306
-
307
-
308
- def get_character_cards() -> List[Dict]:
309
- """Retrieve all character cards from the database."""
310
- logging.debug(f"Fetching characters from DB: {chat_DB_PATH}")
311
- conn = sqlite3.connect(chat_DB_PATH)
312
- cursor = conn.cursor()
313
- cursor.execute("SELECT * FROM CharacterCards")
314
- rows = cursor.fetchall()
315
- columns = [description[0] for description in cursor.description]
316
- conn.close()
317
- characters = [dict(zip(columns, row)) for row in rows]
318
- #logging.debug(f"Characters fetched from DB: {characters}")
319
- return characters
320
-
321
-
322
- def get_character_card_by_id(character_id: Union[int, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
323
- """
324
- Retrieve a single character card by its ID.
325
-
326
- Args:
327
- character_id: Can be either an integer ID or a dictionary containing character data.
328
-
329
- Returns:
330
- A dictionary containing the character card data, or None if not found.
331
- """
332
- conn = sqlite3.connect(chat_DB_PATH)
333
- cursor = conn.cursor()
334
- try:
335
- if isinstance(character_id, dict):
336
- # If a dictionary is passed, assume it's already a character card
337
- return character_id
338
- elif isinstance(character_id, int):
339
- # If an integer is passed, fetch the character from the database
340
- cursor.execute("SELECT * FROM CharacterCards WHERE id = ?", (character_id,))
341
- row = cursor.fetchone()
342
- if row:
343
- columns = [description[0] for description in cursor.description]
344
- return dict(zip(columns, row))
345
- else:
346
- logging.warning(f"Invalid type for character_id: {type(character_id)}")
347
- return None
348
- except Exception as e:
349
- logging.error(f"Error in get_character_card_by_id: {e}")
350
- return None
351
- finally:
352
- conn.close()
353
-
354
-
355
- def update_character_card(character_id: int, card_data: Dict) -> bool:
356
- """Update an existing character card."""
357
- conn = sqlite3.connect(chat_DB_PATH)
358
- cursor = conn.cursor()
359
- try:
360
- cursor.execute("""
361
- UPDATE CharacterCards
362
- SET name = ?, description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_mes = ?
363
- WHERE id = ?
364
- """, (
365
- card_data.get('name'),
366
- card_data.get('description'),
367
- card_data.get('personality'),
368
- card_data.get('scenario'),
369
- card_data.get('image'),
370
- card_data.get('post_history_instructions', ''),
371
- card_data.get('first_mes', "Hello! I'm ready to chat."),  # the schema column is first_mes, not first_message
372
- character_id
373
- ))
374
- conn.commit()
375
- return cursor.rowcount > 0
376
- except sqlite3.IntegrityError as e:
377
- logging.error(f"Error updating character card: {e}")
378
- return False
379
- finally:
380
- conn.close()
381
-
382
-
383
- def delete_character_card(character_id: int) -> bool:
384
- """Delete a character card and its associated chats."""
385
- conn = sqlite3.connect(chat_DB_PATH)
386
- cursor = conn.cursor()
387
- try:
388
- # Delete associated chats first due to foreign key constraint
389
- cursor.execute("DELETE FROM CharacterChats WHERE character_id = ?", (character_id,))
390
- cursor.execute("DELETE FROM CharacterCards WHERE id = ?", (character_id,))
391
- conn.commit()
392
- return cursor.rowcount > 0
393
- except sqlite3.Error as e:
394
- logging.error(f"Error deleting character card: {e}")
395
- return False
396
- finally:
397
- conn.close()
398
-
399
-
400
- def add_character_chat(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]], keywords: Optional[List[str]] = None, is_snapshot: bool = False) -> Optional[int]:
401
- """
402
- Add a new chat history for a character, optionally associating keywords.
403
-
404
- Args:
405
- character_id (int): The ID of the character.
406
- conversation_name (str): Name of the conversation.
407
- chat_history (List[Tuple[str, str]]): List of (user, bot) message tuples.
408
- keywords (Optional[List[str]]): List of keywords to associate with this chat.
409
- is_snapshot (bool, optional): Whether this chat is a snapshot.
410
-
411
- Returns:
412
- Optional[int]: The ID of the inserted chat or None if failed.
413
- """
414
- conn = sqlite3.connect(chat_DB_PATH)
415
- cursor = conn.cursor()
416
- try:
417
- chat_history_json = json.dumps(chat_history)
418
- cursor.execute("""
419
- INSERT INTO CharacterChats (character_id, conversation_name, chat_history, is_snapshot)
420
- VALUES (?, ?, ?, ?)
421
- """, (
422
- character_id,
423
- conversation_name,
424
- chat_history_json,
425
- is_snapshot
426
- ))
427
- chat_id = cursor.lastrowid
428
-
429
- if keywords:
430
- # Insert keywords into ChatKeywords table
431
- keyword_records = [(chat_id, keyword.strip().lower()) for keyword in keywords]
432
- cursor.executemany("""
433
- INSERT INTO ChatKeywords (chat_id, keyword)
434
- VALUES (?, ?)
435
- """, keyword_records)
436
-
437
- conn.commit()
438
- return chat_id
439
- except sqlite3.Error as e:
440
- logging.error(f"Error adding character chat: {e}")
441
- return None
442
- finally:
443
- conn.close()
444
-
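- # Illustrative usage (not from the original file; char_id is a hypothetical
- # ID returned by add_character_card). Keywords are lowercased and stored in
- # ChatKeywords for later filtering.
- # chat_id = add_character_chat(char_id, "First meeting",
- #                              [("Hi!", "Hello there.")],
- #                              keywords=["Intro", "greeting"])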
445
-
446
- def get_character_chats(character_id: Optional[int] = None) -> List[Dict]:
447
- """Retrieve all chats, or chats for a specific character if character_id is provided."""
448
- conn = sqlite3.connect(chat_DB_PATH)
449
- cursor = conn.cursor()
450
- if character_id is not None:
451
- cursor.execute("SELECT * FROM CharacterChats WHERE character_id = ?", (character_id,))
452
- else:
453
- cursor.execute("SELECT * FROM CharacterChats")
454
- rows = cursor.fetchall()
455
- columns = [description[0] for description in cursor.description]
456
- conn.close()
457
- return [dict(zip(columns, row)) for row in rows]
458
-
459
-
460
- def get_character_chat_by_id(chat_id: int) -> Optional[Dict]:
461
- """Retrieve a single chat by its ID."""
462
- conn = sqlite3.connect(chat_DB_PATH)
463
- cursor = conn.cursor()
464
- cursor.execute("SELECT * FROM CharacterChats WHERE id = ?", (chat_id,))
465
- row = cursor.fetchone()
466
- columns = [description[0] for description in cursor.description]  # read before closing the connection
467
- conn.close()
468
- if row:
469
- chat = dict(zip(columns, row))
470
- chat['chat_history'] = json.loads(chat['chat_history'])
471
- return chat
472
- return None
473
-
474
-
475
- def search_character_chats(query: str, character_id: Optional[int] = None) -> Tuple[List[Dict], str]:
476
- """
477
- Search for character chats using FTS5, optionally filtered by character_id.
478
-
479
- Args:
480
- query (str): The search query.
481
- character_id (Optional[int]): The ID of the character to filter chats by.
482
-
483
- Returns:
484
- Tuple[List[Dict], str]: A list of matching chats and a status message.
485
- """
486
- if not query.strip():
487
- return [], "Please enter a search query."
488
-
489
- conn = sqlite3.connect(chat_DB_PATH)
490
- cursor = conn.cursor()
491
- try:
492
- if character_id is not None:
493
- # Search with character_id filter
494
- cursor.execute("""
495
- SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
496
- FROM CharacterChats_fts
497
- JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
498
- WHERE CharacterChats_fts MATCH ? AND CharacterChats.character_id = ?
499
- ORDER BY rank
500
- """, (query, character_id))
501
- else:
502
- # Search without character_id filter
503
- cursor.execute("""
504
- SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
505
- FROM CharacterChats_fts
506
- JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
507
- WHERE CharacterChats_fts MATCH ?
508
- ORDER BY rank
509
- """, (query,))
510
-
511
- rows = cursor.fetchall()
512
- columns = [description[0] for description in cursor.description]
513
- results = [dict(zip(columns, row)) for row in rows]
514
-
515
- if character_id is not None:
516
- status_message = f"Found {len(results)} chat(s) matching '{query}' for the selected character."
517
- else:
518
- status_message = f"Found {len(results)} chat(s) matching '{query}' across all characters."
519
-
520
- return results, status_message
521
- except Exception as e:
522
- logging.error(f"Error searching chats with FTS5: {e}")
523
- return [], f"Error occurred during search: {e}"
524
- finally:
525
- conn.close()
526
-
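- # Illustrative usage (not from the original file; char_id is hypothetical):
- # FTS5 ranks matches across conversation names and chat bodies, and
- # character_id scopes the search to one character.
- # results, status = search_character_chats("greeting", character_id=char_id)
- # print(status)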
527
- def update_character_chat(chat_id: int, chat_history: List[Tuple[str, str]]) -> bool:
528
- """Update an existing chat history."""
529
- conn = sqlite3.connect(chat_DB_PATH)
530
- cursor = conn.cursor()
531
- try:
532
- chat_history_json = json.dumps(chat_history)
533
- cursor.execute("""
534
- UPDATE CharacterChats
535
- SET chat_history = ?
536
- WHERE id = ?
537
- """, (
538
- chat_history_json,
539
- chat_id
540
- ))
541
- conn.commit()
542
- return cursor.rowcount > 0
543
- except sqlite3.Error as e:
544
- logging.error(f"Error updating character chat: {e}")
545
- return False
546
- finally:
547
- conn.close()
548
-
549
-
550
- def delete_character_chat(chat_id: int) -> bool:
551
- """Delete a specific chat."""
552
- conn = sqlite3.connect(chat_DB_PATH)
553
- cursor = conn.cursor()
554
- try:
555
- cursor.execute("DELETE FROM CharacterChats WHERE id = ?", (chat_id,))
556
- conn.commit()
557
- return cursor.rowcount > 0
558
- except sqlite3.Error as e:
559
- logging.error(f"Error deleting character chat: {e}")
560
- return False
561
- finally:
562
- conn.close()
563
-
564
- def fetch_keywords_for_chats(keywords: List[str]) -> List[int]:
565
- """
566
- Fetch chat IDs associated with any of the specified keywords.
567
-
568
- Args:
569
- keywords (List[str]): List of keywords to search for.
570
-
571
- Returns:
572
- List[int]: List of chat IDs associated with the keywords.
573
- """
574
- if not keywords:
575
- return []
576
-
577
- conn = sqlite3.connect(chat_DB_PATH)
578
- cursor = conn.cursor()
579
- try:
580
- # Construct the WHERE clause to search for each keyword
581
- keyword_clauses = " OR ".join(["keyword = ?"] * len(keywords))
582
- sql_query = f"SELECT DISTINCT chat_id FROM ChatKeywords WHERE {keyword_clauses}"
583
- cursor.execute(sql_query, keywords)
584
- rows = cursor.fetchall()
585
- chat_ids = [row[0] for row in rows]
586
- return chat_ids
587
- except Exception as e:
588
- logging.error(f"Error in fetch_keywords_for_chats: {e}")
589
- return []
590
- finally:
591
- conn.close()
592
-
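- # Illustrative usage (not from the original file): OR-matches any keyword, so
- # every chat tagged "intro" or "greeting" is returned once.
- # chat_ids = fetch_keywords_for_chats(["intro", "greeting"])  # e.g. [3, 7]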
593
- def save_chat_history_to_character_db(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]]) -> Optional[int]:
594
- """Save chat history to the CharacterChats table.
595
-
596
- Returns the ID of the inserted chat or None if failed.
597
- """
598
- return add_character_chat(character_id, conversation_name, chat_history)
599
-
600
- def migrate_chat_to_media_db():
601
- pass
602
-
603
-
604
- def search_db(query: str, fields: List[str], where_clause: str = "", page: int = 1, results_per_page: int = 5) -> List[Dict[str, Any]]:
605
- """
606
- Perform a full-text search on specified fields with optional filtering and pagination.
607
-
608
- Args:
609
- query (str): The search query.
610
- fields (List[str]): List of fields to search in.
611
- where_clause (str, optional): Additional SQL WHERE clause to filter results.
612
- page (int, optional): Page number for pagination.
613
- results_per_page (int, optional): Number of results per page.
614
-
615
- Returns:
616
- List[Dict[str, Any]]: List of matching chat records with content and metadata.
617
- """
618
- if not query.strip():
619
- return []
620
-
621
- conn = sqlite3.connect(chat_DB_PATH)
622
- cursor = conn.cursor()
623
- try:
624
- # Construct the FTS5 MATCH expression. FTS5 matches against the table as a
625
- # whole; specific columns are selected with the "{col1 col2} : query" filter.
626
- fts_match = f"{{{' '.join(fields)}}} : {query}" if fields else query
627
- fts_query = f"""
628
- SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
629
- FROM CharacterChats_fts
630
- JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
631
- WHERE CharacterChats_fts MATCH ?
632
- """
633
- if where_clause:
634
- fts_query += f" AND ({where_clause})"
635
- fts_query += " ORDER BY rank LIMIT ? OFFSET ?"
636
- offset = (page - 1) * results_per_page
637
- cursor.execute(fts_query, (fts_match, results_per_page, offset))
638
- rows = cursor.fetchall()
639
- columns = [description[0] for description in cursor.description]
640
- results = [dict(zip(columns, row)) for row in rows]
641
- return results
642
- except Exception as e:
643
- logging.error(f"Error in search_db: {e}")
644
- return []
645
- finally:
646
- conn.close()
647
-
648
-
649
- def perform_full_text_search_chat(query: str, relevant_chat_ids: List[int], page: int = 1, results_per_page: int = 5) -> \
650
- List[Dict[str, Any]]:
651
- """
652
- Perform a full-text search within the specified chat IDs using FTS5.
653
-
654
- Args:
655
- query (str): The user's query.
656
- relevant_chat_ids (List[int]): List of chat IDs to search within.
657
- page (int): Pagination page number.
658
- results_per_page (int): Number of results per page.
659
-
660
- Returns:
661
- List[Dict[str, Any]]: List of search results with content and metadata.
662
- """
663
- try:
664
- # Construct a WHERE clause to limit the search to relevant chat IDs
665
- where_clause = " OR ".join([f"CharacterChats.id = {chat_id}" for chat_id in relevant_chat_ids])
666
- if not where_clause:
667
- where_clause = "1" # No restriction if no chat IDs
668
-
669
- # Perform full-text search using FTS5
670
- fts_results = search_db(query, ["chat_history"], where_clause, page=page, results_per_page=results_per_page)
671
-
672
- filtered_fts_results = [
673
- {
674
- "content": result['content'],
675
- "metadata": {"media_id": result['id']}
676
- }
677
- for result in fts_results
678
- if result['id'] in relevant_chat_ids
679
- ]
680
- return filtered_fts_results
681
- except Exception as e:
682
- logging.error(f"Error in perform_full_text_search_chat: {str(e)}")
683
- return []
684
-
685
-
686
- def fetch_all_chats() -> List[Dict[str, Any]]:
687
- """
688
- Fetch all chat messages from the database.
689
-
690
- Returns:
691
- List[Dict[str, Any]]: List of chat messages with relevant metadata.
692
- """
693
- try:
694
- chats = get_character_chats() # Modify this function to retrieve all chats
695
- return chats
696
- except Exception as e:
697
- logging.error(f"Error fetching all chats: {str(e)}")
698
- return []
699
-
700
- #
701
- # End of Character_Chat_DB.py
702
- #######################################################################################################################

App_Function_Libraries/DB/DB_Manager.py DELETED
@@ -1,991 +0,0 @@
1
- # DB_Manager.py
2
- # Description: This file contains the DatabaseManager class, which manages the database connection (either SQLite or Elasticsearch).
3
- #
4
- # Imports
5
- import configparser
6
- import os
7
- import logging
8
- import time
9
- from typing import Tuple, List, Union, Dict
10
- #
11
- # 3rd-Party Libraries
12
- from elasticsearch import Elasticsearch
13
- #
14
- # Import your existing SQLite functions
15
- from App_Function_Libraries.DB.SQLite_DB import DatabaseError
16
- from App_Function_Libraries.DB.SQLite_DB import (
17
- update_media_content as sqlite_update_media_content,
18
- list_prompts as sqlite_list_prompts,
19
- search_and_display as sqlite_search_and_display,
20
- fetch_prompt_details as sqlite_fetch_prompt_details,
21
- keywords_browser_interface as sqlite_keywords_browser_interface,
22
- add_keyword as sqlite_add_keyword,
23
- delete_keyword as sqlite_delete_keyword,
24
- export_keywords_to_csv as sqlite_export_keywords_to_csv,
25
- ingest_article_to_db as sqlite_ingest_article_to_db,
26
- add_media_to_database as sqlite_add_media_to_database,
27
- import_obsidian_note_to_db as sqlite_import_obsidian_note_to_db,
28
- add_prompt as sqlite_add_prompt,
29
- delete_chat_message as sqlite_delete_chat_message,
30
- update_chat_message as sqlite_update_chat_message,
31
- add_chat_message as sqlite_add_chat_message,
32
- get_chat_messages as sqlite_get_chat_messages,
33
- search_chat_conversations as sqlite_search_chat_conversations,
34
- create_chat_conversation as sqlite_create_chat_conversation,
35
- save_chat_history_to_database as sqlite_save_chat_history_to_database,
36
- view_database as sqlite_view_database,
37
- get_transcripts as sqlite_get_transcripts,
38
- get_trashed_items as sqlite_get_trashed_items,
39
- user_delete_item as sqlite_user_delete_item,
40
- empty_trash as sqlite_empty_trash,
41
- create_automated_backup as sqlite_create_automated_backup,
42
- add_or_update_prompt as sqlite_add_or_update_prompt,
43
- load_prompt_details as sqlite_load_prompt_details,
44
- load_preset_prompts as sqlite_load_preset_prompts,
45
- insert_prompt_to_db as sqlite_insert_prompt_to_db,
46
- delete_prompt as sqlite_delete_prompt,
47
- search_and_display_items as sqlite_search_and_display_items,
48
- get_conversation_name as sqlite_get_conversation_name,
49
- add_media_with_keywords as sqlite_add_media_with_keywords,
50
- check_media_and_whisper_model as sqlite_check_media_and_whisper_model, \
51
- create_document_version as sqlite_create_document_version,
52
- get_document_version as sqlite_get_document_version, sqlite_search_db, add_media_chunk as sqlite_add_media_chunk,
53
- sqlite_update_fts_for_media, get_unprocessed_media as sqlite_get_unprocessed_media, fetch_item_details as sqlite_fetch_item_details, \
54
- search_media_database as sqlite_search_media_database, mark_as_trash as sqlite_mark_as_trash, \
55
- get_media_transcripts as sqlite_get_media_transcripts, get_specific_transcript as sqlite_get_specific_transcript, \
56
- get_media_summaries as sqlite_get_media_summaries, get_specific_summary as sqlite_get_specific_summary, \
57
- get_media_prompts as sqlite_get_media_prompts, get_specific_prompt as sqlite_get_specific_prompt, \
58
- delete_specific_transcript as sqlite_delete_specific_transcript,
59
- delete_specific_summary as sqlite_delete_specific_summary, \
60
- delete_specific_prompt as sqlite_delete_specific_prompt,
61
- fetch_keywords_for_media as sqlite_fetch_keywords_for_media, \
62
- update_keywords_for_media as sqlite_update_keywords_for_media, check_media_exists as sqlite_check_media_exists, \
63
- search_prompts as sqlite_search_prompts, get_media_content as sqlite_get_media_content, \
64
- get_paginated_files as sqlite_get_paginated_files, get_media_title as sqlite_get_media_title, \
65
- get_all_content_from_database as sqlite_get_all_content_from_database,
66
- get_next_media_id as sqlite_get_next_media_id, \
67
- batch_insert_chunks as sqlite_batch_insert_chunks, Database, save_workflow_chat_to_db as sqlite_save_workflow_chat_to_db, \
68
- get_workflow_chat as sqlite_get_workflow_chat, update_media_content_with_version as sqlite_update_media_content_with_version, \
69
- check_existing_media as sqlite_check_existing_media, get_all_document_versions as sqlite_get_all_document_versions, \
70
- fetch_paginated_data as sqlite_fetch_paginated_data, get_latest_transcription as sqlite_get_latest_transcription, \
71
- mark_media_as_processed as sqlite_mark_media_as_processed,
72
- )
73
- from App_Function_Libraries.DB.Character_Chat_DB import (
74
- add_character_card as sqlite_add_character_card, get_character_cards as sqlite_get_character_cards, \
75
- get_character_card_by_id as sqlite_get_character_card_by_id, update_character_card as sqlite_update_character_card, \
76
- delete_character_card as sqlite_delete_character_card, add_character_chat as sqlite_add_character_chat, \
77
- get_character_chats as sqlite_get_character_chats, get_character_chat_by_id as sqlite_get_character_chat_by_id, \
78
- update_character_chat as sqlite_update_character_chat, delete_character_chat as sqlite_delete_character_chat, \
79
- migrate_chat_to_media_db as sqlite_migrate_chat_to_media_db,
80
- )
81
- #
82
- # Local Imports
83
- from App_Function_Libraries.Utils.Utils import load_comprehensive_config, get_database_path, get_project_relative_path
84
- #
85
- # End of imports
86
- ############################################################################################################
87
-
88
-
89
- ############################################################################################################
90
- #
91
- # Database Config loading
92
-
93
- logger = logging.getLogger(__name__)
94
-
95
- config_path = get_project_relative_path('Config_Files/config.txt')
96
- config = configparser.ConfigParser()
97
- config.read(config_path)
98
-
99
- db_path: str = config.get('Database', 'sqlite_path', fallback='./Databases/media_summary.db')
100
- backup_path: str = config.get('Database', 'backup_path', fallback='database_backups')
101
- backup_dir: Union[str, bytes] = os.environ.get('DB_BACKUP_DIR', backup_path)
102
-
150
- def get_db_config():
151
- try:
152
- config = load_comprehensive_config()
153
-
154
- if 'Database' not in config:
155
- print("Warning: 'Database' section not found in config. Using default values.")
156
- return default_db_config()
157
-
158
- return {
159
- 'type': config.get('Database', 'type', fallback='sqlite'),
160
- 'sqlite_path': config.get('Database', 'sqlite_path', fallback='Databases/media_summary.db'),
161
- 'elasticsearch_host': config.get('Database', 'elasticsearch_host', fallback='localhost'),
162
- 'elasticsearch_port': config.getint('Database', 'elasticsearch_port', fallback=9200)
163
- }
164
- except FileNotFoundError:
165
- print("Warning: Config file not found. Using default database configuration.")
166
- return default_db_config()
167
- except Exception as e:
168
- print(f"Error reading config: {str(e)}. Using default database configuration.")
169
- return default_db_config()
170
-
171
-
172
- def default_db_config():
173
- """Return the default database configuration with project-relative paths."""
174
- return {
175
- 'type': 'sqlite',
176
- 'sqlite_path': get_database_path('media_summary.db'),
177
- 'elasticsearch_host': 'localhost',
178
- 'elasticsearch_port': 9200
179
- }
180
-
181
-
182
- def ensure_directory_exists(file_path):
183
- directory = os.path.dirname(file_path)
184
- if not os.path.exists(directory):
185
- os.makedirs(directory)
186
- print(f"Created directory: {directory}")
187
-
188
- # Use the config to set up the database
189
- db_config = get_db_config()
190
- db_type = db_config['type']
191
-
192
- if db_type == 'sqlite':
193
- db = Database(os.path.basename(db_config['sqlite_path']))
194
- elif db_type == 'elasticsearch':
195
- # Implement Elasticsearch setup here if needed
196
- raise NotImplementedError("Elasticsearch support not yet implemented")
197
- else:
198
- raise ValueError(f"Unsupported database type: {db_type}")
199
-
200
- # Print database path for debugging
201
- print(f"Database path: {db.db_path}")
202
-
203
- # Sanity Check for SQLite DB
204
- # FIXME - Remove this after testing / Writing Unit tests
205
- # try:
206
- # db.execute_query("CREATE TABLE IF NOT EXISTS test_table (id INTEGER PRIMARY KEY)")
207
- # logger.info("Successfully created test table")
208
- # except DatabaseError as e:
209
- # logger.error(f"Failed to create test table: {e}")
210
-
211
- #
212
- # End of Database Config loading
213
- ############################################################################################################
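For reference, a sketch of the [Database] section in Config_Files/config.txt that the code above reads. The key names come from the config.get()/config.getint() calls; the values shown are the fallback defaults, so treat them as illustrative:

[Database]
type = sqlite
sqlite_path = Databases/media_summary.db
backup_path = database_backups
elasticsearch_host = localhost
elasticsearch_port = 9200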
214
- #
215
- # DB Search functions
216
-
217
- def search_db(search_query: str, search_fields: List[str], keywords: str, page: int = 1, results_per_page: int = 10):
218
- if db_type == 'sqlite':
219
- return sqlite_search_db(search_query, search_fields, keywords, page, results_per_page)
220
- elif db_type == 'elasticsearch':
221
- # Implement Elasticsearch version when available
222
- raise NotImplementedError("Elasticsearch version of search_db not yet implemented")
223
- else:
224
- raise ValueError(f"Unsupported database type: {db_type}")
225
-
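A minimal usage sketch of the search_db dispatcher above; the field names are illustrative assumptions, since the actual searchable columns are defined in SQLite_DB.py:

# Hypothetical call; 'title' and 'content' are assumed field names.
results = search_db(
    search_query="whisper",
    search_fields=["title", "content"],
    keywords="transcription",
    page=1,
    results_per_page=10,
)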
226
- def view_database(*args, **kwargs):
227
- if db_type == 'sqlite':
228
- return sqlite_view_database(*args, **kwargs)
229
- elif db_type == 'elasticsearch':
230
- # Implement Elasticsearch version
231
- raise NotImplementedError("Elasticsearch version of view_database not yet implemented")
232
-
233
- def search_and_display_items(*args, **kwargs):
234
- if db_type == 'sqlite':
235
- return sqlite_search_and_display_items(*args, **kwargs)
236
- elif db_type == 'elasticsearch':
237
- # Implement Elasticsearch version
238
- raise NotImplementedError("Elasticsearch version of search_and_display_items not yet implemented")
239
-
240
- def get_all_content_from_database():
241
- if db_type == 'sqlite':
242
- return sqlite_get_all_content_from_database()
243
- elif db_type == 'elasticsearch':
244
- # Implement Elasticsearch version
245
- raise NotImplementedError("Elasticsearch version of get_all_content_from_database not yet implemented")
246
-
247
- def search_and_display(*args, **kwargs):
248
- if db_type == 'sqlite':
249
- return sqlite_search_and_display(*args, **kwargs)
250
- elif db_type == 'elasticsearch':
251
- # Implement Elasticsearch version
252
- raise NotImplementedError("Elasticsearch version of search_and_display not yet implemented")
253
-
254
- def check_media_exists(*args, **kwargs):
255
- if db_type == 'sqlite':
256
- return sqlite_check_media_exists(*args, **kwargs)
257
- elif db_type == 'elasticsearch':
258
- # Implement Elasticsearch version
259
- raise NotImplementedError("Elasticsearch version of check_media_exists not yet implemented")
260
-
261
- def get_paginated_files(*args, **kwargs):
262
- if db_type == 'sqlite':
263
- return sqlite_get_paginated_files(*args, **kwargs)
264
- elif db_type == 'elasticsearch':
265
- # Implement Elasticsearch version
266
- raise NotImplementedError("Elasticsearch version of get_paginated_files not yet implemented")
267
-
268
- def get_media_title(*args, **kwargs):
269
- if db_type == 'sqlite':
270
- return sqlite_get_media_title(*args, **kwargs)
271
- elif db_type == 'elasticsearch':
272
- # Implement Elasticsearch version
273
- raise NotImplementedError("Elasticsearch version of get_media_title not yet implemented")
274
-
275
- def get_next_media_id():
276
- if db_type == 'sqlite':
277
- return sqlite_get_next_media_id()
278
- elif db_type == 'elasticsearch':
279
- # Implement Elasticsearch version
280
- raise NotImplementedError("Elasticsearch version of get_next_media_id not yet implemented")
281
-
282
- #
283
- # End of DB-Searching functions
284
- ############################################################################################################
285
-
286
-
287
- ############################################################################################################
288
- #
289
- # Transcript-related Functions
290
-
291
- def get_transcripts(*args, **kwargs):
292
- if db_type == 'sqlite':
293
- return sqlite_get_transcripts(*args, **kwargs)
294
- elif db_type == 'elasticsearch':
295
- # Implement Elasticsearch version
296
- raise NotImplementedError("Elasticsearch version of get_transcripts not yet implemented")
297
-
298
- #
299
- # End of Transcript-related Functions
300
- ############################################################################################################
301
-
302
-
303
- ############################################################################################################
304
- #
305
- # DB-Ingestion functions
306
-
307
- def add_media_to_database(*args, **kwargs):
308
- if db_type == 'sqlite':
309
- result = sqlite_add_media_to_database(*args, **kwargs)
310
-
311
- # Extract content
312
- segments = kwargs.get('segments') if 'segments' in kwargs else args[2] if len(args) > 2 else None
313
- if segments is None:
314
- raise ValueError("Segments not provided in arguments")
315
-
316
- if isinstance(segments, list):
317
- content = ' '.join([segment.get('Text', '') for segment in segments if 'Text' in segment])
318
- elif isinstance(segments, dict):
319
- content = segments.get('text', '') or segments.get('content', '')
320
- else:
321
- content = str(segments)
322
-
323
- # Extract media_id from the result
324
- # Assuming the result is in the format "Media 'Title' added/updated successfully with ID: {media_id}"
325
- import re
326
- match = re.search(r"with ID: (\d+)", result)
327
- if match:
328
- media_id = int(match.group(1))
329
-
330
- # Create initial document version
331
- sqlite_create_document_version(media_id, content)
332
-
333
- return result
334
- elif db_type == 'elasticsearch':
335
- # Implement Elasticsearch version
336
- raise NotImplementedError("Elasticsearch version of add_media_to_database not yet implemented")
337
-
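The content-extraction logic in add_media_to_database() above accepts segments as either a list of dicts or a single dict, then parses the media ID out of the result string. A standalone sketch of both steps (the result string format is an assumption taken from the comment in the code):

import re

# List-of-segments shape: join the 'Text' fields.
segments = [{'Text': 'Hello'}, {'Text': 'world'}]
content = ' '.join(seg.get('Text', '') for seg in segments if 'Text' in seg)  # -> 'Hello world'

# Parse the media ID from the assumed success message.
result = "Media 'Demo' added/updated successfully with ID: 42"
match = re.search(r"with ID: (\d+)", result)
media_id = int(match.group(1)) if match else None  # -> 42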
338
- def check_existing_media(*args, **kwargs):
339
- if db_type == 'sqlite':
340
- return sqlite_check_existing_media(*args, **kwargs)
341
- elif db_type == 'elasticsearch':
342
- # Implement Elasticsearch version
343
- raise NotImplementedError("Elasticsearch version of check_existing_media not yet implemented")
344
-
345
- def update_media_content_with_version(*args, **kwargs):
346
- if db_type == 'sqlite':
347
- return sqlite_update_media_content_with_version(*args, **kwargs)
348
- elif db_type == 'elasticsearch':
349
- # Implement Elasticsearch version
350
- raise NotImplementedError("Elasticsearch version of update_media_content not yet implemented")
351
-
352
- def import_obsidian_note_to_db(*args, **kwargs):
353
- if db_type == 'sqlite':
354
- return sqlite_import_obsidian_note_to_db(*args, **kwargs)
355
- elif db_type == 'elasticsearch':
356
- # Implement Elasticsearch version
357
- raise NotImplementedError("Elasticsearch version of import_obsidian_note_to_db not yet implemented")
358
-
359
-
360
- def update_media_content(*args, **kwargs):
361
- if db_type == 'sqlite':
362
- result = sqlite_update_media_content(*args, **kwargs)
363
-
364
- # Extract media_id and content
365
- selected_item = args[0]
366
- item_mapping = args[1]
367
- content_input = args[2]
368
-
369
- if selected_item and item_mapping and selected_item in item_mapping:
370
- media_id = item_mapping[selected_item]
371
-
372
- # Create new document version
373
- sqlite_create_document_version(media_id, content_input)
374
-
375
- return result
376
- elif db_type == 'elasticsearch':
377
- # Implement Elasticsearch version
378
- raise NotImplementedError("Elasticsearch version of update_media_content not yet implemented")
379
-
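A hedged usage sketch of update_media_content() above; the wrapper only inspects its first three positional arguments, and the display-label-to-ID mapping shown here is a hypothetical example of what the UI passes in:

item_mapping = {'My Video (ID: 42)': 42}  # hypothetical label -> media_id mapping
update_media_content('My Video (ID: 42)', item_mapping, 'Edited transcript text...')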
380
-
381
- def add_media_with_keywords(*args, **kwargs):
382
- if db_type == 'sqlite':
383
- return sqlite_add_media_with_keywords(*args, **kwargs)
384
- elif db_type == 'elasticsearch':
385
- raise NotImplementedError("Elasticsearch version of add_media_with_keywords not yet implemented")
386
-
387
- def check_media_and_whisper_model(*args, **kwargs):
388
- if db_type == 'sqlite':
389
- return sqlite_check_media_and_whisper_model(*args, **kwargs)
390
- elif db_type == 'elasticsearch':
391
- raise NotImplementedError("Elasticsearch version of check_media_and_whisper_model not yet implemented")
392
-
393
- def ingest_article_to_db(url, title, author, content, keywords, summary, ingestion_date, custom_prompt):
394
- if db_type == 'sqlite':
395
- return sqlite_ingest_article_to_db(url, title, author, content, keywords, summary, ingestion_date, custom_prompt)
396
- elif db_type == 'elasticsearch':
397
- # Implement Elasticsearch version
398
- raise NotImplementedError("Elasticsearch version of ingest_article_to_db not yet implemented")
399
- else:
400
- raise ValueError(f"Unsupported database type: {db_type}")
401
-
402
-
403
- def add_media_chunk(*args, **kwargs):
404
- if db_type == 'sqlite':
405
- sqlite_add_media_chunk(*args, **kwargs)
406
- elif db_type == 'elasticsearch':
407
- # Implement Elasticsearch version
408
- raise NotImplementedError("Elasticsearch version not yet implemented")
409
- else:
410
- raise ValueError(f"Unsupported database type: {db_type}")
411
-
412
- def batch_insert_chunks(*args, **kwargs):
413
- if db_type == 'sqlite':
414
- sqlite_batch_insert_chunks(*args, **kwargs)
415
- elif db_type == 'elasticsearch':
416
- # Implement Elasticsearch version
417
- raise NotImplementedError("Elasticsearch version not yet implemented")
418
- else:
419
- raise ValueError(f"Unsupported database type: {db_type}")
420
-
421
- def update_fts_for_media(media_id: int):
422
- if db_type == 'sqlite':
423
- sqlite_update_fts_for_media(db, media_id)
424
- elif db_type == 'elasticsearch':
425
- # Implement Elasticsearch version
426
- raise NotImplementedError("Elasticsearch version not yet implemented")
427
- else:
428
- raise ValueError(f"Unsupported database type: {db_type}")
429
-
430
-
431
- def get_unprocessed_media(*args, **kwargs):
432
- if db_type == 'sqlite':
433
- return sqlite_get_unprocessed_media(db)
434
- elif db_type == 'elasticsearch':
435
- # Implement Elasticsearch version
436
- raise NotImplementedError("Elasticsearch version of get_unprocessed_media not yet implemented")
437
- else:
438
- raise ValueError(f"Unsupported database type: {db_type}")
439
-
440
-
441
- def mark_media_as_processed(*args, **kwargs):
442
- if db_type == 'sqlite':
443
- return sqlite_mark_media_as_processed(*args, **kwargs)
444
- elif db_type == 'elasticsearch':
445
- # Implement Elasticsearch version
446
- raise NotImplementedError("Elasticsearch version of mark_media_as_processed not yet implemented")
447
- else:
448
- raise ValueError(f"Unsupported database type: {db_type}")
449
-
450
-
451
- #
452
- # End of DB-Ingestion functions
453
- ############################################################################################################
454
-
455
-
456
- ############################################################################################################
457
- #
458
- # Prompt-related functions  # FIXME: rename and re-sort these wrappers
459
-
460
- def list_prompts(*args, **kwargs):
461
- if db_type == 'sqlite':
462
- return sqlite_list_prompts(*args, **kwargs)
463
- elif db_type == 'elasticsearch':
464
- # Implement Elasticsearch version
465
- raise NotImplementedError("Elasticsearch version of list_prompts not yet implemented")
466
-
467
- def search_prompts(query):
468
- if db_type == 'sqlite':
469
- return sqlite_search_prompts(query)
470
- elif db_type == 'elasticsearch':
471
- # Implement Elasticsearch version
472
- raise NotImplementedError("Elasticsearch version of search_prompts not yet implemented")
473
-
474
- def fetch_prompt_details(*args, **kwargs):
475
- if db_type == 'sqlite':
476
- return sqlite_fetch_prompt_details(*args, **kwargs)
477
- elif db_type == 'elasticsearch':
478
- # Implement Elasticsearch version
479
- raise NotImplementedError("Elasticsearch version of fetch_prompt_details not yet implemented")
480
-
481
- def add_prompt(*args, **kwargs):
482
- if db_type == 'sqlite':
483
- return sqlite_add_prompt(*args, **kwargs)
484
- elif db_type == 'elasticsearch':
485
- # Implement Elasticsearch version
486
- raise NotImplementedError("Elasticsearch version of add_prompt not yet implemented")
487
-
488
-
489
- def add_or_update_prompt(*args, **kwargs):
490
- if db_type == 'sqlite':
491
- return sqlite_add_or_update_prompt(*args, **kwargs)
492
- elif db_type == 'elasticsearch':
493
- # Implement Elasticsearch version
494
- raise NotImplementedError("Elasticsearch version of add_or_update_prompt not yet implemented")
495
-
496
- def load_prompt_details(*args, **kwargs):
497
- if db_type == 'sqlite':
498
- return sqlite_load_prompt_details(*args, **kwargs)
499
- elif db_type == 'elasticsearch':
500
- # Implement Elasticsearch version
501
- raise NotImplementedError("Elasticsearch version of load_prompt_details not yet implemented")
502
-
503
- def load_preset_prompts(*args, **kwargs):
504
- if db_type == 'sqlite':
505
- return sqlite_load_preset_prompts()
506
- elif db_type == 'elasticsearch':
507
- # Implement Elasticsearch version
508
- raise NotImplementedError("Elasticsearch version of load_preset_prompts not yet implemented")
509
-
510
- def insert_prompt_to_db(*args, **kwargs):
511
- if db_type == 'sqlite':
512
- return sqlite_insert_prompt_to_db(*args, **kwargs)
513
- elif db_type == 'elasticsearch':
514
- # Implement Elasticsearch version
515
- raise NotImplementedError("Elasticsearch version of insert_prompt_to_db not yet implemented")
516
-
517
- def delete_prompt(*args, **kwargs):
518
- if db_type == 'sqlite':
519
- return sqlite_delete_prompt(*args, **kwargs)
520
- elif db_type == 'elasticsearch':
521
- # Implement Elasticsearch version
522
- raise NotImplementedError("Elasticsearch version of delete_prompt not yet implemented")
523
-
524
- def search_media_database(*args, **kwargs):
525
- if db_type == 'sqlite':
526
- return sqlite_search_media_database(*args, **kwargs)
527
- elif db_type == 'elasticsearch':
528
- # Implement Elasticsearch version when available
529
- raise NotImplementedError("Elasticsearch version of search_media_database not yet implemented")
530
- else:
531
- raise ValueError(f"Unsupported database type: {db_type}")
532
-
533
- def mark_as_trash(media_id: int) -> None:
534
- if db_type == 'sqlite':
535
- return sqlite_mark_as_trash(media_id)
536
- elif db_type == 'elasticsearch':
537
- # Implement Elasticsearch version when available
538
- raise NotImplementedError("Elasticsearch version of mark_as_trash not yet implemented")
539
- else:
540
- raise ValueError(f"Unsupported database type: {db_type}")
541
-
542
-
543
- def get_latest_transcription(*args, **kwargs):
544
- if db_type == 'sqlite':
545
- return sqlite_get_latest_transcription(*args, **kwargs)
546
- elif db_type == 'elasticsearch':
547
- # Implement Elasticsearch version
548
- raise NotImplementedError("Elasticsearch version of get_latest_transcription not yet implemented")
549
-
550
- def fetch_paginated_data(*args, **kwargs):
551
- if db_type == 'sqlite':
552
- return sqlite_fetch_paginated_data(*args, **kwargs)
553
- elif db_type == 'elasticsearch':
554
- # Implement Elasticsearch version
555
- raise NotImplementedError("Elasticsearch version of fetch_paginated_data not yet implemented")
556
- else:
557
- raise ValueError(f"Unsupported database type: {db_type}")
558
-
559
-
560
- def get_media_content(media_id: int) -> str:
561
- if db_type == 'sqlite':
562
- return sqlite_get_media_content(media_id)
563
- elif db_type == 'elasticsearch':
564
- raise NotImplementedError("Elasticsearch version of get_media_content not yet implemented")
565
- else:
566
- raise ValueError(f"Unsupported database type: {db_type}")
567
-
568
- def get_media_transcripts(media_id: int) -> List[Dict]:
569
- if db_type == 'sqlite':
570
- return sqlite_get_media_transcripts(media_id)
571
- elif db_type == 'elasticsearch':
572
- raise NotImplementedError("Elasticsearch version of get_media_transcripts not yet implemented")
573
- else:
574
- raise ValueError(f"Unsupported database type: {db_type}")
575
-
576
- def get_specific_transcript(transcript_id: int) -> Dict:
577
- if db_type == 'sqlite':
578
- return sqlite_get_specific_transcript(transcript_id)
579
- elif db_type == 'elasticsearch':
580
- raise NotImplementedError("Elasticsearch version of get_specific_transcript not yet implemented")
581
- else:
582
- raise ValueError(f"Unsupported database type: {db_type}")
583
-
584
- def get_media_summaries(media_id: int) -> List[Dict]:
585
- if db_type == 'sqlite':
586
- return sqlite_get_media_summaries(media_id)
587
- elif db_type == 'elasticsearch':
588
- raise NotImplementedError("Elasticsearch version of get_media_summaries not yet implemented")
589
- else:
590
- raise ValueError(f"Unsupported database type: {db_type}")
591
-
592
- def get_specific_summary(summary_id: int) -> Dict:
593
- if db_type == 'sqlite':
594
- return sqlite_get_specific_summary(summary_id)
595
- elif db_type == 'elasticsearch':
596
- raise NotImplementedError("Elasticsearch version of get_specific_summary not yet implemented")
597
- else:
598
- raise ValueError(f"Unsupported database type: {db_type}")
599
-
600
- def fetch_item_details_single(*args, **kwargs):
601
- if db_type == 'sqlite':
602
- return sqlite_fetch_item_details(*args, **kwargs)
603
- elif db_type == 'elasticsearch':
604
- # Implement Elasticsearch version
605
- raise NotImplementedError("Elasticsearch version of fetch_item_details not yet implemented")
606
- else:
607
- raise ValueError(f"Unsupported database type: {db_type}")
608
-
609
- def get_all_document_versions(*args, **kwargs):
610
- if db_type == 'sqlite':
611
- return sqlite_get_all_document_versions(*args, **kwargs)
612
- elif db_type == 'elasticsearch':
613
- # Implement Elasticsearch version
614
- raise NotImplementedError("Elasticsearch version of get_all_document_versions not yet implemented")
615
- else:
616
- raise ValueError(f"Unsupported database type: {db_type}")
617
- #
618
- #
619
- ############################################################################################################
620
- #
621
- # Media Prompt and Deletion Functions:
622
-
623
- def get_media_prompts(media_id: int) -> List[Dict]:
624
- if db_type == 'sqlite':
625
- return sqlite_get_media_prompts(media_id)
626
- elif db_type == 'elasticsearch':
627
- raise NotImplementedError("Elasticsearch version of get_media_prompts not yet implemented")
628
- else:
629
- raise ValueError(f"Unsupported database type: {db_type}")
630
-
631
- def get_specific_prompt(prompt_id: int) -> Dict:
632
- if db_type == 'sqlite':
633
- return sqlite_get_specific_prompt(prompt_id)
634
- elif db_type == 'elasticsearch':
635
- raise NotImplementedError("Elasticsearch version of get_specific_prompt not yet implemented")
636
- else:
637
- return {'error': f"Unsupported database type: {db_type}"}
638
-
639
- def delete_specific_transcript(transcript_id: int) -> str:
640
- if db_type == 'sqlite':
641
- return sqlite_delete_specific_transcript(transcript_id)
642
- elif db_type == 'elasticsearch':
643
- raise NotImplementedError("Elasticsearch version of delete_specific_transcript not yet implemented")
644
- else:
645
- raise ValueError(f"Unsupported database type: {db_type}")
646
-
647
- def delete_specific_summary(summary_id: int) -> str:
648
- if db_type == 'sqlite':
649
- return sqlite_delete_specific_summary(summary_id)
650
- elif db_type == 'elasticsearch':
651
- raise NotImplementedError("Elasticsearch version of delete_specific_summary not yet implemented")
652
- else:
653
- raise ValueError(f"Unsupported database type: {db_type}")
654
-
655
- def delete_specific_prompt(prompt_id: int) -> str:
656
- if db_type == 'sqlite':
657
- return sqlite_delete_specific_prompt(prompt_id)
658
- elif db_type == 'elasticsearch':
659
- raise NotImplementedError("Elasticsearch version of delete_specific_prompt not yet implemented")
660
- else:
661
- raise ValueError(f"Unsupported database type: {db_type}")
662
-
663
-
664
- #
665
- # End of Prompt-related functions
666
- ############################################################################################################
667
-
668
- ############################################################################################################
669
- #
670
- # Keywords-related Functions
671
-
672
- def keywords_browser_interface(*args, **kwargs):
673
- if db_type == 'sqlite':
674
- return sqlite_keywords_browser_interface()
675
- elif db_type == 'elasticsearch':
676
- # Implement Elasticsearch version
677
- raise NotImplementedError("Elasticsearch version of keywords_browser_interface not yet implemented")
678
-
679
- def add_keyword(*args, **kwargs):
680
- if db_type == 'sqlite':
681
- return sqlite_add_keyword(*args, **kwargs)
684
- elif db_type == 'elasticsearch':
685
- # Implement Elasticsearch version
686
- raise NotImplementedError("Elasticsearch version of add_keyword not yet implemented")
687
-
688
- def delete_keyword(*args, **kwargs):
689
- if db_type == 'sqlite':
690
- return sqlite_delete_keyword(*args, **kwargs)
691
- elif db_type == 'elasticsearch':
692
- # Implement Elasticsearch version
693
- raise NotImplementedError("Elasticsearch version of delete_keyword not yet implemented")
694
-
695
- def export_keywords_to_csv(*args, **kwargs):
696
- if db_type == 'sqlite':
697
- return sqlite_export_keywords_to_csv()
698
- elif db_type == 'elasticsearch':
699
- # Implement Elasticsearch version
700
- raise NotImplementedError("Elasticsearch version of export_keywords_to_csv not yet implemented")
701
-
702
- def update_keywords_for_media(*args, **kwargs):
703
- if db_type == 'sqlite':
704
- return sqlite_update_keywords_for_media(*args, **kwargs)
705
- elif db_type == 'elasticsearch':
706
- # Implement Elasticsearch version
707
- raise NotImplementedError("Elasticsearch version of update_keywords_for_media not yet implemented")
708
-
709
- def fetch_keywords_for_media(*args, **kwargs):
710
- if db_type == 'sqlite':
711
- return sqlite_fetch_keywords_for_media(*args, **kwargs)
712
- elif db_type == 'elasticsearch':
713
- # Implement Elasticsearch version
714
- raise NotImplementedError("Elasticsearch version of fetch_keywords_for_media not yet implemented")
715
-
716
- #
717
- # End of Keywords-related Functions
718
- ############################################################################################################
719
-
720
- ############################################################################################################
721
- #
722
- # Chat-related Functions
723
-
724
- def delete_chat_message(*args, **kwargs):
725
- if db_type == 'sqlite':
726
- return sqlite_delete_chat_message(*args, **kwargs)
727
- elif db_type == 'elasticsearch':
728
- # Implement Elasticsearch version
729
- raise NotImplementedError("Elasticsearch version of delete_chat_message not yet implemented")
730
-
731
- def update_chat_message(*args, **kwargs):
732
- if db_type == 'sqlite':
733
- return sqlite_update_chat_message(*args, **kwargs)
734
- elif db_type == 'elasticsearch':
735
- # Implement Elasticsearch version
736
- raise NotImplementedError("Elasticsearch version of update_chat_message not yet implemented")
737
-
738
- def add_chat_message(*args, **kwargs):
739
- if db_type == 'sqlite':
740
- return sqlite_add_chat_message(*args, **kwargs)
741
- elif db_type == 'elasticsearch':
742
- # Implement Elasticsearch version
743
- raise NotImplementedError("Elasticsearch version of add_chat_message not yet implemented")
744
-
745
- def get_chat_messages(*args, **kwargs):
746
- if db_type == 'sqlite':
747
- return sqlite_get_chat_messages(*args, **kwargs)
748
- elif db_type == 'elasticsearch':
749
- # Implement Elasticsearch version
750
- raise NotImplementedError("Elasticsearch version of get_chat_messages not yet implemented")
751
-
752
- def search_chat_conversations(*args, **kwargs):
753
- if db_type == 'sqlite':
754
- return sqlite_search_chat_conversations(*args, **kwargs)
755
- elif db_type == 'elasticsearch':
756
- # Implement Elasticsearch version
757
- raise NotImplementedError("Elasticsearch version of search_chat_conversations not yet implemented")
758
-
759
- def create_chat_conversation(*args, **kwargs):
760
- if db_type == 'sqlite':
761
- return sqlite_create_chat_conversation(*args, **kwargs)
762
- elif db_type == 'elasticsearch':
763
- # Implement Elasticsearch version
764
- raise NotImplementedError("Elasticsearch version of create_chat_conversation not yet implemented")
765
-
766
- def save_chat_history_to_database(*args, **kwargs):
767
- if db_type == 'sqlite':
768
- return sqlite_save_chat_history_to_database(*args, **kwargs)
769
- elif db_type == 'elasticsearch':
770
- # Implement Elasticsearch version
771
- raise NotImplementedError("Elasticsearch version of save_chat_history_to_database not yet implemented")
772
-
773
- def get_conversation_name(*args, **kwargs):
774
- if db_type == 'sqlite':
775
- return sqlite_get_conversation_name(*args, **kwargs)
776
- elif db_type == 'elasticsearch':
777
- # Implement Elasticsearch version
778
- raise NotImplementedError("Elasticsearch version of get_conversation_name not yet implemented")
779
-
780
- #
781
- # End of Chat-related Functions
782
- ############################################################################################################
783
-
784
-
785
- ############################################################################################################
786
- #
787
- # Character Chat-related Functions
788
-
789
- def add_character_card(*args, **kwargs):
790
- if db_type == 'sqlite':
791
- return sqlite_add_character_card(*args, **kwargs)
792
- elif db_type == 'elasticsearch':
793
- # Implement Elasticsearch version
794
- raise NotImplementedError("Elasticsearch version of add_character_card not yet implemented")
795
-
796
- def get_character_cards():
797
- if db_type == 'sqlite':
798
- return sqlite_get_character_cards()
799
- elif db_type == 'elasticsearch':
800
- # Implement Elasticsearch version
801
- raise NotImplementedError("Elasticsearch version of get_character_cards not yet implemented")
802
-
803
- def get_character_card_by_id(*args, **kwargs):
804
- if db_type == 'sqlite':
805
- return sqlite_get_character_card_by_id(*args, **kwargs)
806
- elif db_type == 'elasticsearch':
807
- # Implement Elasticsearch version
808
- raise NotImplementedError("Elasticsearch version of get_character_card_by_id not yet implemented")
809
-
810
- def update_character_card(*args, **kwargs):
811
- if db_type == 'sqlite':
812
- return sqlite_update_character_card(*args, **kwargs)
813
- elif db_type == 'elasticsearch':
814
- # Implement Elasticsearch version
815
- raise NotImplementedError("Elasticsearch version of update_character_card not yet implemented")
816
-
817
- def delete_character_card(*args, **kwargs):
818
- if db_type == 'sqlite':
819
- return sqlite_delete_character_card(*args, **kwargs)
820
- elif db_type == 'elasticsearch':
821
- # Implement Elasticsearch version
822
- raise NotImplementedError("Elasticsearch version of delete_character_card not yet implemented")
823
-
824
- def add_character_chat(*args, **kwargs):
825
- if db_type == 'sqlite':
826
- return sqlite_add_character_chat(*args, **kwargs)
827
- elif db_type == 'elasticsearch':
828
- # Implement Elasticsearch version
829
- raise NotImplementedError("Elasticsearch version of add_character_chat not yet implemented")
830
-
831
- def get_character_chats(*args, **kwargs):
832
- if db_type == 'sqlite':
833
- return sqlite_get_character_chats(*args, **kwargs)
834
- elif db_type == 'elasticsearch':
835
- # Implement Elasticsearch version
836
- raise NotImplementedError("Elasticsearch version of get_character_chats not yet implemented")
837
-
838
- def get_character_chat_by_id(*args, **kwargs):
839
- if db_type == 'sqlite':
840
- return sqlite_get_character_chat_by_id(*args, **kwargs)
841
- elif db_type == 'elasticsearch':
842
- # Implement Elasticsearch version
843
- raise NotImplementedError("Elasticsearch version of get_character_chat_by_id not yet implemented")
844
-
845
- def update_character_chat(*args, **kwargs):
846
- if db_type == 'sqlite':
847
- return sqlite_update_character_chat(*args, **kwargs)
848
- elif db_type == 'elasticsearch':
849
- # Implement Elasticsearch version
850
- raise NotImplementedError("Elasticsearch version of update_character_chat not yet implemented")
851
-
852
- def delete_character_chat(*args, **kwargs):
853
- if db_type == 'sqlite':
854
- return sqlite_delete_character_chat(*args, **kwargs)
855
- elif db_type == 'elasticsearch':
856
- # Implement Elasticsearch version
857
- raise NotImplementedError("Elasticsearch version of delete_character_chat not yet implemented")
858
-
859
- def migrate_chat_to_media_db(*args, **kwargs):
860
- if db_type == 'sqlite':
861
- return sqlite_migrate_chat_to_media_db(*args, **kwargs)
862
- elif db_type == 'elasticsearch':
863
- # Implement Elasticsearch version
864
- raise NotImplementedError("Elasticsearch version of migrate_chat_to_media_db not yet implemented")
865
-
866
- #
867
- # End of Character Chat-related Functions
868
- ############################################################################################################
869
-
870
-
871
- ############################################################################################################
872
- #
873
- # Trash-related Functions
874
-
875
- def get_trashed_items(*args, **kwargs):
876
- if db_type == 'sqlite':
877
- return sqlite_get_trashed_items()
878
- elif db_type == 'elasticsearch':
879
- # Implement Elasticsearch version
880
- raise NotImplementedError("Elasticsearch version of get_trashed_items not yet implemented")
881
-
882
- def user_delete_item(*args, **kwargs):
883
- if db_type == 'sqlite':
884
- return sqlite_user_delete_item(*args, **kwargs)
885
- elif db_type == 'elasticsearch':
886
- # Implement Elasticsearch version
887
- raise NotImplementedError("Elasticsearch version of user_delete_item not yet implemented")
888
-
889
- def empty_trash(*args, **kwargs):
890
- if db_type == 'sqlite':
891
- return sqlite_empty_trash(*args, **kwargs)
892
- elif db_type == 'elasticsearch':
893
- # Implement Elasticsearch version
894
- raise NotImplementedError("Elasticsearch version of empty_trash not yet implemented")
895
-
896
-
897
- def fetch_item_details(media_id: int) -> Tuple[str, str, str]:
898
- """
899
- Fetch the details of a media item including content, prompt, and summary.
900
-
901
- Args:
902
- media_id (int): The ID of the media item.
903
-
904
- Returns:
905
- Tuple[str, str, str]: A tuple containing (content, prompt, summary).
906
- If an error occurs, it returns empty strings for each field.
907
- """
908
- if db_type == 'sqlite':
909
- return sqlite_fetch_item_details(media_id)
910
- elif db_type == 'elasticsearch':
911
- # Implement Elasticsearch version when available
912
- raise NotImplementedError("Elasticsearch version of fetch_item_details not yet implemented")
913
- else:
914
- raise ValueError(f"Unsupported database type: {db_type}")
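A quick usage sketch of fetch_item_details() above; 42 is an illustrative media ID:

content, prompt, summary = fetch_item_details(42)
if not content:
    print("No content stored for this media item")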
915
-
916
- #
917
- # End of Trash-related Functions
918
- ############################################################################################################
919
-
920
-
921
- ############################################################################################################
922
- #
923
- # DB-Backup Functions
924
-
925
- def create_automated_backup(*args, **kwargs):
926
- if db_type == 'sqlite':
927
- return sqlite_create_automated_backup(*args, **kwargs)
928
- elif db_type == 'elasticsearch':
929
- # Implement Elasticsearch version
930
- raise NotImplementedError("Elasticsearch version of create_automated_backup not yet implemented")
931
-
932
- #
933
- # End of DB-Backup Functions
934
- ############################################################################################################
935
-
936
-
937
- ############################################################################################################
938
- #
939
- # Document Versioning Functions
940
-
941
- def create_document_version(*args, **kwargs):
942
- if db_type == 'sqlite':
943
- return sqlite_create_document_version(*args, **kwargs)
944
- elif db_type == 'elasticsearch':
945
- # Implement Elasticsearch version
946
- raise NotImplementedError("Elasticsearch version of create_document_version not yet implemented")
947
-
948
- def get_document_version(*args, **kwargs):
949
- if db_type == 'sqlite':
950
- return sqlite_get_document_version(*args, **kwargs)
951
- elif db_type == 'elasticsearch':
952
- # Implement Elasticsearch version
953
- raise NotImplementedError("Elasticsearch version of get_document_version not yet implemented")
954
-
955
- #
956
- # End of Document Versioning Functions
957
- ############################################################################################################
958
-
959
-
960
- ############################################################################################################
961
- #
962
- # Workflow Functions
963
-
964
- def get_workflow_chat(*args, **kwargs):
965
- if db_type == 'sqlite':
966
- return sqlite_get_workflow_chat(*args, **kwargs)
967
- elif db_type == 'elasticsearch':
968
- # Implement Elasticsearch version
969
- raise NotImplementedError("Elasticsearch version of get_workflow_chat not yet implemented")
970
-
971
-
972
- def save_workflow_chat_to_db(*args, **kwargs):
973
- if db_type == 'sqlite':
974
- # FIXME
975
- return sqlite_save_workflow_chat_to_db(*args, **kwargs)
976
- elif db_type == 'elasticsearch':
977
- # Implement Elasticsearch version
978
- raise NotImplementedError("Elasticsearch version of save_workflow_chat_to_db not yet implemented")
979
-
980
- #
981
- # End of Workflow Functions
982
- ############################################################################################################
983
-
984
- # Dead code FIXME
985
- # def close_connection():
986
- # if db_type == 'sqlite':
987
- # db.get_connection().close()
988
-
989
- #
990
- # End of file
991
- ############################################################################################################
App_Function_Libraries/DB/RAG_QA_Chat_DB.py DELETED
@@ -1,461 +0,0 @@
1
- # RAG_QA_Chat_DB.py
2
- # Description: This file contains the database operations for the RAG QA Chat + Notes system.
3
- #
4
- # Imports
5
- import logging
6
- import re
7
- import sqlite3
8
- from contextlib import contextmanager
9
- from datetime import datetime
10
- #
11
- # External Imports
12
- #
13
- # Local Imports
14
- #
15
- ########################################################################################################################
16
- #
17
- # Functions:
18
-
19
- # Set up logging
20
- logging.basicConfig(level=logging.INFO)
21
- logger = logging.getLogger(__name__)
22
-
23
- # Database schema
24
- SCHEMA_SQL = '''
25
- -- Table for storing chat messages
26
- CREATE TABLE IF NOT EXISTS rag_qa_chats (
27
- id INTEGER PRIMARY KEY AUTOINCREMENT,
28
- conversation_id TEXT NOT NULL,
29
- timestamp DATETIME NOT NULL,
30
- role TEXT NOT NULL,
31
- content TEXT NOT NULL
32
- );
33
-
34
- -- Table for storing conversation metadata
35
- CREATE TABLE IF NOT EXISTS conversation_metadata (
36
- conversation_id TEXT PRIMARY KEY,
37
- created_at DATETIME NOT NULL,
38
- last_updated DATETIME NOT NULL,
39
- title TEXT NOT NULL
40
- );
41
-
42
- -- Table for storing keywords
43
- CREATE TABLE IF NOT EXISTS rag_qa_keywords (
44
- id INTEGER PRIMARY KEY AUTOINCREMENT,
45
- keyword TEXT NOT NULL UNIQUE
46
- );
47
-
48
- -- Table for linking keywords to conversations
49
- CREATE TABLE IF NOT EXISTS rag_qa_conversation_keywords (
50
- id INTEGER PRIMARY KEY AUTOINCREMENT,
51
- conversation_id TEXT NOT NULL,
52
- keyword_id INTEGER NOT NULL,
53
- FOREIGN KEY (conversation_id) REFERENCES conversation_metadata(conversation_id),
54
- FOREIGN KEY (keyword_id) REFERENCES rag_qa_keywords(id)
55
- );
56
-
57
- -- Table for storing keyword collections
58
- CREATE TABLE IF NOT EXISTS rag_qa_keyword_collections (
59
- id INTEGER PRIMARY KEY AUTOINCREMENT,
60
- name TEXT NOT NULL UNIQUE,
61
- parent_id INTEGER,
62
- FOREIGN KEY (parent_id) REFERENCES rag_qa_keyword_collections(id)
63
- );
64
-
65
- -- Table for linking keywords to collections
66
- CREATE TABLE IF NOT EXISTS rag_qa_collection_keywords (
67
- id INTEGER PRIMARY KEY AUTOINCREMENT,
68
- collection_id INTEGER NOT NULL,
69
- keyword_id INTEGER NOT NULL,
70
- FOREIGN KEY (collection_id) REFERENCES rag_qa_keyword_collections(id),
71
- FOREIGN KEY (keyword_id) REFERENCES rag_qa_keywords(id)
72
- );
73
-
74
- -- Table for storing notes
75
- CREATE TABLE IF NOT EXISTS rag_qa_notes (
76
- id INTEGER PRIMARY KEY AUTOINCREMENT,
77
- conversation_id TEXT NOT NULL,
78
- content TEXT NOT NULL,
79
- timestamp DATETIME NOT NULL,
80
- FOREIGN KEY (conversation_id) REFERENCES conversation_metadata(conversation_id)
81
- );
82
-
83
- -- Table for linking notes to keywords
84
- CREATE TABLE IF NOT EXISTS rag_qa_note_keywords (
85
- id INTEGER PRIMARY KEY AUTOINCREMENT,
86
- note_id INTEGER NOT NULL,
87
- keyword_id INTEGER NOT NULL,
88
- FOREIGN KEY (note_id) REFERENCES rag_qa_notes(id),
89
- FOREIGN KEY (keyword_id) REFERENCES rag_qa_keywords(id)
90
- );
91
-
92
- -- Indexes for improved query performance
93
- CREATE INDEX IF NOT EXISTS idx_rag_qa_chats_conversation_id ON rag_qa_chats(conversation_id);
94
- CREATE INDEX IF NOT EXISTS idx_rag_qa_chats_timestamp ON rag_qa_chats(timestamp);
95
- CREATE INDEX IF NOT EXISTS idx_rag_qa_keywords_keyword ON rag_qa_keywords(keyword);
96
- CREATE INDEX IF NOT EXISTS idx_rag_qa_conversation_keywords_conversation_id ON rag_qa_conversation_keywords(conversation_id);
97
- CREATE INDEX IF NOT EXISTS idx_rag_qa_conversation_keywords_keyword_id ON rag_qa_conversation_keywords(keyword_id);
98
- CREATE INDEX IF NOT EXISTS idx_rag_qa_keyword_collections_parent_id ON rag_qa_keyword_collections(parent_id);
99
- CREATE INDEX IF NOT EXISTS idx_rag_qa_collection_keywords_collection_id ON rag_qa_collection_keywords(collection_id);
100
- CREATE INDEX IF NOT EXISTS idx_rag_qa_collection_keywords_keyword_id ON rag_qa_collection_keywords(keyword_id);
101
-
102
- -- Full-text search virtual table for chat content
103
- CREATE VIRTUAL TABLE IF NOT EXISTS rag_qa_chats_fts USING fts5(conversation_id, timestamp, role, content);
104
-
105
- -- Trigger to keep the FTS table up to date
106
- CREATE TRIGGER IF NOT EXISTS rag_qa_chats_ai AFTER INSERT ON rag_qa_chats BEGIN
107
- INSERT INTO rag_qa_chats_fts(conversation_id, timestamp, role, content) VALUES (new.conversation_id, new.timestamp, new.role, new.content);
108
- END;
109
- '''
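Given the schema above, a minimal sketch of querying the FTS5 table that the insert trigger keeps in sync; the search term is illustrative and MATCH is standard SQLite FTS5 syntax:

import sqlite3

conn = sqlite3.connect('rag_qa_chat.db')
rows = conn.execute(
    "SELECT conversation_id, content FROM rag_qa_chats_fts WHERE rag_qa_chats_fts MATCH ?",
    ('retrieval',),  # illustrative search term
).fetchall()
conn.close()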
110
-
111
- # Database connection management
112
- @contextmanager
113
- def get_db_connection():
114
- conn = sqlite3.connect('rag_qa_chat.db')
115
- try:
116
- yield conn
117
- finally:
118
- conn.close()
119
-
120
- @contextmanager
121
- def transaction():
122
- with get_db_connection() as conn:
123
- try:
124
- conn.execute('BEGIN TRANSACTION')
125
- yield conn
126
- conn.commit()
127
- except Exception:
128
- conn.rollback()
129
- raise
130
-
131
- def execute_query(query, params=None, transaction_conn=None):
132
- if transaction_conn:
133
- cursor = transaction_conn.cursor()
134
- if params:
135
- cursor.execute(query, params)
136
- else:
137
- cursor.execute(query)
138
- return cursor.fetchall()
139
- else:
140
- with get_db_connection() as conn:
141
- cursor = conn.cursor()
142
- if params:
143
- cursor.execute(query, params)
144
- else:
145
- cursor.execute(query)
146
- conn.commit()
147
- return cursor.fetchall()
148
-
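A usage sketch of the two helpers above: inside transaction(), both inserts share one connection and either commit together or roll back together, while a bare execute_query() opens its own connection and commits immediately:

with transaction() as conn:
    execute_query("INSERT OR IGNORE INTO rag_qa_keywords (keyword) VALUES (?)", ('demo',), conn)
    execute_query("INSERT OR IGNORE INTO rag_qa_keywords (keyword) VALUES (?)", ('sample',), conn)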
149
- def create_tables():
150
- with get_db_connection() as conn:
151
- conn.executescript(SCHEMA_SQL)
152
- logger.info("All RAG QA Chat tables created successfully")
153
-
154
- # Initialize the database
155
- create_tables()
156
-
157
- # Input validation
158
- def validate_keyword(keyword):
159
- if not isinstance(keyword, str):
160
- raise ValueError("Keyword must be a string")
161
- if not keyword.strip():
162
- raise ValueError("Keyword cannot be empty or just whitespace")
163
- if len(keyword) > 100:
164
- raise ValueError("Keyword is too long (max 100 characters)")
165
- if not re.match(r'^[a-zA-Z0-9\s\-_]+$', keyword):
166
- raise ValueError("Keyword contains invalid characters")
167
- return keyword.strip()
168
-
169
- def validate_collection_name(name):
170
- if not isinstance(name, str):
171
- raise ValueError("Collection name must be a string")
172
- if not name.strip():
173
- raise ValueError("Collection name cannot be empty or just whitespace")
174
- if len(name) > 100:
175
- raise ValueError("Collection name is too long (max 100 characters)")
176
- if not re.match(r'^[a-zA-Z0-9\s\-_]+$', name):
177
- raise ValueError("Collection name contains invalid characters")
178
- return name.strip()
179
-
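A quick illustration of the validation rules above (letters, digits, whitespace, hyphens, and underscores only; at most 100 characters; surrounding whitespace stripped):

validate_keyword('machine-learning')  # -> 'machine-learning'
validate_keyword('  spaced out  ')    # -> 'spaced out'
validate_keyword('bad!chars')         # raises ValueError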
180
- # Core functions
181
- def add_keyword(keyword):
182
- try:
183
- validated_keyword = validate_keyword(keyword)
184
- query = "INSERT OR IGNORE INTO rag_qa_keywords (keyword) VALUES (?)"
185
- execute_query(query, (validated_keyword,))
186
- logger.info(f"Keyword '{validated_keyword}' added successfully")
187
- except ValueError as e:
188
- logger.error(f"Invalid keyword: {e}")
189
- raise
190
- except Exception as e:
191
- logger.error(f"Error adding keyword '{keyword}': {e}")
192
- raise
193
-
194
- def create_keyword_collection(name, parent_id=None):
195
- try:
196
- validated_name = validate_collection_name(name)
197
- query = "INSERT INTO rag_qa_keyword_collections (name, parent_id) VALUES (?, ?)"
198
- execute_query(query, (validated_name, parent_id))
199
- logger.info(f"Keyword collection '{validated_name}' created successfully")
200
- except ValueError as e:
201
- logger.error(f"Invalid collection name: {e}")
202
- raise
203
- except Exception as e:
204
- logger.error(f"Error creating keyword collection '{name}': {e}")
205
- raise
206
-
207
- def add_keyword_to_collection(collection_name, keyword):
208
- try:
209
- validated_collection_name = validate_collection_name(collection_name)
210
- validated_keyword = validate_keyword(keyword)
211
-
212
- with transaction() as conn:
213
- # Insert on the transaction's own connection; calling add_keyword() here would open a second connection mid-transaction.
- execute_query("INSERT OR IGNORE INTO rag_qa_keywords (keyword) VALUES (?)", (validated_keyword,), conn)
214
-
215
- query = '''
216
- INSERT INTO rag_qa_collection_keywords (collection_id, keyword_id)
217
- SELECT c.id, k.id
218
- FROM rag_qa_keyword_collections c, rag_qa_keywords k
219
- WHERE c.name = ? AND k.keyword = ?
220
- '''
221
- execute_query(query, (validated_collection_name, validated_keyword), conn)
222
-
223
- logger.info(f"Keyword '{validated_keyword}' added to collection '{validated_collection_name}' successfully")
224
- except ValueError as e:
225
- logger.error(f"Invalid input: {e}")
226
- raise
227
- except Exception as e:
228
- logger.error(f"Error adding keyword '{keyword}' to collection '{collection_name}': {e}")
229
- raise
230
-
231
- def add_keywords_to_conversation(conversation_id, keywords):
232
- if not isinstance(keywords, (list, tuple)):
233
- raise ValueError("Keywords must be a list or tuple")
234
- try:
235
- with transaction() as conn:
236
- for keyword in keywords:
237
- validated_keyword = validate_keyword(keyword)
238
-
239
- query = "INSERT OR IGNORE INTO rag_qa_keywords (keyword) VALUES (?)"
240
- execute_query(query, (validated_keyword,), conn)
241
-
242
- query = '''
243
- INSERT INTO rag_qa_conversation_keywords (conversation_id, keyword_id)
244
- SELECT ?, id FROM rag_qa_keywords WHERE keyword = ?
245
- '''
246
- execute_query(query, (conversation_id, validated_keyword), conn)
247
-
248
- logger.info(f"Keywords added to conversation '{conversation_id}' successfully")
249
- except ValueError as e:
250
- logger.error(f"Invalid keyword: {e}")
251
- raise
252
- except Exception as e:
253
- logger.error(f"Error adding keywords to conversation '{conversation_id}': {e}")
254
- raise
255
-
256
- def get_keywords_for_conversation(conversation_id):
257
- try:
258
- query = '''
259
- SELECT k.keyword
260
- FROM rag_qa_keywords k
261
- JOIN rag_qa_conversation_keywords ck ON k.id = ck.keyword_id
262
- WHERE ck.conversation_id = ?
263
- '''
264
- result = execute_query(query, (conversation_id,))
265
- keywords = [row[0] for row in result]
266
- logger.info(f"Retrieved {len(keywords)} keywords for conversation '{conversation_id}'")
267
- return keywords
268
- except Exception as e:
269
- logger.error(f"Error getting keywords for conversation '{conversation_id}': {e}")
270
- raise
271
-
272
- def get_keywords_for_collection(collection_name):
273
- try:
274
- query = '''
275
- SELECT k.keyword
276
- FROM rag_qa_keywords k
277
- JOIN rag_qa_collection_keywords ck ON k.id = ck.keyword_id
278
- JOIN rag_qa_keyword_collections c ON ck.collection_id = c.id
279
- WHERE c.name = ?
280
- '''
281
- result = execute_query(query, (collection_name,))
282
- keywords = [row[0] for row in result]
283
- logger.info(f"Retrieved {len(keywords)} keywords for collection '{collection_name}'")
284
- return keywords
285
- except Exception as e:
286
- logger.error(f"Error getting keywords for collection '{collection_name}': {e}")
287
- raise
288
-
289
- def save_notes(conversation_id, content):
290
- """Save notes to the database."""
291
- try:
292
- query = "INSERT INTO rag_qa_notes (conversation_id, content, timestamp) VALUES (?, ?, ?)"
293
- timestamp = datetime.now().isoformat()
294
- execute_query(query, (conversation_id, content, timestamp))
295
- logger.info(f"Notes saved for conversation '{conversation_id}'")
296
- except Exception as e:
297
- logger.error(f"Error saving notes for conversation '{conversation_id}': {e}")
298
- raise
299
-
300
- def get_notes(conversation_id):
301
- """Retrieve notes for a given conversation."""
302
- try:
303
- query = "SELECT content FROM rag_qa_notes WHERE conversation_id = ?"
304
- result = execute_query(query, (conversation_id,))
305
- notes = [row[0] for row in result]
306
- logger.info(f"Retrieved {len(notes)} notes for conversation '{conversation_id}'")
307
- return notes
308
- except Exception as e:
309
- logger.error(f"Error getting notes for conversation '{conversation_id}': {e}")
310
- raise
311
-
312
- def clear_notes(conversation_id):
313
- """Clear all notes for a given conversation."""
314
- try:
315
- query = "DELETE FROM rag_qa_notes WHERE conversation_id = ?"
316
- execute_query(query, (conversation_id,))
317
- logger.info(f"Cleared notes for conversation '{conversation_id}'")
318
- except Exception as e:
319
- logger.error(f"Error clearing notes for conversation '{conversation_id}': {e}")
320
- raise
321
-
322
- def add_keywords_to_note(note_id, keywords):
323
- """Associate keywords with a note."""
324
- try:
325
- with transaction() as conn:
326
- for keyword in keywords:
327
- validated_keyword = validate_keyword(keyword)
328
-
329
- # Insert the keyword into the rag_qa_keywords table if it doesn't exist
330
- query = "INSERT OR IGNORE INTO rag_qa_keywords (keyword) VALUES (?)"
331
- execute_query(query, (validated_keyword,), conn)
332
-
333
- # Retrieve the keyword ID
334
- query = "SELECT id FROM rag_qa_keywords WHERE keyword = ?"
335
- keyword_id = execute_query(query, (validated_keyword,), conn)[0][0]
336
-
337
- # Link the note and keyword
338
- query = "INSERT INTO rag_qa_note_keywords (note_id, keyword_id) VALUES (?, ?)"
339
- execute_query(query, (note_id, keyword_id), conn)
340
-
341
- logger.info(f"Keywords added to note ID '{note_id}' successfully")
342
- except Exception as e:
343
- logger.error(f"Error adding keywords to note ID '{note_id}': {e}")
344
- raise
345
-
346
- def get_keywords_for_note(note_id):
347
- """Retrieve keywords associated with a given note."""
348
- try:
349
- query = '''
350
- SELECT k.keyword
351
- FROM rag_qa_keywords k
352
- JOIN rag_qa_note_keywords nk ON k.id = nk.keyword_id
353
- WHERE nk.note_id = ?
354
- '''
355
- result = execute_query(query, (note_id,))
356
- keywords = [row[0] for row in result]
357
- logger.info(f"Retrieved {len(keywords)} keywords for note ID '{note_id}'")
358
- return keywords
359
- except Exception as e:
360
- logger.error(f"Error getting keywords for note ID '{note_id}': {e}")
361
- raise
362
-
363
- def clear_keywords_from_note(note_id):
364
- """Clear all keywords from a given note."""
365
- try:
366
- query = "DELETE FROM rag_qa_note_keywords WHERE note_id = ?"
367
- execute_query(query, (note_id,))
368
- logger.info(f"Cleared keywords for note ID '{note_id}'")
369
- except Exception as e:
370
- logger.error(f"Error clearing keywords for note ID '{note_id}': {e}")
371
- raise
372
-
373
- def save_message(conversation_id, role, content):
374
- try:
375
- query = "INSERT INTO rag_qa_chats (conversation_id, timestamp, role, content) VALUES (?, ?, ?, ?)"
376
- timestamp = datetime.now().isoformat()
377
- execute_query(query, (conversation_id, timestamp, role, content))
378
- logger.info(f"Message saved for conversation '{conversation_id}'")
379
- except Exception as e:
380
- logger.error(f"Error saving message for conversation '{conversation_id}': {e}")
381
- raise
382
-
383
- def start_new_conversation(title="Untitled Conversation"):
384
- try:
385
- conversation_id = datetime.now().isoformat()
386
- query = "INSERT INTO conversation_metadata (conversation_id, created_at, last_updated, title) VALUES (?, ?, ?, ?)"
387
- now = datetime.now()
388
- execute_query(query, (conversation_id, now, now, title))
389
- logger.info(f"New conversation '{conversation_id}' started with title '{title}'")
390
- return conversation_id
391
- except Exception as e:
392
- logger.error(f"Error starting new conversation: {e}")
393
- raise
394
-
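A hedged end-to-end sketch tying the functions in this file together (the title, messages, and keywords are illustrative):

conv_id = start_new_conversation(title='Demo chat')
save_message(conv_id, 'human', 'What is RAG?')
save_message(conv_id, 'ai', 'Retrieval-augmented generation: retrieve context, then generate.')
add_keywords_to_conversation(conv_id, ['rag', 'demo'])
history, total_pages, total_count = load_chat_history(conv_id)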
395
- # Pagination helper function
396
- def get_paginated_results(query, params=None, page=1, page_size=20):
397
- try:
398
- offset = (page - 1) * page_size
399
- paginated_query = f"{query} LIMIT ? OFFSET ?"
400
- if params:
401
- params = tuple(params) + (page_size, offset)
402
- else:
403
- params = (page_size, offset)
404
-
405
- result = execute_query(paginated_query, params)
406
-
407
- count_query = f"SELECT COUNT(*) FROM ({query})"
408
- total_count = execute_query(count_query, params[:-2] if params else None)[0][0]
409
-
410
- total_pages = (total_count + page_size - 1) // page_size
411
-
412
- logger.info(f"Retrieved page {page} of {total_pages} (total items: {total_count})")
413
- return result, total_pages, total_count
414
- except Exception as e:
415
- logger.error(f"Error retrieving paginated results: {e}")
416
- raise
417
-
418
- def get_all_collections(page=1, page_size=20):
419
- try:
420
- query = "SELECT name FROM rag_qa_keyword_collections"
421
- results, total_pages, total_count = get_paginated_results(query, page=page, page_size=page_size)
422
- collections = [row[0] for row in results]
423
- logger.info(f"Retrieved {len(collections)} keyword collections (page {page} of {total_pages})")
424
- return collections, total_pages, total_count
425
- except Exception as e:
426
- logger.error(f"Error getting collections: {e}")
427
- raise
428
-
429
- def search_conversations_by_keywords(keywords, page=1, page_size=20):
430
- try:
431
- placeholders = ','.join(['?' for _ in keywords])
432
- query = f'''
433
- SELECT DISTINCT cm.conversation_id, cm.title
434
- FROM conversation_metadata cm
435
- JOIN rag_qa_conversation_keywords ck ON cm.conversation_id = ck.conversation_id
436
- JOIN rag_qa_keywords k ON ck.keyword_id = k.id
437
- WHERE k.keyword IN ({placeholders})
438
- '''
439
- results, total_pages, total_count = get_paginated_results(query, keywords, page, page_size)
440
- logger.info(
441
- f"Found {total_count} conversations matching keywords: {', '.join(keywords)} (page {page} of {total_pages})")
442
- return results, total_pages, total_count
443
- except Exception as e:
444
- logger.error(f"Error searching conversations by keywords {keywords}: {e}")
445
- raise
446
-
447
- def load_chat_history(conversation_id, page=1, page_size=50):
448
- try:
449
- query = "SELECT role, content FROM rag_qa_chats WHERE conversation_id = ? ORDER BY timestamp"
450
- results, total_pages, total_count = get_paginated_results(query, (conversation_id,), page, page_size)
451
- history = [(msg[1] if msg[0] == 'human' else None, msg[1] if msg[0] == 'ai' else None) for msg in results]
452
- logger.info(
453
- f"Loaded {len(history)} messages for conversation '{conversation_id}' (page {page} of {total_pages})")
454
- return history, total_pages, total_count
455
- except Exception as e:
456
- logger.error(f"Error loading chat history for conversation '{conversation_id}': {e}")
457
- raise
458
-
459
- #
460
- # End of RAG_QA_Chat_DB.py
461
- ####################################################################################################
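Note on the pagination helper deleted above: it is the standard LIMIT/OFFSET pattern with a wrapped COUNT(*) subquery for the page total. A minimal self-contained sketch of the same idea, using a hypothetical `notes` table (an illustration, not the deleted module's code):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE notes (id INTEGER PRIMARY KEY, body TEXT)")
conn.executemany("INSERT INTO notes (body) VALUES (?)",
                 [(f"note {i}",) for i in range(45)])

def paginate(conn, query, params=(), page=1, page_size=20):
    offset = (page - 1) * page_size
    # Page slice: append LIMIT/OFFSET to the caller's query.
    rows = conn.execute(f"{query} LIMIT ? OFFSET ?",
                        (*params, page_size, offset)).fetchall()
    # Page total: wrap the original query in a COUNT(*) subquery.
    total = conn.execute(f"SELECT COUNT(*) FROM ({query})", params).fetchone()[0]
    total_pages = (total + page_size - 1) // page_size  # ceiling division
    return rows, total_pages, total

rows, pages, total = paginate(conn, "SELECT id, body FROM notes", page=3)
assert len(rows) == 5 and pages == 3 and total == 45
```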
App_Function_Libraries/DB/SQLite_DB.py DELETED
The diff for this file is too large to render. See raw diff
 
App_Function_Libraries/DB/Test_SQLite_DB.py DELETED
@@ -1,202 +0,0 @@
1
- # Test_SQLite_DB.py
2
- # Description: Test file for SQLite_DB.py
3
- #
4
- # Usage: python -m unittest test_sqlite_db.py
5
- #
6
- # Imports
7
- import unittest
8
- import sqlite3
9
- import threading
10
- import time
11
- from unittest.mock import patch
12
- #
13
- # Local Imports
14
- from App_Function_Libraries.DB.SQLite_DB import Database, add_media_with_keywords, add_media_version, DatabaseError
15
- #
16
- #######################################################################################################################
17
- #
18
- # Functions:
19
-
20
- class TestDatabase(unittest.TestCase):
21
- def setUp(self):
22
- self.db = Database(':memory:') # Use in-memory database for testing
23
-
24
- def test_connection_management(self):
25
- with self.db.get_connection() as conn:
26
- self.assertIsInstance(conn, sqlite3.Connection)
27
- self.assertEqual(len(self.db.pool), 1)
28
-
29
- def test_execute_query(self):
30
- self.db.execute_query("CREATE TABLE test (id INTEGER PRIMARY KEY, name TEXT)")
31
- self.db.execute_query("INSERT INTO test (name) VALUES (?)", ("test_name",))
32
- with self.db.get_connection() as conn:
33
- cursor = conn.cursor()
34
- cursor.execute("SELECT name FROM test")
35
- result = cursor.fetchone()
36
- self.assertEqual(result[0], "test_name")
37
-
38
- def test_execute_many(self):
39
- self.db.execute_query("CREATE TABLE test (id INTEGER PRIMARY KEY, name TEXT)")
40
- data = [("name1",), ("name2",), ("name3",)]
41
- self.db.execute_many("INSERT INTO test (name) VALUES (?)", data)
42
- with self.db.get_connection() as conn:
43
- cursor = conn.cursor()
44
- cursor.execute("SELECT COUNT(*) FROM test")
45
- count = cursor.fetchone()[0]
46
- self.assertEqual(count, 3)
47
-
48
- def test_connection_retry(self):
49
- def lock_database():
50
- with self.db.get_connection() as conn:
51
- cursor = conn.cursor()
52
- cursor.execute("BEGIN EXCLUSIVE TRANSACTION")
53
- time.sleep(2) # Hold the lock for 2 seconds
54
-
55
- thread = threading.Thread(target=lock_database)
56
- thread.start()
57
- time.sleep(0.1) # Give the thread time to acquire the lock
58
-
59
- with self.assertRaises(DatabaseError):
60
- self.db.execute_query("SELECT 1") # This should retry and eventually fail
61
-
62
- thread.join()
63
-
64
- class TestAddMediaWithKeywords(unittest.TestCase):
65
- def setUp(self):
66
- self.db = Database(':memory:')
67
- self.db.execute_query("""
68
- CREATE TABLE Media (
69
- id INTEGER PRIMARY KEY,
70
- url TEXT,
71
- title TEXT NOT NULL,
72
- type TEXT NOT NULL,
73
- content TEXT,
74
- author TEXT,
75
- ingestion_date TEXT,
76
- transcription_model TEXT
77
- )
78
- """)
79
- self.db.execute_query("CREATE TABLE Keywords (id INTEGER PRIMARY KEY, keyword TEXT NOT NULL UNIQUE)")
80
- self.db.execute_query("""
81
- CREATE TABLE MediaKeywords (
82
- id INTEGER PRIMARY KEY,
83
- media_id INTEGER NOT NULL,
84
- keyword_id INTEGER NOT NULL,
85
- FOREIGN KEY (media_id) REFERENCES Media(id),
86
- FOREIGN KEY (keyword_id) REFERENCES Keywords(id)
87
- )
88
- """)
89
- self.db.execute_query("""
90
- CREATE TABLE MediaModifications (
91
- id INTEGER PRIMARY KEY,
92
- media_id INTEGER NOT NULL,
93
- prompt TEXT,
94
- summary TEXT,
95
- modification_date TEXT,
96
- FOREIGN KEY (media_id) REFERENCES Media(id)
97
- )
98
- """)
99
- self.db.execute_query("""
100
- CREATE TABLE MediaVersion (
101
- id INTEGER PRIMARY KEY,
102
- media_id INTEGER NOT NULL,
103
- version INTEGER NOT NULL,
104
- prompt TEXT,
105
- summary TEXT,
106
- created_at TEXT NOT NULL,
107
- FOREIGN KEY (media_id) REFERENCES Media(id)
108
- )
109
- """)
110
- self.db.execute_query("CREATE VIRTUAL TABLE media_fts USING fts5(title, content)")
111
-
112
- @patch('App_Function_Libraries.DB.SQLite_DB.db', new_callable=lambda: Database(':memory:'))
113
- def test_add_new_media(self, mock_db):
114
- mock_db.get_connection = self.db.get_connection
115
- result = add_media_with_keywords(
116
- url="http://example.com",
117
- title="Test Title",
118
- media_type="article",
119
- content="Test content",
120
- keywords="test,keyword",
121
- prompt="Test prompt",
122
- summary="Test summary",
123
- transcription_model="Test model",
124
- author="Test Author",
125
- ingestion_date="2023-01-01"
126
- )
127
- self.assertIn("added/updated successfully", result)
128
-
129
- with self.db.get_connection() as conn:
130
- cursor = conn.cursor()
131
- cursor.execute("SELECT COUNT(*) FROM Media")
132
- self.assertEqual(cursor.fetchone()[0], 1)
133
-
134
- cursor.execute("SELECT COUNT(*) FROM Keywords")
135
- self.assertEqual(cursor.fetchone()[0], 2)
136
-
137
- cursor.execute("SELECT COUNT(*) FROM MediaKeywords")
138
- self.assertEqual(cursor.fetchone()[0], 2)
139
-
140
- cursor.execute("SELECT COUNT(*) FROM MediaModifications")
141
- self.assertEqual(cursor.fetchone()[0], 1)
142
-
143
- cursor.execute("SELECT COUNT(*) FROM MediaVersion")
144
- self.assertEqual(cursor.fetchone()[0], 1)
145
-
146
- @patch('App_Function_Libraries.DB.SQLite_DB.db', new_callable=lambda: Database(':memory:'))
147
- def test_update_existing_media(self, mock_db):
148
- mock_db.get_connection = self.db.get_connection
149
- add_media_with_keywords(
150
- url="http://example.com",
151
- title="Test Title",
152
- media_type="article",
153
- content="Test content",
154
- keywords="test,keyword",
155
- prompt="Test prompt",
156
- summary="Test summary",
157
- transcription_model="Test model",
158
- author="Test Author",
159
- ingestion_date="2023-01-01"
160
- )
161
-
162
- result = add_media_with_keywords(
163
- url="http://example.com",
164
- title="Updated Title",
165
- media_type="article",
166
- content="Updated content",
167
- keywords="test,new",
168
- prompt="Updated prompt",
169
- summary="Updated summary",
170
- transcription_model="Updated model",
171
- author="Updated Author",
172
- ingestion_date="2023-01-02"
173
- )
174
-
175
- self.assertIn("added/updated successfully", result)
176
-
177
- with self.db.get_connection() as conn:
178
- cursor = conn.cursor()
179
- cursor.execute("SELECT COUNT(*) FROM Media")
180
- self.assertEqual(cursor.fetchone()[0], 1)
181
-
182
- cursor.execute("SELECT title FROM Media")
183
- self.assertEqual(cursor.fetchone()[0], "Updated Title")
184
-
185
- cursor.execute("SELECT COUNT(*) FROM Keywords")
186
- self.assertEqual(cursor.fetchone()[0], 3)
187
-
188
- cursor.execute("SELECT COUNT(*) FROM MediaKeywords")
189
- self.assertEqual(cursor.fetchone()[0], 3)
190
-
191
- cursor.execute("SELECT COUNT(*) FROM MediaModifications")
192
- self.assertEqual(cursor.fetchone()[0], 2)
193
-
194
- cursor.execute("SELECT COUNT(*) FROM MediaVersion")
195
- self.assertEqual(cursor.fetchone()[0], 2)
196
-
197
- if __name__ == '__main__':
198
- unittest.main()
199
-
200
- #
201
- # End of File
202
- #######################################################################################################################
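One caveat worth recording about these tests' use of `Database(':memory:')`: in plain sqlite3, every new connection to ':memory:' opens a separate empty database, so a pooled-connection design only sees one database if the same connection object is reused or a shared-cache URI is used. A quick standalone demonstration (standard library only):

```python
import sqlite3

a = sqlite3.connect(":memory:")
a.execute("CREATE TABLE t (x)")
b = sqlite3.connect(":memory:")  # a completely separate database
assert b.execute("SELECT name FROM sqlite_master").fetchall() == []

# Sharing one in-memory database across connections needs a shared-cache URI:
shared = "file:testdb?mode=memory&cache=shared"
c = sqlite3.connect(shared, uri=True)
c.execute("CREATE TABLE t (x)")
d = sqlite3.connect(shared, uri=True)
assert d.execute("SELECT name FROM sqlite_master").fetchone() == ("t",)
```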
App_Function_Libraries/DB/__init__.py DELETED
File without changes
App_Function_Libraries/Databases/chroma_db/chroma.sqlite3 DELETED
Binary file (156 kB)
 
App_Function_Libraries/Databases/media_summary.db DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:74b972252f8784e4c8446ea921a231f333f76a703575be185d813bc60a6b035d
3
- size 13217792
App_Function_Libraries/Databases/prompts.db DELETED
Binary file (233 kB)
 
App_Function_Libraries/Gradio_Related.py DELETED
@@ -1,421 +0,0 @@
1
- # Gradio_Related.py
2
- #########################################
3
- # Gradio UI Functions Library
4
- # I fucking hate Gradio.
5
- #
6
- #########################################
7
- #
8
- # Built-In Imports
9
- import logging
10
- import os
11
- import webbrowser
12
-
13
- #
14
- # Import 3rd-Party Libraries
15
- import gradio as gr
16
- #
17
- # Local Imports
18
- from App_Function_Libraries.DB.DB_Manager import get_db_config
19
- from App_Function_Libraries.Gradio_UI.Arxiv_tab import create_arxiv_tab
20
- from App_Function_Libraries.Gradio_UI.Audio_ingestion_tab import create_audio_processing_tab
21
- from App_Function_Libraries.Gradio_UI.Book_Ingestion_tab import create_import_book_tab
22
- from App_Function_Libraries.Gradio_UI.Character_Chat_tab import create_character_card_interaction_tab, create_character_chat_mgmt_tab, create_custom_character_card_tab, \
23
- create_character_card_validation_tab, create_export_characters_tab
24
- from App_Function_Libraries.Gradio_UI.Character_interaction_tab import create_narrator_controlled_conversation_tab, \
25
- create_multiple_character_chat_tab
26
- from App_Function_Libraries.Gradio_UI.Chat_ui import create_chat_management_tab, \
27
- create_chat_interface_four, create_chat_interface_multi_api, create_chat_interface_stacked, create_chat_interface
28
- from App_Function_Libraries.Gradio_UI.Config_tab import create_config_editor_tab
29
- from App_Function_Libraries.Gradio_UI.Explain_summarize_tab import create_summarize_explain_tab
30
- from App_Function_Libraries.Gradio_UI.Export_Functionality import create_export_tab
31
- from App_Function_Libraries.Gradio_UI.Backup_Functionality import create_backup_tab, create_view_backups_tab, \
32
- create_restore_backup_tab
33
- from App_Function_Libraries.Gradio_UI.Import_Functionality import create_import_single_prompt_tab, \
34
- create_import_obsidian_vault_tab, create_import_item_tab, create_import_multiple_prompts_tab
35
- from App_Function_Libraries.Gradio_UI.Introduction_tab import create_introduction_tab
36
- from App_Function_Libraries.Gradio_UI.Keywords import create_view_keywords_tab, create_add_keyword_tab, \
37
- create_delete_keyword_tab, create_export_keywords_tab
38
- from App_Function_Libraries.Gradio_UI.Live_Recording import create_live_recording_tab
39
- from App_Function_Libraries.Gradio_UI.Llamafile_tab import create_chat_with_llamafile_tab
40
- #from App_Function_Libraries.Gradio_UI.MMLU_Pro_tab import create_mmlu_pro_tab
41
- from App_Function_Libraries.Gradio_UI.Media_edit import create_prompt_clone_tab, create_prompt_edit_tab, \
42
- create_media_edit_and_clone_tab, create_media_edit_tab
43
- from App_Function_Libraries.Gradio_UI.Media_wiki_tab import create_mediawiki_import_tab, create_mediawiki_config_tab
44
- from App_Function_Libraries.Gradio_UI.PDF_ingestion_tab import create_pdf_ingestion_tab, create_pdf_ingestion_test_tab
45
- from App_Function_Libraries.Gradio_UI.Plaintext_tab_import import create_plain_text_import_tab
46
- from App_Function_Libraries.Gradio_UI.Podcast_tab import create_podcast_tab
47
- from App_Function_Libraries.Gradio_UI.Prompt_Suggestion_tab import create_prompt_suggestion_tab
48
- from App_Function_Libraries.Gradio_UI.RAG_QA_Chat_tab import create_rag_qa_chat_tab
49
- from App_Function_Libraries.Gradio_UI.Re_summarize_tab import create_resummary_tab
50
- from App_Function_Libraries.Gradio_UI.Search_Tab import create_prompt_search_tab, \
51
- create_search_summaries_tab, create_search_tab
52
- from App_Function_Libraries.Gradio_UI.RAG_Chat_tab import create_rag_tab
53
- from App_Function_Libraries.Gradio_UI.Embeddings_tab import create_embeddings_tab, create_view_embeddings_tab, \
54
- create_purge_embeddings_tab
55
- from App_Function_Libraries.Gradio_UI.Trash import create_view_trash_tab, create_empty_trash_tab, \
56
- create_delete_trash_tab, create_search_and_mark_trash_tab
57
- from App_Function_Libraries.Gradio_UI.Utilities import create_utilities_yt_timestamp_tab, create_utilities_yt_audio_tab, \
58
- create_utilities_yt_video_tab
59
- from App_Function_Libraries.Gradio_UI.Video_transcription_tab import create_video_transcription_tab
60
- from App_Function_Libraries.Gradio_UI.View_tab import create_manage_items_tab
61
- from App_Function_Libraries.Gradio_UI.Website_scraping_tab import create_website_scraping_tab
62
- from App_Function_Libraries.Gradio_UI.Chat_Workflows import chat_workflows_tab
63
- from App_Function_Libraries.Gradio_UI.View_DB_Items_tab import create_prompt_view_tab, \
64
- create_view_all_with_versions_tab, create_viewing_tab
65
- #
66
- # Gradio UI Imports
67
- from App_Function_Libraries.Gradio_UI.Evaluations_Benchmarks_tab import create_geval_tab, create_infinite_bench_tab
68
- #from App_Function_Libraries.Local_LLM.Local_LLM_huggingface import create_huggingface_tab
69
- from App_Function_Libraries.Gradio_UI.RAG_QA_Chat_Notes import create_rag_qa_chat_notes_tab
70
-
71
- #
72
- #######################################################################################################################
73
- # Function Definitions
74
- #
75
-
76
-
77
- # Disable Gradio Analytics
78
- os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
79
-
80
-
81
- custom_prompt_input = None
82
- server_mode = False
83
- share_public = False
84
- custom_prompt_summarize_bulleted_notes = ("""
85
- <s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks, create comprehensive bulleted notes.[/INST]
86
- **Bulleted Note Creation Guidelines**
87
-
88
- **Headings**:
89
- - Based on referenced topics, not categories like quotes or terms
90
- - Surrounded by **bold** formatting
91
- - Not listed as bullet points
92
- - No space between headings and list items underneath
93
-
94
- **Emphasis**:
95
- - **Important terms** set in bold font
96
- - **Text ending in a colon**: also bolded
97
-
98
- **Review**:
99
- - Ensure adherence to specified format
100
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
101
- """)
102
- #
103
- # End of globals
104
- #######################################################################################################################
105
- #
106
- # Start of Video/Audio Transcription and Summarization Functions
107
- #
108
- # Functions:
109
- # FIXME
110
- #
111
- #
112
- ################################################################################################################
113
- # Functions for Re-Summarization
114
- #
115
- # Functions:
116
- # FIXME
117
- # End of Re-Summarization Functions
118
- #
119
- ############################################################################################################################################################################################################################
120
- #
121
- # Explain/Summarize This Tab
122
- #
123
- # Functions:
124
- # FIXME
125
- #
126
- #
127
- ############################################################################################################################################################################################################################
128
- #
129
- # Transcript Comparison Tab
130
- #
131
- # Functions:
132
- # FIXME
133
- #
134
- #
135
- ###########################################################################################################################################################################################################################
136
- #
137
- # Search Tab
138
- #
139
- # Functions:
140
- # FIXME
141
- #
142
- # End of Search Tab Functions
143
- #
144
- ##############################################################################################################################################################################################################################
145
- #
146
- # Llamafile Tab
147
- #
148
- # Functions:
149
- # FIXME
150
- #
151
- # End of Llamafile Tab Functions
152
- ##############################################################################################################################################################################################################################
153
- #
154
- # Chat Interface Tab Functions
155
- #
156
- # Functions:
157
- # FIXME
158
- #
159
- #
160
- # End of Chat Interface Tab Functions
161
- ################################################################################################################################################################################################################################
162
- #
163
- # Media Edit Tab Functions
164
- # Functions:
165
- # Fixme
166
- # create_media_edit_tab():
167
- ##### Trash Tab
168
- # FIXME
169
- # Functions:
170
- #
171
- # End of Media Edit Tab Functions
172
- ################################################################################################################
173
- #
174
- # Import Items Tab Functions
175
- #
176
- # Functions:
177
- #FIXME
178
- # End of Import Items Tab Functions
179
- ################################################################################################################
180
- #
181
- # Export Items Tab Functions
182
- #
183
- # Functions:
184
- # FIXME
185
- #
186
- #
187
- # End of Export Items Tab Functions
188
- ################################################################################################################
189
- #
190
- # Keyword Management Tab Functions
191
- #
192
- # Functions:
193
- # create_view_keywords_tab():
194
- # FIXME
195
- #
196
- # End of Keyword Management Tab Functions
197
- ################################################################################################################
198
- #
199
- # Document Editing Tab Functions
200
- #
201
- # Functions:
202
- # #FIXME
203
- #
204
- #
205
- ################################################################################################################
206
- #
207
- # Utilities Tab Functions
208
- # Functions:
209
- # create_utilities_yt_video_tab():
210
- # #FIXME
211
-
212
- #
213
- # End of Utilities Tab Functions
214
- ################################################################################################################
215
-
216
- # FIXME - Prompt sample box
217
- #
218
- # # Sample data
219
- # prompts_category_1 = [
220
- # "What are the key points discussed in the video?",
221
- # "Summarize the main arguments made by the speaker.",
222
- # "Describe the conclusions of the study presented."
223
- # ]
224
- #
225
- # prompts_category_2 = [
226
- # "How does the proposed solution address the problem?",
227
- # "What are the implications of the findings?",
228
- # "Can you explain the theory behind the observed phenomenon?"
229
- # ]
230
- #
231
- # all_prompts2 = prompts_category_1 + prompts_category_2
232
-
233
-
234
- def launch_ui(share_public=None, server_mode=False):
235
- webbrowser.open_new_tab('http://127.0.0.1:7860/?__theme=dark')
236
- share=share_public
237
- css = """
238
- .result-box {
239
- margin-bottom: 20px;
240
- border: 1px solid #ddd;
241
- padding: 10px;
242
- }
243
- .result-box.error {
244
- border-color: #ff0000;
245
- background-color: #ffeeee;
246
- }
247
- .transcription, .summary {
248
- max-height: 800px;
249
- overflow-y: auto;
250
- border: 1px solid #eee;
251
- padding: 10px;
252
- margin-top: 10px;
253
- }
254
- """
255
-
256
- with gr.Blocks(theme='bethecloud/storj_theme',css=css) as iface:
257
- gr.HTML(
258
- """
259
- <script>
260
- document.addEventListener('DOMContentLoaded', (event) => {
261
- document.body.classList.add('dark');
262
- document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)';
263
- });
264
- </script>
265
- """
266
- )
267
- db_config = get_db_config()
268
- db_type = db_config['type']
269
- gr.Markdown(f"# tl/dw: Your LLM-powered Research Multi-tool")
270
- gr.Markdown(f"(Using {db_type.capitalize()} Database)")
271
- with gr.Tabs():
272
- with gr.TabItem("Transcription / Summarization / Ingestion", id="ingestion-grouping", visible=True):
273
- with gr.Tabs():
274
- create_video_transcription_tab()
275
- create_audio_processing_tab()
276
- create_podcast_tab()
277
- create_import_book_tab()
278
- create_plain_text_import_tab()
279
- create_website_scraping_tab()
280
- create_pdf_ingestion_tab()
281
- create_pdf_ingestion_test_tab()
282
- create_resummary_tab()
283
- create_summarize_explain_tab()
284
- create_live_recording_tab()
285
- create_arxiv_tab()
286
-
287
- with gr.TabItem("Text Search", id="text search", visible=True):
288
- create_search_tab()
289
- create_search_summaries_tab()
290
-
291
- with gr.TabItem("RAG Chat+Notes", id="RAG Chat Notes group", visible=True):
292
- create_rag_qa_chat_notes_tab()
293
-
294
- with gr.TabItem("RAG Search", id="RAG Search group", visible=True):
295
- create_rag_tab()
296
- create_rag_qa_chat_tab()
297
-
298
- with gr.TabItem("Chat with an LLM", id="LLM Chat group", visible=True):
299
- create_chat_interface()
300
- create_chat_interface_stacked()
301
- create_chat_interface_multi_api()
302
- create_chat_interface_four()
303
- create_chat_with_llamafile_tab()
304
- create_chat_management_tab()
305
- chat_workflows_tab()
306
-
307
-
308
- with gr.TabItem("Character Chat", id="character chat group", visible=True):
309
- create_character_card_interaction_tab()
310
- create_character_chat_mgmt_tab()
311
- create_custom_character_card_tab()
312
- create_character_card_validation_tab()
313
- create_multiple_character_chat_tab()
314
- create_narrator_controlled_conversation_tab()
315
- create_export_characters_tab()
316
-
317
-
318
- with gr.TabItem("View DB Items", id="view db items group", visible=True):
319
- # This one works
320
- create_view_all_with_versions_tab()
321
- # This one is WIP
322
- create_viewing_tab()
323
- create_prompt_view_tab()
324
-
325
-
326
- with gr.TabItem("Prompts", id='view prompts group', visible=True):
327
- create_prompt_view_tab()
328
- create_prompt_search_tab()
329
- create_prompt_edit_tab()
330
- create_prompt_clone_tab()
331
- create_prompt_suggestion_tab()
332
-
333
-
334
- with gr.TabItem("Manage / Edit Existing Items", id="manage group", visible=True):
335
- create_media_edit_tab()
336
- create_manage_items_tab()
337
- create_media_edit_and_clone_tab()
338
- # FIXME
339
- #create_compare_transcripts_tab()
340
-
341
-
342
- with gr.TabItem("Embeddings Management", id="embeddings group", visible=True):
343
- create_embeddings_tab()
344
- create_view_embeddings_tab()
345
- create_purge_embeddings_tab()
346
-
347
- with gr.TabItem("Writing Tools", id="writing_tools group", visible=True):
348
- from App_Function_Libraries.Gradio_UI.Writing_tab import create_document_feedback_tab
349
- create_document_feedback_tab()
350
- from App_Function_Libraries.Gradio_UI.Writing_tab import create_grammar_style_check_tab
351
- create_grammar_style_check_tab()
352
- from App_Function_Libraries.Gradio_UI.Writing_tab import create_tone_adjustment_tab
353
- create_tone_adjustment_tab()
354
- from App_Function_Libraries.Gradio_UI.Writing_tab import create_creative_writing_tab
355
- create_creative_writing_tab()
356
- from App_Function_Libraries.Gradio_UI.Writing_tab import create_mikupad_tab
357
- create_mikupad_tab()
358
-
359
-
360
- with gr.TabItem("Keywords", id="keywords group", visible=True):
361
- create_view_keywords_tab()
362
- create_add_keyword_tab()
363
- create_delete_keyword_tab()
364
- create_export_keywords_tab()
365
-
366
- with gr.TabItem("Import", id="import group", visible=True):
367
- create_import_item_tab()
368
- create_import_obsidian_vault_tab()
369
- create_import_single_prompt_tab()
370
- create_import_multiple_prompts_tab()
371
- create_mediawiki_import_tab()
372
- create_mediawiki_config_tab()
373
-
374
- with gr.TabItem("Export", id="export group", visible=True):
375
- create_export_tab()
376
-
377
- with gr.TabItem("Backup Management", id="backup group", visible=True):
378
- create_backup_tab()
379
- create_view_backups_tab()
380
- create_restore_backup_tab()
381
-
382
- with gr.TabItem("Utilities", id="util group", visible=True):
383
- create_utilities_yt_video_tab()
384
- create_utilities_yt_audio_tab()
385
- create_utilities_yt_timestamp_tab()
386
-
387
- with gr.TabItem("Local LLM", id="local llm group", visible=True):
388
- create_chat_with_llamafile_tab()
389
- #create_ollama_tab()
390
- #create_huggingface_tab()
391
-
392
- with gr.TabItem("Trashcan", id="trashcan group", visible=True):
393
- create_search_and_mark_trash_tab()
394
- create_view_trash_tab()
395
- create_delete_trash_tab()
396
- create_empty_trash_tab()
397
-
398
- with gr.TabItem("Evaluations", id="eval", visible=True):
399
- create_geval_tab()
400
- create_infinite_bench_tab()
401
- # FIXME
402
- #create_mmlu_pro_tab()
403
-
404
- with gr.TabItem("Introduction/Help", id="introduction group", visible=True):
405
- create_introduction_tab()
406
-
407
- with gr.TabItem("Config Editor", id="config group"):
408
- create_config_editor_tab()
409
-
410
- # Launch the interface
411
- server_port_variable = 7860
412
- os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
413
- if share==True:
414
- iface.launch(share=True)
415
- elif server_mode and not share_public:
416
- iface.launch(share=False, server_name="0.0.0.0", server_port=server_port_variable, )
417
- else:
418
- try:
419
- iface.launch(share=False, server_name="0.0.0.0", server_port=server_port_variable, )
420
- except Exception as e:
421
- logging.error(f"Error launching interface: {str(e)}")
App_Function_Libraries/Gradio_UI/Arxiv_tab.py DELETED
@@ -1,230 +0,0 @@
1
- # Arxiv_tab.py
2
- # Description: This file contains the Gradio UI for searching, browsing, and ingesting arXiv papers.
3
- #
4
- # Imports
5
- import tempfile
6
- from datetime import datetime
7
- import requests
8
-
9
- from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_text_and_format_from_pdf
10
- #
11
- # Local Imports
12
- from App_Function_Libraries.Third_Party.Arxiv import convert_xml_to_markdown, fetch_arxiv_xml, parse_arxiv_feed, \
13
- build_query_url, ARXIV_PAGE_SIZE, fetch_arxiv_pdf_url
14
- from App_Function_Libraries.DB.DB_Manager import add_media_with_keywords
15
- #
16
- import gradio as gr
17
- #
18
- #####################################################################################################
19
- #
20
- # Functions:
21
-
22
- def create_arxiv_tab():
23
- with gr.TabItem("Arxiv Search & Ingest", visible=True):
24
- gr.Markdown("# arXiv Search, Browse, Download, and Ingest")
25
- gr.Markdown("#### Thank you to arXiv for use of its open access interoperability.")
26
- with gr.Row():
27
- with gr.Column(scale=1):
28
- # Search Inputs
29
- with gr.Row():
30
- with gr.Column():
31
- search_query = gr.Textbox(label="Search Query", placeholder="e.g., machine learning")
32
- author_filter = gr.Textbox(label="Author", placeholder="e.g., John Doe")
33
- year_filter = gr.Number(label="Year", precision=0)
34
- search_button = gr.Button("Search")
35
-
36
- with gr.Column(scale=2):
37
- # Pagination Controls
38
- paper_selector = gr.Radio(label="Select a Paper", choices=[], interactive=True)
39
- prev_button = gr.Button("Previous Page")
40
- next_button = gr.Button("Next Page")
41
- page_info = gr.Textbox(label="Page", value="1", interactive=False)
42
-
43
- # Ingestion Section
44
- with gr.Row():
45
- with gr.Column():
46
- # Paper Details View
47
- paper_view = gr.Markdown(label="Paper Details")
48
- arxiv_keywords = gr.Textbox(label="Additional Keywords (comma-separated)",
49
- placeholder="e.g., AI, Deep Learning")
50
- ingest_button = gr.Button("Ingest Selected Paper")
51
- ingest_result = gr.Textbox(label="Ingestion Result", interactive=False)
52
-
53
- # Define States for Pagination and Selection
54
- state = gr.State(value={"start": 0, "current_page": 1, "last_query": None, "entries": []})
55
- selected_paper_id = gr.State(value=None)
56
-
57
- def search_arxiv(query, author, year):
58
- start = 0
59
- url = build_query_url(query, author, year, start)
60
- try:
61
- response = requests.get(url)
62
- response.raise_for_status()
63
- except requests.exceptions.RequestException as e:
64
- return gr.update(value=[]), gr.update(value=f"**Error:** {str(e)}"), state.value
65
-
66
- entries = parse_arxiv_feed(response.text)
67
- state.value = {"start": start, "current_page": 1, "last_query": (query, author, year), "entries": entries}
68
- if not entries:
69
- return gr.update(value=[]), "No results found.", state.value
70
-
71
- # Update the dropdown with paper titles for selection
72
- titles = [entry['title'] for entry in entries]
73
- return gr.update(choices=titles), "1", state.value
74
-
75
- # Dead code? FIXME
76
- def handle_pagination(direction):
77
- current_state = state.value
78
- query, author, year = current_state["last_query"]
79
- new_page = current_state["current_page"] + direction
80
- if new_page < 1:
81
- new_page = 1
82
- start = (new_page - 1) * ARXIV_PAGE_SIZE
83
- url = build_query_url(query, author, year, start)
84
- try:
85
- response = requests.get(url)
86
- response.raise_for_status()
87
- except requests.exceptions.RequestException as e:
88
- return gr.update(), gr.update()
89
-
90
- entries = parse_arxiv_feed(response.text)
91
- if entries:
92
- current_state["start"] = start
93
- current_state["current_page"] = new_page
94
- current_state["entries"] = entries
95
- state.value = current_state
96
-
97
- # Update the dropdown with paper titles for the new page
98
- titles = [entry['title'] for entry in entries]
99
- return gr.update(choices=titles), str(new_page)
100
- else:
101
- # If no entries, do not change the page
102
- return gr.update(), gr.update()
103
-
104
- def load_selected_paper(selected_title):
105
- if not selected_title:
106
- return "Please select a paper to view."
107
-
108
- # Find the selected paper from state
109
- for entry in state.value["entries"]:
110
- if entry['title'] == selected_title:
111
- paper_id = entry['id']
112
- break
113
- else:
114
- return "Paper not found."
115
-
116
- try:
117
- # Fetch the PDF URL and download the full-text
118
- pdf_url = fetch_arxiv_pdf_url(paper_id)
119
- response = requests.get(pdf_url)
120
- response.raise_for_status()
121
-
122
- # Save the PDF temporarily
123
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
124
- temp_pdf.write(response.content)
125
- temp_pdf_path = temp_pdf.name
126
-
127
- # Convert PDF to markdown using your PDF ingestion function
128
- full_text_markdown = extract_text_and_format_from_pdf(temp_pdf_path)
129
-
130
- selected_paper_id.value = paper_id
131
- return full_text_markdown
132
- except Exception as e:
133
- return f"Error loading full paper: {str(e)}"
134
-
135
- def process_and_ingest_arxiv_paper(paper_id, additional_keywords):
136
- try:
137
- if not paper_id:
138
- return "Please select a paper to ingest."
139
-
140
- # Fetch the PDF URL
141
- pdf_url = fetch_arxiv_pdf_url(paper_id)
142
-
143
- # Download the PDF
144
- response = requests.get(pdf_url)
145
- response.raise_for_status()
146
-
147
- # Save the PDF temporarily
148
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
149
- temp_pdf.write(response.content)
150
- temp_pdf_path = temp_pdf.name
151
-
152
- # Convert PDF to markdown using your PDF ingestion function
153
- markdown_text = extract_text_and_format_from_pdf(temp_pdf_path)
154
-
155
- # Fetch metadata from arXiv to get title, authors, and categories
156
- xml_content = fetch_arxiv_xml(paper_id)
157
- _, title, authors, categories = convert_xml_to_markdown(xml_content)
158
-
159
- # Prepare the arXiv paper URL for access/download
160
- paper_url = f"https://arxiv.org/abs/{paper_id}"
161
-
162
- # Prepare the keywords for ingestion
163
- keywords = f"arxiv,{','.join(categories)}"
164
- if additional_keywords:
165
- keywords += f",{additional_keywords}"
166
-
167
- # Ingest full paper markdown content
168
- add_media_with_keywords(
169
- url=paper_url,
170
- title=title,
171
- media_type='document',
172
- content=markdown_text, # Full paper content in markdown
173
- keywords=keywords,
174
- prompt='No prompt for arXiv papers',
175
- summary='Full arXiv paper ingested from PDF',
176
- transcription_model='None',
177
- author=', '.join(authors),
178
- ingestion_date=datetime.now().strftime('%Y-%m-%d')
179
- )
180
-
181
- # Return success message with paper title and authors
182
- return f"arXiv paper '{title}' by {', '.join(authors)} ingested successfully."
183
- except Exception as e:
184
- # Return error message if anything goes wrong
185
- return f"Error processing arXiv paper: {str(e)}"
186
-
187
- # Event Handlers
188
- # Connect Search Button
189
- search_button.click(
190
- fn=search_arxiv,
191
- inputs=[search_query, author_filter, year_filter],
192
- outputs=[paper_selector, page_info, state],
193
- queue=True
194
- )
195
-
196
- # Connect Next Button
197
- next_button.click(
198
- fn=lambda: handle_pagination(1),
199
- inputs=None,
200
- outputs=[paper_selector, page_info],
201
- queue=True
202
- )
203
-
204
- # Connect Previous Button
205
- prev_button.click(
206
- fn=lambda: handle_pagination(-1),
207
- inputs=None,
208
- outputs=[paper_selector, page_info],
209
- queue=True
210
- )
211
-
212
- # When the user selects a paper in the Dropdown
213
- paper_selector.change(
214
- fn=load_selected_paper,
215
- inputs=paper_selector,
216
- outputs=paper_view,
217
- queue=True
218
- )
219
-
220
- # Connect Ingest Button
221
- ingest_button.click(
222
- fn=process_and_ingest_arxiv_paper,
223
- inputs=[selected_paper_id, arxiv_keywords],
224
- outputs=ingest_result,
225
- queue=True
226
- )
227
-
228
- #
229
- # End of File
230
- #####################################################################################################
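For context, fetch_arxiv_xml() and parse_arxiv_feed() (imported above from Third_Party.Arxiv, not shown in this diff) wrap arXiv's public Atom API. A simplified standalone sketch of that interaction, using only requests and the standard library; the field handling here is an assumption and much reduced:

```python
import requests
import xml.etree.ElementTree as ET

ATOM = "{http://www.w3.org/2005/Atom}"

def search_arxiv(query, start=0, max_results=10):
    resp = requests.get(
        "http://export.arxiv.org/api/query",
        params={"search_query": f"all:{query}",
                "start": start, "max_results": max_results},
        timeout=30,
    )
    resp.raise_for_status()
    root = ET.fromstring(resp.text)
    return [
        {
            # Entry ids look like http://arxiv.org/abs/2101.00001v1
            "id": entry.findtext(f"{ATOM}id", "").rsplit("/", 1)[-1],
            "title": " ".join(entry.findtext(f"{ATOM}title", "").split()),
        }
        for entry in root.findall(f"{ATOM}entry")
    ]

for paper in search_arxiv("machine learning", max_results=3):
    print(paper["id"], "-", paper["title"])
```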
App_Function_Libraries/Gradio_UI/Audio_ingestion_tab.py DELETED
@@ -1,167 +0,0 @@
1
- # Audio_ingestion_tab.py
2
- # Description: Gradio UI for ingesting audio files into the database
3
- #
4
- # Imports
5
- #
6
- # External Imports
7
- import gradio as gr
8
- #
9
- # Local Imports
10
- from App_Function_Libraries.Audio.Audio_Files import process_audio_files
11
- from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
12
- from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
13
- from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models
14
- from App_Function_Libraries.Utils.Utils import cleanup_temp_files
15
- # Import metrics logging
16
- from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
17
- from App_Function_Libraries.Metrics.logger_config import logger
18
- #
19
- #######################################################################################################################
20
- # Functions:
21
-
22
- def create_audio_processing_tab():
23
- with gr.TabItem("Audio File Transcription + Summarization", visible=True):
24
- gr.Markdown("# Transcribe & Summarize Audio Files from URLs or Local Files!")
25
- with gr.Row():
26
- with gr.Column():
27
- audio_url_input = gr.Textbox(label="Audio File URL(s)", placeholder="Enter the URL(s) of the audio file(s), one per line")
28
- audio_file_input = gr.File(label="Upload Audio File", file_types=["audio/*"])
29
- custom_title_input = gr.Textbox(label="Custom Title/Name", placeholder="Enter a custom title or name for the audio file")
30
- use_cookies_input = gr.Checkbox(label="Use cookies for authenticated download", value=False)
31
- cookies_input = gr.Textbox(
32
- label="Audio Download Cookies",
33
- placeholder="Paste your cookies here (JSON format)",
34
- lines=3,
35
- visible=False
36
- )
37
-
38
- use_cookies_input.change(
39
- fn=lambda x: gr.update(visible=x),
40
- inputs=[use_cookies_input],
41
- outputs=[cookies_input]
42
- )
43
-
44
- diarize_input = gr.Checkbox(label="Enable Speaker Diarization", value=False)
45
- whisper_model_input = gr.Dropdown(choices=whisper_models, value="medium", label="Whisper Model")
46
- keep_timestamps_input = gr.Checkbox(label="Keep Timestamps", value=True)
47
-
48
- with gr.Row():
49
- custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
50
- value=False,
51
- visible=True)
52
- preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
53
- value=False,
54
- visible=True)
55
- with gr.Row():
56
- preset_prompt = gr.Dropdown(label="Select Preset Prompt",
57
- choices=load_preset_prompts(),
58
- visible=False)
59
- with gr.Row():
60
- custom_prompt_input = gr.Textbox(label="Custom Prompt",
61
- placeholder="Enter custom prompt here",
62
- lines=3,
63
- visible=False)
64
- with gr.Row():
65
- system_prompt_input = gr.Textbox(label="System Prompt",
66
- value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
67
- **Bulleted Note Creation Guidelines**
68
-
69
- **Headings**:
70
- - Based on referenced topics, not categories like quotes or terms
71
- - Surrounded by **bold** formatting
72
- - Not listed as bullet points
73
- - No space between headings and list items underneath
74
-
75
- **Emphasis**:
76
- - **Important terms** set in bold font
77
- - **Text ending in a colon**: also bolded
78
-
79
- **Review**:
80
- - Ensure adherence to specified format
81
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
82
- """,
83
- lines=3,
84
- visible=False)
85
-
86
- custom_prompt_checkbox.change(
87
- fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
88
- inputs=[custom_prompt_checkbox],
89
- outputs=[custom_prompt_input, system_prompt_input]
90
- )
91
- preset_prompt_checkbox.change(
92
- fn=lambda x: gr.update(visible=x),
93
- inputs=[preset_prompt_checkbox],
94
- outputs=[preset_prompt]
95
- )
96
-
97
- def update_prompts(preset_name):
98
- prompts = update_user_prompt(preset_name)
99
- return (
100
- gr.update(value=prompts["user_prompt"], visible=True),
101
- gr.update(value=prompts["system_prompt"], visible=True)
102
- )
103
-
104
- preset_prompt.change(
105
- update_prompts,
106
- inputs=preset_prompt,
107
- outputs=[custom_prompt_input, system_prompt_input]
108
- )
109
-
110
- api_name_input = gr.Dropdown(
111
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
112
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM","ollama", "HuggingFace", "Custom-OpenAI-API"],
113
- value=None,
114
- label="API for Summarization (Optional)"
115
- )
116
- api_key_input = gr.Textbox(label="API Key (if required)", placeholder="Enter your API key here", type="password")
117
- custom_keywords_input = gr.Textbox(label="Custom Keywords", placeholder="Enter custom keywords, comma-separated")
118
- keep_original_input = gr.Checkbox(label="Keep original audio file", value=False)
119
-
120
- chunking_options_checkbox = gr.Checkbox(label="Show Chunking Options", value=False)
121
- with gr.Row(visible=False) as chunking_options_box:
122
- gr.Markdown("### Chunking Options")
123
- with gr.Column():
124
- chunk_method = gr.Dropdown(choices=['words', 'sentences', 'paragraphs', 'tokens'], label="Chunking Method")
125
- max_chunk_size = gr.Slider(minimum=100, maximum=1000, value=300, step=50, label="Max Chunk Size")
126
- chunk_overlap = gr.Slider(minimum=0, maximum=100, value=0, step=10, label="Chunk Overlap")
127
- use_adaptive_chunking = gr.Checkbox(label="Use Adaptive Chunking")
128
- use_multi_level_chunking = gr.Checkbox(label="Use Multi-level Chunking")
129
- chunk_language = gr.Dropdown(choices=['english', 'french', 'german', 'spanish'], label="Chunking Language")
130
-
131
- chunking_options_checkbox.change(
132
- fn=lambda x: gr.update(visible=x),
133
- inputs=[chunking_options_checkbox],
134
- outputs=[chunking_options_box]
135
- )
136
-
137
- process_audio_button = gr.Button("Process Audio File(s)")
138
-
139
- with gr.Column():
140
- audio_progress_output = gr.Textbox(label="Progress")
141
- audio_transcription_output = gr.Textbox(label="Transcription")
142
- audio_summary_output = gr.Textbox(label="Summary")
143
- download_transcription = gr.File(label="Download All Transcriptions as JSON")
144
- download_summary = gr.File(label="Download All Summaries as Text")
145
-
146
- process_audio_button.click(
147
- fn=process_audio_files,
148
- inputs=[audio_url_input, audio_file_input, whisper_model_input, api_name_input, api_key_input,
149
- use_cookies_input, cookies_input, keep_original_input, custom_keywords_input, custom_prompt_input,
150
- chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking, use_multi_level_chunking,
151
- chunk_language, diarize_input, keep_timestamps_input, custom_title_input],
152
- outputs=[audio_progress_output, audio_transcription_output, audio_summary_output]
153
- )
154
-
155
- def on_file_clear(file):
156
- if file is None:
157
- cleanup_temp_files()
158
-
159
- audio_file_input.clear(
160
- fn=on_file_clear,
161
- inputs=[audio_file_input],
162
- outputs=[]
163
- )
164
-
165
- #
166
- # End of Audio_ingestion_tab.py
167
- #######################################################################################################################
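The show/hide wiring repeated throughout this tab (cookies box, custom prompt, chunking options) is a single pattern: a checkbox's .change() event returning gr.update(visible=...). Isolated, it looks like this:

```python
import gradio as gr

with gr.Blocks() as demo:
    use_cookies = gr.Checkbox(label="Use cookies", value=False)
    cookies = gr.Textbox(label="Cookies (JSON format)", visible=False)
    # Toggling the checkbox shows or hides the dependent textbox.
    use_cookies.change(
        fn=lambda checked: gr.update(visible=checked),
        inputs=[use_cookies],
        outputs=[cookies],
    )

if __name__ == "__main__":
    demo.launch()
```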
App_Function_Libraries/Gradio_UI/Backup_Functionality.py DELETED
@@ -1,71 +0,0 @@
1
- # Backup_Functionality.py
2
- # Functionality for exporting items as markdown files
3
- #
4
- # Imports:
5
- import os
6
- import shutil
7
- import gradio as gr
8
- #
9
- # Local Imports:
10
- from App_Function_Libraries.DB.DB_Manager import create_automated_backup, db_path, backup_dir
11
- #
12
- # End of Imports
13
- #######################################################################################################################
14
- #
15
- # Functions:
16
-
17
- def create_backup():
18
- backup_file = create_automated_backup(db_path, backup_dir)
19
- return f"Backup created: {backup_file}"
20
-
21
-
22
- def list_backups():
23
- backups = [f for f in os.listdir(backup_dir) if f.endswith('.db')]
24
- return "\n".join(backups)
25
-
26
-
27
- def restore_backup(backup_name: str) -> str:
28
- backup_path_location: str = os.path.join(str(backup_dir), backup_name)
29
- if os.path.exists(backup_path_location):
30
- shutil.copy2(str(backup_path_location), str(db_path))
31
- return f"Database restored from {backup_name}"
32
- else:
33
- return "Backup file not found"
34
-
35
-
36
- def create_backup_tab():
37
- with gr.Tab("Create Backup", visible=True):
38
- gr.Markdown("# Create a backup of the database")
39
- gr.Markdown("This will create a backup of the database in the backup directory(the default backup directory is `/tldw_DB_Backups/')")
40
- with gr.Row():
41
- with gr.Column():
42
- create_button = gr.Button("Create Backup")
43
- create_output = gr.Textbox(label="Result")
44
- with gr.Column():
45
- create_button.click(create_backup, inputs=[], outputs=create_output)
46
-
47
-
48
- def create_view_backups_tab():
49
- with gr.TabItem("View Backups", visible=True):
50
- gr.Markdown("# Browse available backups")
51
- with gr.Row():
52
- with gr.Column():
53
- view_button = gr.Button("View Backups")
54
- with gr.Column():
55
- backup_list = gr.Textbox(label="Available Backups")
56
- view_button.click(list_backups, inputs=[], outputs=backup_list)
57
-
58
-
59
- def create_restore_backup_tab():
60
- with gr.TabItem("Restore Backup", visible=True):
61
- gr.Markdown("# Restore a backup of the database")
62
- with gr.Column():
63
- backup_input = gr.Textbox(label="Backup Filename")
64
- restore_button = gr.Button("Restore")
65
- with gr.Column():
66
- restore_output = gr.Textbox(label="Result")
67
- restore_button.click(restore_backup, inputs=[backup_input], outputs=restore_output)
68
-
69
- #
70
- # End of Functions
71
- #######################################################################################################################
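create_automated_backup() is imported from DB_Manager and is not part of this diff; a timestamped-copy implementation in the same spirit might look like the sketch below (an assumption about its behavior, not the project's actual code):

```python
import os
import shutil
from datetime import datetime

def create_automated_backup(db_path: str, backup_dir: str) -> str:
    """Copy the database to backup_dir with a timestamped filename."""
    os.makedirs(backup_dir, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base = os.path.splitext(os.path.basename(db_path))[0]
    backup_path = os.path.join(backup_dir, f"{base}_{stamp}.db")
    shutil.copy2(db_path, backup_path)  # copy2 preserves file metadata
    return backup_path
```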
App_Function_Libraries/Gradio_UI/Book_Ingestion_tab.py DELETED
@@ -1,100 +0,0 @@
1
- # Book_Ingestion_tab.py
2
- # Functionality to import epubs/ebooks into the system.
3
- ####################
4
- # Function List
5
- #
6
- # 1. create_import_book_tab()
7
- # 2. import_epub(epub_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
8
- #
9
- ####################
10
- # Imports
11
- #
12
- # External Imports
13
- import gradio as gr
14
- #
15
- # Local Imports
16
- from App_Function_Libraries.Books.Book_Ingestion_Lib import process_zip_file, import_epub, import_file_handler
17
- #
18
- ########################################################################################################################
19
- #
20
- # Functions:
21
-
22
-
23
-
24
- def create_import_book_tab():
25
- with gr.TabItem("Ebook(epub) Files", visible=True):
26
- with gr.Row():
27
- with gr.Column():
28
- gr.Markdown("# Import .epub files")
29
- gr.Markdown("Upload a single .epub file or a .zip file containing multiple .epub files")
30
- gr.Markdown(
31
- "🔗 **How to remove DRM from your ebooks:** [Reddit Guide](https://www.reddit.com/r/Calibre/comments/1ck4w8e/2024_guide_on_removing_drm_from_kobo_kindle_ebooks/)")
32
- import_file = gr.File(label="Upload file for import", file_types=[".epub", ".zip"])
33
- title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content (for single files)")
34
- author_input = gr.Textbox(label="Author", placeholder="Enter the author's name (for single files)")
35
- keywords_input = gr.Textbox(label="Keywords (like genre or publish year)",
36
- placeholder="Enter keywords, comma-separated")
37
- system_prompt_input = gr.Textbox(label="System Prompt", lines=3,
38
- value=""""
39
- <s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
40
- **Bulleted Note Creation Guidelines**
41
-
42
- **Headings**:
43
- - Based on referenced topics, not categories like quotes or terms
44
- - Surrounded by **bold** formatting
45
- - Not listed as bullet points
46
- - No space between headings and list items underneath
47
-
48
- **Emphasis**:
49
- - **Important terms** set in bold font
50
- - **Text ending in a colon**: also bolded
51
-
52
- **Review**:
53
- - Ensure adherence to specified format
54
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
55
- """, )
56
- custom_prompt_input = gr.Textbox(label="Custom User Prompt",
57
- placeholder="Enter a custom user prompt for summarization (optional)")
58
- auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
59
- api_name_input = gr.Dropdown(
60
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
61
- "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
62
- label="API for Auto-summarization"
63
- )
64
- api_key_input = gr.Textbox(label="API Key", type="password")
65
-
66
- # Chunking options
67
- max_chunk_size = gr.Slider(minimum=100, maximum=2000, value=500, step=50, label="Max Chunk Size")
68
- chunk_overlap = gr.Slider(minimum=0, maximum=500, value=200, step=10, label="Chunk Overlap")
69
- custom_chapter_pattern = gr.Textbox(label="Custom Chapter Pattern (optional)",
70
- placeholder="Enter a custom regex pattern for chapter detection")
71
-
72
-
73
- import_button = gr.Button("Import eBook(s)")
74
- with gr.Column():
75
- with gr.Row():
76
- import_output = gr.Textbox(label="Import Status", lines=10, interactive=False)
77
-
78
- import_button.click(
79
- fn=import_file_handler,
80
- inputs=[
81
- import_file,
82
- title_input,
83
- author_input,
84
- keywords_input,
85
- custom_prompt_input,
86
- auto_summarize_checkbox,
87
- api_name_input,
88
- api_key_input,
89
- max_chunk_size,
90
- chunk_overlap,
91
- custom_chapter_pattern
92
- ],
93
- outputs=import_output
94
- )
95
-
96
- return import_file, title_input, author_input, keywords_input, system_prompt_input, custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input, import_button, import_output
97
-
98
- #
99
- # End of File
100
- ########################################################################################################################
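The Max Chunk Size / Chunk Overlap sliders above feed the chunker in Book_Ingestion_Lib (not shown in this diff). The basic sliding-window mechanics those two parameters control can be sketched at the word level like this (illustrative only):

```python
def chunk_words(text: str, max_chunk_size: int = 500, overlap: int = 200):
    """Split text into word windows of max_chunk_size, overlapping by overlap."""
    words = text.split()
    step = max(1, max_chunk_size - overlap)  # how far each window advances
    return [" ".join(words[i:i + max_chunk_size])
            for i in range(0, len(words), step)]

chunks = chunk_words("word " * 1200, max_chunk_size=500, overlap=200)
print(len(chunks))  # 4 windows: starts at words 0, 300, 600, 900
```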
App_Function_Libraries/Gradio_UI/Character_Chat_tab.py DELETED
@@ -1,1848 +0,0 @@
1
- # Character_Chat_tab.py
2
- # Description: Library for character card import functions
3
- #
4
- # Imports
5
- import os
6
- import re
7
- import tempfile
8
- import uuid
9
- from datetime import datetime
10
- import json
11
- import logging
12
- import io
13
- import base64
14
- from typing import Dict, Any, Optional, List, Tuple, Union, cast
15
- import zipfile
16
- #
17
- # External Imports
18
- from PIL import Image
19
- from PIL.PngImagePlugin import PngInfo
20
- import gradio as gr
21
- #
22
- # Local Imports
23
- from App_Function_Libraries.Character_Chat.Character_Chat_Lib import validate_character_book, validate_v2_card, \
24
- replace_placeholders, replace_user_placeholder, extract_json_from_image, parse_character_book, \
25
- load_chat_and_character, load_chat_history, load_character_and_image, extract_character_id, load_character_wrapper
26
- from App_Function_Libraries.Chat import chat
27
- from App_Function_Libraries.DB.Character_Chat_DB import (
28
- add_character_card,
29
- get_character_cards,
30
- get_character_card_by_id,
31
- add_character_chat,
32
- get_character_chats,
33
- get_character_chat_by_id,
34
- update_character_chat,
35
- delete_character_chat,
36
- delete_character_card,
37
- update_character_card, search_character_chats,
38
- )
39
- from App_Function_Libraries.Utils.Utils import sanitize_user_input
40
- #
41
- ############################################################################################################
42
- #
43
- # Functions:
44
-
45
- #################################################################################
46
- #
47
- # Character card import functions:
48
-
49
- def import_character_card(file):
50
- if file is None:
51
- return None, gr.update(), "No file provided for character card import"
52
-
53
- try:
54
- if file.name.lower().endswith(('.png', '.webp')):
55
- json_data = extract_json_from_image(file)
56
- if not json_data:
57
- return None, gr.update(), "No character card data found in the image. This might not be a valid character card image."
58
- elif file.name.lower().endswith('.json'):
59
- with open(file.name, 'r', encoding='utf-8') as f:
60
- json_data = f.read()
61
- else:
62
- return None, gr.update(), "Unsupported file type. Please upload a PNG/WebP image or a JSON file."
63
-
64
- card_data = import_character_card_json(json_data)
65
- if not card_data:
66
- return None, gr.update(), "Failed to parse character card data. The file might not contain valid character information."
67
-
68
- # Save image data for PNG/WebP files
69
- if file.name.lower().endswith(('.png', '.webp')):
70
-             with Image.open(file.name) as img:
71
- img_byte_arr = io.BytesIO()
72
- img.save(img_byte_arr, format='PNG')
73
- card_data['image'] = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
74
-
75
- # Save character card to database
76
- character_id = add_character_card(card_data)
77
- if character_id:
78
- characters = get_character_cards()
79
- character_names = [char['name'] for char in characters]
80
- return card_data, gr.update(
81
- choices=character_names), f"Character card '{card_data['name']}' imported successfully."
82
- else:
83
- return None, gr.update(), f"Failed to save character card '{card_data.get('name', 'Unknown')}'. It may already exist."
84
- except Exception as e:
85
- logging.error(f"Error importing character card: {e}")
86
- return None, gr.update(), f"Error importing character card: {e}"
87
-
88
-
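
For PNG/WebP uploads, extract_json_from_image (imported above) pulls the card data out of the image. The widely used card-in-PNG convention, shown here as a sketch rather than the project's exact reader, stores the card JSON base64-encoded in a PNG tEXt chunk named 'chara':

```python
import base64
from PIL import Image

def read_chara_chunk(path: str):
    # PNG tEXt chunks surface in img.info; 'chara' holds the base64 card JSON.
    with Image.open(path) as img:
        raw = img.info.get("chara")
    return base64.b64decode(raw).decode("utf-8") if raw else None
```
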
89
- def import_character_card_json(json_content: str) -> Optional[Dict[str, Any]]:
90
- try:
91
- json_content = json_content.strip()
92
- card_data = json.loads(json_content)
93
-
94
- if 'spec' in card_data and card_data['spec'] == 'chara_card_v2':
95
- logging.info("Detected V2 character card")
96
- return parse_v2_card(card_data)
97
- else:
98
- logging.info("Assuming V1 character card")
99
- return parse_v1_card(card_data)
100
- except json.JSONDecodeError as e:
101
- logging.error(f"JSON decode error: {e}")
102
- except Exception as e:
103
- logging.error(f"Unexpected error parsing JSON: {e}")
104
- return None
105
-
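
A minimal V2 payload that exercises the dispatch above; all field values are placeholders:

```python
import json

v2_skeleton = {
    "spec": "chara_card_v2",   # this key routes to parse_v2_card
    "spec_version": "2.0",
    "data": {
        "name": "Alice",
        "description": "A curious explorer.",
        "personality": "inquisitive",
        "scenario": "a library at midnight",
        "first_mes": "Oh! I didn't see you there.",
        "mes_example": "<START>...",
    },
}
card = import_character_card_json(json.dumps(v2_skeleton))
assert card and card["name"] == "Alice"
```
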
106
-
107
-
108
- def parse_v2_card(card_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
109
- try:
110
- # Validate spec_version
111
- if card_data.get('spec_version') != '2.0':
112
- logging.warning(f"Unsupported V2 spec version: {card_data.get('spec_version')}")
113
- return None
114
-
115
- data = card_data['data']
116
-
117
- # Ensure all required fields are present
118
- required_fields = ['name', 'description', 'personality', 'scenario', 'first_mes', 'mes_example']
119
- for field in required_fields:
120
- if field not in data:
121
- logging.error(f"Missing required field in V2 card: {field}")
122
- return None
123
-
124
- # Handle new V2 fields
125
- parsed_data = {
126
- 'name': data['name'],
127
- 'description': data['description'],
128
- 'personality': data['personality'],
129
- 'scenario': data['scenario'],
130
- 'first_mes': data['first_mes'],
131
- 'mes_example': data['mes_example'],
132
- 'creator_notes': data.get('creator_notes', ''),
133
- 'system_prompt': data.get('system_prompt', ''),
134
- 'post_history_instructions': data.get('post_history_instructions', ''),
135
- 'alternate_greetings': data.get('alternate_greetings', []),
136
- 'tags': data.get('tags', []),
137
- 'creator': data.get('creator', ''),
138
- 'character_version': data.get('character_version', ''),
139
- 'extensions': data.get('extensions', {})
140
- }
141
-
142
- # Handle character_book if present
143
- if 'character_book' in data:
144
- parsed_data['character_book'] = parse_character_book(data['character_book'])
145
-
146
- return parsed_data
147
- except KeyError as e:
148
- logging.error(f"Missing key in V2 card structure: {e}")
149
- except Exception as e:
150
- logging.error(f"Error parsing V2 card: {e}")
151
- return None
152
-
153
- def parse_v1_card(card_data: Dict[str, Any]) -> Dict[str, Any]:
154
- # Ensure all required V1 fields are present
155
- required_fields = ['name', 'description', 'personality', 'scenario', 'first_mes', 'mes_example']
156
- for field in required_fields:
157
- if field not in card_data:
158
- logging.error(f"Missing required field in V1 card: {field}")
159
- raise ValueError(f"Missing required field in V1 card: {field}")
160
-
161
- # Convert V1 to V2 format
162
- v2_data: Dict[str, Union[str, List[str], Dict[str, Any]]] = {
163
- 'name': card_data['name'],
164
- 'description': card_data['description'],
165
- 'personality': card_data['personality'],
166
- 'scenario': card_data['scenario'],
167
- 'first_mes': card_data['first_mes'],
168
- 'mes_example': card_data['mes_example'],
169
- 'creator_notes': cast(str, card_data.get('creator_notes', '')),
170
- 'system_prompt': cast(str, card_data.get('system_prompt', '')),
171
- 'post_history_instructions': cast(str, card_data.get('post_history_instructions', '')),
172
- 'alternate_greetings': cast(List[str], card_data.get('alternate_greetings', [])),
173
- 'tags': cast(List[str], card_data.get('tags', [])),
174
- 'creator': cast(str, card_data.get('creator', '')),
175
- 'character_version': cast(str, card_data.get('character_version', '')),
176
- 'extensions': {}
177
- }
178
-
179
- # Move any non-standard V1 fields to extensions
180
- for key, value in card_data.items():
181
- if key not in v2_data:
182
- v2_data['extensions'][key] = value
183
-
184
- return v2_data
185
-
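
The key behavior of parse_v1_card is that unknown V1 keys survive the conversion by landing in extensions, as the loop above shows. For example:

```python
v1 = {
    "name": "Alice", "description": "...", "personality": "...",
    "scenario": "...", "first_mes": "Hi!", "mes_example": "...",
    "talkativeness": 0.8,  # not part of the standard V1 field set
}
v2 = parse_v1_card(v1)
assert v2["extensions"]["talkativeness"] == 0.8
```
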
186
- #
187
- # End of Character card import functions
188
- ####################################################
189
-
190
- ####################################################
191
- #
192
- # Character card export functions
193
-
194
- def export_character_as_json(character_id):
195
- character = get_character_card_by_id(character_id)
196
- if character:
197
- # Remove the 'id' field from the character data
198
- character_data = {k: v for k, v in character.items() if k != 'id'}
199
-
200
- # Convert image to base64 if it exists
201
- if 'image' in character_data and character_data['image']:
202
- image_data = base64.b64decode(character_data['image'])
203
- img = Image.open(io.BytesIO(image_data))
204
- buffered = io.BytesIO()
205
- img.save(buffered, format="PNG")
206
- character_data['image'] = base64.b64encode(buffered.getvalue()).decode('utf-8')
207
-
208
- json_data = json.dumps(character_data, indent=2)
209
- return json_data
210
- return None
211
-
212
- def export_all_characters_as_zip():
213
- characters = get_character_cards()
214
- with tempfile.NamedTemporaryFile(mode='wb', delete=False, suffix='.zip') as temp_zip:
215
- with zipfile.ZipFile(temp_zip, 'w') as zf:
216
- for character in characters:
217
- character_data = {k: v for k, v in character.items() if k != 'id'}
218
-
219
- # Convert image to base64 if it exists
220
- if 'image' in character_data and character_data['image']:
221
- image_data = base64.b64decode(character_data['image'])
222
- img = Image.open(io.BytesIO(image_data))
223
- buffered = io.BytesIO()
224
- img.save(buffered, format="PNG")
225
- character_data['image'] = base64.b64encode(buffered.getvalue()).decode('utf-8')
226
- json_data = json.dumps(character_data, indent=2)
227
- zf.writestr(f"{character['name']}.json", json_data)
228
- return temp_zip.name
229
-
230
- def export_single_character(character_selection):
231
- if not character_selection:
232
- return None, "No character selected."
233
-
234
- character_id = int(character_selection.split('(ID: ')[1].rstrip(')'))
235
- json_data = export_character_as_json(character_id)
236
-
237
- if json_data:
238
- with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', encoding='utf-8') as temp_file:
239
- temp_file.write(json_data)
240
- return temp_file.name, f"Character '{character_selection.split(' (ID:')[0]}' exported successfully."
241
- else:
242
- return None, f"Failed to export character '{character_selection.split(' (ID:')[0]}'."
243
-
244
- def export_all_characters():
245
- zip_path = export_all_characters_as_zip()
246
- return zip_path, "All characters exported successfully."
247
-
248
- #
249
- # End of Character card export functions
250
- ####################################################
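
A quick way to sanity-check an export (a sketch; assumes a card with ID 1 exists in the database): the image field, when present, is a base64-encoded PNG, so decoding it should yield the PNG magic bytes.

```python
import base64
import json

json_str = export_character_as_json(character_id=1)
if json_str:
    card = json.loads(json_str)
    if card.get("image"):
        png_bytes = base64.b64decode(card["image"])
        assert png_bytes[:8] == b"\x89PNG\r\n\x1a\n"  # PNG signature
```
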
251
-
252
- ####################################################
253
- #
254
- # Gradio tabs
255
-
256
- def create_character_card_interaction_tab():
257
- with gr.TabItem("Chat with a Character Card", visible=True):
258
- gr.Markdown("# Chat with a Character Card")
259
- with gr.Row():
260
- with gr.Column(scale=1):
261
- character_image = gr.Image(label="Character Image", type="pil")
262
- character_card_upload = gr.File(
263
- label="Upload Character Card (PNG, WEBP, JSON)",
264
- file_types=[".png", ".webp", ".json"]
265
- )
266
- import_card_button = gr.Button("Import Character Card")
267
- load_characters_button = gr.Button("Load Existing Characters")
268
- character_dropdown = gr.Dropdown(label="Select Character", choices=[])
269
- user_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name here")
270
- api_name_input = gr.Dropdown(
271
- choices=[
272
- "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
273
- "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
274
- "Custom-OpenAI-API"
275
- ],
276
- value="HuggingFace",
277
- label="API for Interaction (Mandatory)"
278
- )
279
- api_key_input = gr.Textbox(
280
- label="API Key (if not set in Config_Files/config.txt)",
281
- placeholder="Enter your API key here", type="password"
282
- )
283
- temperature_slider = gr.Slider(
284
- minimum=0.0, maximum=2.0, value=0.7, step=0.05, label="Temperature"
285
- )
286
- import_chat_button = gr.Button("Import Chat History")
287
- chat_file_upload = gr.File(label="Upload Chat History JSON", visible=True)
288
-
289
- # Chat History Import and Search
290
- gr.Markdown("## Search and Load Existing Chats")
291
- chat_search_query = gr.Textbox(
292
- label="Search Chats",
293
- placeholder="Enter chat name or keywords to search"
294
- )
295
- chat_search_button = gr.Button("Search Chats")
296
- chat_search_dropdown = gr.Dropdown(label="Search Results", choices=[], visible=False)
297
- load_chat_button = gr.Button("Load Selected Chat", visible=False)
298
-
299
- # Checkbox to Decide Whether to Save Chats by Default
300
- auto_save_checkbox = gr.Checkbox(label="Save chats automatically", value=True)
301
- chat_media_name = gr.Textbox(label="Custom Chat Name (optional)", visible=True)
302
- save_chat_history_to_db = gr.Button("Save Chat History to Database")
303
- save_status = gr.Textbox(label="Save Status", interactive=False)
304
-
305
- with gr.Column(scale=2):
306
- chat_history = gr.Chatbot(label="Conversation", height=800)
307
- user_input = gr.Textbox(label="Your message")
308
- send_message_button = gr.Button("Send Message")
309
- answer_for_me_button = gr.Button("Answer for Me")
310
- continue_talking_button = gr.Button("Continue Talking")
311
- regenerate_button = gr.Button("Regenerate Last Message")
312
- clear_chat_button = gr.Button("Clear Chat")
313
- save_snapshot_button = gr.Button("Save Chat Snapshot")
314
- update_chat_dropdown = gr.Dropdown(label="Select Chat to Update", choices=[], visible=False)
315
- load_selected_chat_button = gr.Button("Load Selected Chat", visible=False)
316
- update_chat_button = gr.Button("Update Selected Chat", visible=False)
317
-
318
- # States
319
- character_data = gr.State(None)
320
- user_name = gr.State("")
321
- selected_chat_id = gr.State(None) # To track the selected chat for updates
322
-
323
- # Callback Functions
324
-
325
- def search_existing_chats(query):
326
- results, message = search_character_chats(query)
327
- if results:
328
- # Format search results for dropdown
329
- formatted_results = [
330
- f"{chat['conversation_name']} (ID: {chat['id']})" for chat in results
331
- ]
332
- else:
333
- formatted_results = []
334
-             return gr.update(choices=formatted_results, visible=bool(formatted_results)), message
335
-
336
- def load_selected_chat_from_search(selected_chat, user_name):
337
- if not selected_chat:
338
- return None, [], None, "No chat selected."
339
-
340
- try:
341
- chat_id_match = re.search(r'\(ID:\s*(\d+)\)', selected_chat)
342
- if not chat_id_match:
343
- return None, [], None, "Invalid chat selection format."
344
-
345
- chat_id = int(chat_id_match.group(1))
346
-
347
- # Use the new function to load chat and character data
348
- char_data, chat_history, img = load_chat_and_character(chat_id, user_name)
349
-
350
- if not char_data:
351
- return None, [], None, "Failed to load character data for the selected chat."
352
-
353
- return char_data, chat_history, img, f"Chat '{selected_chat}' loaded successfully."
354
- except Exception as e:
355
- logging.error(f"Error loading selected chat: {e}")
356
- return None, [], None, f"Error loading chat: {e}"
357
-
358
-
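
Dropdown labels throughout this tab follow the "Name (ID: 123)" convention, and the regex above recovers the numeric ID from it:

```python
import re

label = "Evening chat with Alice (ID: 42)"
match = re.search(r'\(ID:\s*(\d+)\)', label)
assert match and int(match.group(1)) == 42
```
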
359
- def import_chat_history(file, current_history, char_data, user_name_val):
360
- """
361
- Imports chat history from a file, replacing '{{user}}' with the actual user name.
362
-
363
- Args:
364
- file (file): The uploaded chat history file.
365
- current_history (list): The current chat history.
366
- char_data (dict): The current character data.
367
- user_name_val (str): The user's name.
368
-
369
- Returns:
370
- tuple: Updated chat history, updated character data, and a status message.
371
- """
372
- loaded_history, char_name = load_chat_history(file)
373
- if loaded_history is None:
374
- return current_history, char_data, "Failed to load chat history."
375
-
376
- # Replace '{{user}}' in the loaded chat history
377
- loaded_history = replace_user_placeholder(loaded_history, user_name_val)
378
-
379
- # Check if the loaded chat is for the current character
380
- if char_data and char_data.get('name') != char_name:
381
- return current_history, char_data, (
382
- f"Warning: Loaded chat is for character '{char_name}', "
383
- f"but current character is '{char_data.get('name')}'. Chat not imported."
384
- )
385
-
386
- # If no character is selected, try to load the character from the chat
387
- if not char_data:
388
- characters = get_character_cards()
389
- character = next((char for char in characters if char['name'] == char_name), None)
390
- if character:
391
- char_data = character
392
-                     # Replace '{{user}}' in the card's first_mes if necessary
393
-                     if character.get('first_mes'):
394
-                         character['first_mes'] = character['first_mes'].replace(
395
-                             "{{user}}", user_name_val if user_name_val else "User")
396
- else:
397
- return current_history, char_data, (
398
- f"Warning: Character '{char_name}' not found. Please select the character manually."
399
- )
400
-
401
- return loaded_history, char_data, f"Chat history for '{char_name}' imported successfully."
402
-
403
- def load_character(name):
404
- characters = get_character_cards()
405
- character = next((char for char in characters if char['name'] == name), None)
406
- if character:
407
-                 first_message = character.get('first_mes', "Hello! I'm ready to chat.")
408
- return character, [(None, first_message)] if first_message else [], None
409
- return None, [], None
410
-
411
- def load_character_image(name):
412
- character = next((char for char in get_character_cards() if char['name'] == name), None)
413
- if character and 'image' in character and character['image']:
414
- try:
415
- # Decode the base64 image
416
- image_data = base64.b64decode(character['image'])
417
- # Load as PIL Image
418
- img = Image.open(io.BytesIO(image_data)).convert("RGBA")
419
- return img
420
- except Exception as e:
421
- logging.error(f"Error loading image for character '{name}': {e}")
422
- return None
423
- return None
424
-
425
- def character_chat_wrapper(
426
- message, history, char_data, api_endpoint, api_key,
427
- temperature, user_name_val, auto_save
428
- ):
429
- if not char_data:
430
- return history, "Please select a character first."
431
-
432
- user_name_val = user_name_val or "User"
433
- char_name = char_data.get('name', 'AI Assistant')
434
-
435
- # Prepare the character's background information
436
- char_background = f"""
437
- Name: {char_name}
438
- Description: {char_data.get('description', 'N/A')}
439
- Personality: {char_data.get('personality', 'N/A')}
440
- Scenario: {char_data.get('scenario', 'N/A')}
441
- """
442
-
443
- # Prepare the system prompt
444
- system_message = f"""You are roleplaying as {char_name}. {char_data.get('system_prompt', '')}"""
445
-
446
- # Prepare chat context
447
- media_content = {
448
- 'id': char_name,
449
- 'title': char_name,
450
- 'content': char_background,
451
- 'description': char_data.get('description', ''),
452
- 'personality': char_data.get('personality', ''),
453
- 'scenario': char_data.get('scenario', '')
454
- }
455
- selected_parts = ['description', 'personality', 'scenario']
456
-
457
- prompt = char_data.get('post_history_instructions', '')
458
-
459
- # Sanitize and format user message
460
- user_message = sanitize_user_input(message)
461
- user_message = replace_placeholders(user_message, char_name, user_name_val)
462
- full_message = f"{user_name_val}: {user_message}"
463
-
464
- # Generate bot response
465
- bot_message = chat(
466
- full_message,
467
- history,
468
- media_content,
469
- selected_parts,
470
- api_endpoint,
471
- api_key,
472
- prompt,
473
- temperature,
474
- system_message
475
- )
476
-
477
- # Replace placeholders in bot message
478
- bot_message = replace_placeholders(bot_message, char_name, user_name_val)
479
-
480
- # Update history
481
- history.append((user_message, bot_message))
482
-
483
- # Auto-save if enabled
484
- save_status = ""
485
- if auto_save:
486
- character_id = char_data.get('id')
487
- if character_id:
488
- conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
489
- add_character_chat(character_id, conversation_name, history)
490
- save_status = "Chat auto-saved."
491
- else:
492
- save_status = "Character ID not found; chat not saved."
493
-
494
- return history, save_status
495
-
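
For orientation, the history object that character_chat_wrapper reads and appends to is the list-of-pairs format gr.Chatbot expects. Only the bare user text is stored; the name-prefixed full_message is what goes to the model. An illustrative value (not from a real session):

```python
history = [
    (None, "Hello! I'm ready to chat."),        # character greeting seeded on load
    ("Hi there!", "Hi Sam, good to see you."),  # (user_message, bot_message)
]
```
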
496
- def save_chat_history_to_db_wrapper(
497
- chat_history, conversation_id, media_content,
498
- chat_media_name, char_data, auto_save
499
- ):
500
- if not char_data or not chat_history:
501
-                 return conversation_id, "No character or chat history available."
502
-
503
- character_id = char_data.get('id')
504
- if not character_id:
505
-                 return conversation_id, "Character ID not found."
506
-
507
- conversation_name = chat_media_name or f"Chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
508
- chat_id = add_character_chat(character_id, conversation_name, chat_history)
509
- if chat_id:
510
-                 return chat_id, f"Chat saved successfully with ID {chat_id}."
511
-             else:
512
-                 return conversation_id, "Failed to save chat."
513
-
514
- def update_character_info(name):
515
- return load_character_and_image(name, user_name.value)
516
-
517
- def on_character_select(name, user_name_val):
518
- logging.debug(f"Character selected: {name}")
519
- char_data, chat_history, img = load_character_and_image(name, user_name_val)
520
- return char_data, chat_history, img
521
-
522
- def clear_chat_history(char_data, user_name_val):
523
- """
524
- Clears the chat history and initializes it with the character's first message,
525
- replacing the '{{user}}' placeholder with the actual user name.
526
-
527
- Args:
528
- char_data (dict): The current character data.
529
- user_name_val (str): The user's name.
530
-
531
- Returns:
532
- tuple: Updated chat history and the unchanged char_data.
533
- """
534
-             if char_data and char_data.get('first_mes'):
535
-                 # Replace '{{user}}' in the card's first_mes
536
-                 first_message = char_data['first_mes'].replace(
537
-                     "{{user}}", user_name_val if user_name_val else "User")
538
- # Initialize chat history with the updated first_message
539
- return [(None, first_message)], char_data
540
- else:
541
- # If no first_message is defined, simply clear the chat
542
- return [], char_data
543
-
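
The placeholder handling above is plain string substitution; a tiny example:

```python
first_mes = "Nice to meet you, {{user}}!"
user_name_val = "Sam"
print(first_mes.replace("{{user}}", user_name_val if user_name_val else "User"))
# -> Nice to meet you, Sam!
```
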
544
- def regenerate_last_message(
545
- history, char_data, api_endpoint, api_key,
546
- temperature, user_name_val, auto_save
547
- ):
548
- """
549
- Regenerates the last bot message by removing it and resending the corresponding user message.
550
-
551
- Args:
552
- history (list): The current chat history as a list of tuples (user_message, bot_message).
553
- char_data (dict): The current character data.
554
- api_endpoint (str): The API endpoint to use for the LLM.
555
- api_key (str): The API key for authentication.
556
- temperature (float): The temperature setting for the LLM.
557
- user_name_val (str): The user's name.
558
- auto_save (bool): Flag indicating whether to auto-save the chat.
559
-
560
- Returns:
561
- tuple: Updated chat history and a save status message.
562
- """
563
- if not history:
564
- return history, "No messages to regenerate."
565
-
566
- last_entry = history[-1]
567
- last_user_message, last_bot_message = last_entry
568
-
569
- # Check if the last bot message exists
570
- if last_bot_message is None:
571
- return history, "The last message is not from the bot."
572
-
573
- # Remove the last bot message
574
- new_history = history[:-1]
575
-
576
- # Resend the last user message to generate a new bot response
577
- if not last_user_message:
578
- return new_history, "No user message to regenerate the bot response."
579
-
580
- # Prepare the character's background information
581
- char_name = char_data.get('name', 'AI Assistant')
582
- char_background = f"""
583
- Name: {char_name}
584
- Description: {char_data.get('description', 'N/A')}
585
- Personality: {char_data.get('personality', 'N/A')}
586
- Scenario: {char_data.get('scenario', 'N/A')}
587
- """
588
-
589
- # Prepare the system prompt for character impersonation
590
- system_message = f"""You are roleplaying as {char_name}, the character described below. Respond to the user's messages in character, maintaining the personality and background provided. Do not break character or refer to yourself as an AI. Always refer to yourself as "{char_name}" and refer to the user as "{user_name_val}".
591
-
592
- {char_background}
593
-
594
- Additional instructions: {char_data.get('post_history_instructions', '')}
595
- """
596
-
597
- # Prepare media_content and selected_parts
598
- media_content = {
599
- 'id': char_name,
600
- 'title': char_name,
601
- 'content': char_background,
602
- 'description': char_data.get('description', ''),
603
- 'personality': char_data.get('personality', ''),
604
- 'scenario': char_data.get('scenario', '')
605
- }
606
- selected_parts = ['description', 'personality', 'scenario']
607
-
608
- prompt = char_data.get('post_history_instructions', '')
609
-
610
- # Prepare the input for the chat function
611
- full_message = f"{user_name_val}: {last_user_message}" if last_user_message else f"{user_name_val}: "
612
-
613
- # Call the chat function to get a new bot message
614
- bot_message = chat(
615
- full_message,
616
- new_history,
617
- media_content,
618
- selected_parts,
619
- api_endpoint,
620
- api_key,
621
- prompt,
622
- temperature,
623
- system_message
624
- )
625
-
626
- # Append the new bot message to the history
627
- new_history.append((last_user_message, bot_message))
628
-
629
- # Auto-save if enabled
630
- if auto_save:
631
- character_id = char_data.get('id')
632
- if character_id:
633
- conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
634
- add_character_chat(character_id, conversation_name, new_history)
635
- save_status = "Chat auto-saved."
636
- else:
637
- save_status = "Character ID not found; chat not saved."
638
- else:
639
- save_status = ""
640
-
641
- return new_history, save_status
642
-
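
Stripped of prompt assembly, regenerate_last_message is a drop-and-replay pattern. A runnable sketch with the chat() call stubbed out:

```python
def fake_chat(message, history):
    return "a fresh take on: " + message  # stand-in for the real chat() call

history = [("Hi!", "Hello."), ("Tell me a story.", "Once upon a time...")]
last_user, _ = history[-1]     # keep the user half of the last exchange
new_history = history[:-1]     # drop the stale bot reply
new_history.append((last_user, fake_chat(last_user, new_history)))
print(new_history[-1])
```
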
643
- def toggle_chat_file_upload():
644
- return gr.update(visible=True)
645
-
646
- def save_untracked_chat_action(history, char_data):
647
- if not char_data or not history:
648
- return "No chat to save or character not selected."
649
-
650
- character_id = char_data.get('id')
651
- if not character_id:
652
- return "Character ID not found."
653
-
654
- conversation_name = f"Snapshot {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
655
- chat_id = add_character_chat(character_id, conversation_name, history, is_snapshot=True)
656
- if chat_id:
657
- return f"Chat snapshot saved successfully with ID {chat_id}."
658
- else:
659
- return "Failed to save chat snapshot."
660
-
661
-         def select_chat_for_update(char_data):
662
-             # Fetch all chats for the selected character; char_data is the
-             # per-session value of the character_data State, passed as an input
663
-             if char_data:
664
-                 character_id = char_data.get('id')
665
- if character_id:
666
- chats = get_character_chats(character_id)
667
- chat_choices = [
668
- f"{chat['conversation_name']} (ID: {chat['id']})" for chat in chats
669
- ]
670
- return gr.update(choices=chat_choices), None
671
- return gr.update(choices=[]), "No character selected."
672
-
673
- def load_selected_chat(chat_selection):
674
- if not chat_selection:
675
-                 return [], None, "No chat selected."
676
-
677
- try:
678
- chat_id = int(chat_selection.split('(ID: ')[1].rstrip(')'))
679
- chat = get_character_chat_by_id(chat_id)
680
- if chat:
681
- history = chat['chat_history']
682
-                     # Return chat_id so the selected_chat_id State is updated per-session
683
-                     return history, chat_id, f"Loaded chat '{chat['conversation_name']}' successfully."
684
- else:
685
-                     return [], None, "Chat not found."
686
- except Exception as e:
687
- logging.error(f"Error loading selected chat: {e}")
688
-                 return [], None, f"Error loading chat: {e}"
689
-
690
- def update_chat(chat_id, updated_history):
691
- success = update_character_chat(chat_id, updated_history)
692
- if success:
693
- return "Chat updated successfully."
694
- else:
695
- return "Failed to update chat."
696
-
697
- def continue_talking(
698
- history, char_data, api_endpoint, api_key,
699
- temperature, user_name_val, auto_save
700
- ):
701
- """
702
- Causes the character to continue the conversation or think out loud.
703
- """
704
- if not char_data:
705
- return history, "Please select a character first."
706
-
707
- user_name_val = user_name_val or "User"
708
- char_name = char_data.get('name', 'AI Assistant')
709
-
710
- # Prepare the character's background information
711
- char_background = f"""
712
- Name: {char_name}
713
- Description: {char_data.get('description', 'N/A')}
714
- Personality: {char_data.get('personality', 'N/A')}
715
- Scenario: {char_data.get('scenario', 'N/A')}
716
- """
717
-
718
- # Prepare the system prompt
719
- system_message = f"""You are roleplaying as {char_name}. {char_data.get('system_prompt', '')}
720
- If the user does not respond, continue expressing your thoughts or continue the conversation by thinking out loud. If thinking out loud, prefix the message with "Thinking: "."""
721
-
722
- # Prepare chat context
723
- media_content = {
724
- 'id': char_name,
725
- 'title': char_name,
726
- 'content': char_background,
727
- 'description': char_data.get('description', ''),
728
- 'personality': char_data.get('personality', ''),
729
- 'scenario': char_data.get('scenario', '')
730
- }
731
- selected_parts = ['description', 'personality', 'scenario']
732
-
733
- prompt = char_data.get('post_history_instructions', '')
734
-
735
- # Simulate empty user input
736
- user_message = ""
737
-
738
- # Generate bot response
739
- bot_message = chat(
740
- user_message,
741
- history,
742
- media_content,
743
- selected_parts,
744
- api_endpoint,
745
- api_key,
746
- prompt,
747
- temperature,
748
- system_message
749
- )
750
-
751
- # Replace placeholders in bot message
752
- bot_message = replace_placeholders(bot_message, char_name, user_name_val)
753
-
754
- # Update history
755
- history.append((None, bot_message))
756
-
757
- # Auto-save if enabled
758
- save_status = ""
759
- if auto_save:
760
- character_id = char_data.get('id')
761
- if character_id:
762
- conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
763
- add_character_chat(character_id, conversation_name, history)
764
- save_status = "Chat auto-saved."
765
- else:
766
- save_status = "Character ID not found; chat not saved."
767
-
768
- return history, save_status
769
-
770
- def answer_for_me(
771
- history, char_data, api_endpoint, api_key,
772
- temperature, user_name_val, auto_save
773
- ):
774
- """
775
- Generates a likely user response and continues the conversation.
776
- """
777
- if not char_data:
778
- return history, "Please select a character first."
779
-
780
- user_name_val = user_name_val or "User"
781
- char_name = char_data.get('name', 'AI Assistant')
782
-
783
- # Prepare the character's background information
784
- char_background = f"""
785
- Name: {char_name}
786
- Description: {char_data.get('description', 'N/A')}
787
- Personality: {char_data.get('personality', 'N/A')}
788
- Scenario: {char_data.get('scenario', 'N/A')}
789
- """
790
-
791
- # Prepare system message for generating user's response
792
- system_message_user = f"""You are simulating the user {user_name_val}. Based on the conversation so far, generate a natural and appropriate response that {user_name_val} might say next. The response should fit the context and flow of the conversation. ONLY SPEAK FOR {user_name_val}."""
793
-
794
- # Prepare chat context
795
- media_content = {
796
- 'id': char_name,
797
- 'title': char_name,
798
- 'content': char_background,
799
- 'description': char_data.get('description', ''),
800
- 'personality': char_data.get('personality', ''),
801
- 'scenario': char_data.get('scenario', '')
802
- }
803
- selected_parts = ['description', 'personality', 'scenario']
804
-
805
- # Generate user response
806
- user_response = chat(
807
- "", # No new message
808
- history,
809
- media_content,
810
- selected_parts,
811
- api_endpoint,
812
- api_key,
813
- prompt="",
814
- temperature=temperature,
815
- system_message=system_message_user
816
- )
817
-
818
- # Append the generated user response to history
819
- history.append((user_response, None))
820
-
821
- # Now generate the character's response to this user response
822
- # Prepare the system message for the character
823
- system_message_bot = f"""You are roleplaying as {char_name}. {char_data.get('system_prompt', '')}"""
824
-
825
- bot_message = chat(
826
- f"{user_name_val}: {user_response}",
827
- history[:-1],
828
- media_content,
829
- selected_parts,
830
- api_endpoint,
831
- api_key,
832
- prompt=char_data.get('post_history_instructions', ''),
833
- temperature=temperature,
834
- system_message=system_message_bot
835
- )
836
-
837
- # Replace placeholders in bot message
838
- bot_message = replace_placeholders(bot_message, char_name, user_name_val)
839
-
840
- # Update history with bot's response
841
- history[-1] = (user_response, bot_message)
842
-
843
- # Auto-save if enabled
844
- save_status = ""
845
- if auto_save:
846
- character_id = char_data.get('id')
847
- if character_id:
848
- conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
849
- add_character_chat(character_id, conversation_name, history)
850
- save_status = "Chat auto-saved."
851
- else:
852
- save_status = "Character ID not found; chat not saved."
853
-
854
- return history, save_status
855
-
856
-
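
answer_for_me is the only callback above that makes two model calls per click: pass one impersonates the user, pass two answers as the character. A runnable sketch of the data flow with chat() stubbed:

```python
def fake_chat(message, history, system_message):
    return "stub reply to: " + (message or "<empty>")  # stand-in for chat()

history = [("Hi!", "Hello, Sam.")]
user_response = fake_chat("", history, "speak as the user")       # pass 1
history.append((user_response, None))                             # show it
bot_message = fake_chat(f"Sam: {user_response}", history[:-1],
                        "speak as the character")                 # pass 2
history[-1] = (user_response, bot_message)                        # pair them
print(history[-1])
```
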
857
- # Define States for conversation_id and media_content, which are required for saving chat history
858
- conversation_id = gr.State(str(uuid.uuid4()))
859
- media_content = gr.State({})
860
-
861
- # Button Callbacks
862
-
863
- # Add the new button callbacks here
864
- answer_for_me_button.click(
865
- fn=answer_for_me,
866
- inputs=[
867
- chat_history,
868
- character_data,
869
- api_name_input,
870
- api_key_input,
871
- temperature_slider,
872
- user_name_input,
873
- auto_save_checkbox
874
- ],
875
- outputs=[chat_history, save_status]
876
- )
877
-
878
- continue_talking_button.click(
879
- fn=continue_talking,
880
- inputs=[
881
- chat_history,
882
- character_data,
883
- api_name_input,
884
- api_key_input,
885
- temperature_slider,
886
- user_name_input,
887
- auto_save_checkbox
888
- ],
889
- outputs=[chat_history, save_status]
890
- )
891
-
892
- import_card_button.click(
893
- fn=import_character_card,
894
- inputs=[character_card_upload],
895
- outputs=[character_data, character_dropdown, save_status]
896
- )
897
-
898
- load_characters_button.click(
899
- fn=lambda: gr.update(choices=[f"{char['name']} (ID: {char['id']})" for char in get_character_cards()]),
900
- outputs=character_dropdown
901
- )
902
-
903
- # FIXME user_name_val = validate_user_name(user_name_val)
904
- clear_chat_button.click(
905
- fn=clear_chat_history,
906
- inputs=[character_data, user_name_input],
907
- outputs=[chat_history, character_data]
908
- )
909
-
910
- character_dropdown.change(
911
- fn=extract_character_id,
912
- inputs=[character_dropdown],
913
- outputs=character_data
914
- ).then(
915
- fn=load_character_wrapper,
916
- inputs=[character_data, user_name_input],
917
- outputs=[character_data, chat_history, character_image]
918
- )
919
-
920
- send_message_button.click(
921
- fn=character_chat_wrapper,
922
- inputs=[
923
- user_input,
924
- chat_history,
925
- character_data,
926
- api_name_input,
927
- api_key_input,
928
- temperature_slider,
929
- user_name_input,
930
- auto_save_checkbox
931
- ],
932
- outputs=[chat_history, save_status]
933
- ).then(lambda: "", outputs=user_input)
934
-
935
- regenerate_button.click(
936
- fn=regenerate_last_message,
937
- inputs=[
938
- chat_history,
939
- character_data,
940
- api_name_input,
941
- api_key_input,
942
- temperature_slider,
943
- user_name_input,
944
- auto_save_checkbox
945
- ],
946
- outputs=[chat_history, save_status]
947
- )
948
-
949
- import_chat_button.click(
950
-             fn=toggle_chat_file_upload,
951
- outputs=chat_file_upload
952
- )
953
-
954
- chat_file_upload.change(
955
- fn=import_chat_history,
956
-             inputs=[chat_file_upload, chat_history, character_data, user_name_input],
957
- outputs=[chat_history, character_data, save_status]
958
- )
959
-
960
- save_chat_history_to_db.click(
961
- fn=save_chat_history_to_db_wrapper,
962
- inputs=[
963
- chat_history,
964
- conversation_id,
965
- media_content,
966
- chat_media_name,
967
- character_data,
968
- auto_save_checkbox # Pass the auto_save state
969
- ],
970
- outputs=[conversation_id, save_status]
971
- )
972
-
973
- # Populate the update_chat_dropdown based on selected character
974
- character_dropdown.change(
975
- fn=select_chat_for_update,
976
-             inputs=[character_data],
977
- outputs=[update_chat_dropdown, save_status]
978
- )
979
-
980
- load_selected_chat_button.click(
981
- fn=load_selected_chat,
982
- inputs=[update_chat_dropdown],
983
-             outputs=[chat_history, selected_chat_id, save_status]
984
- )
985
-
986
- save_snapshot_button.click(
987
- fn=save_untracked_chat_action,
988
- inputs=[chat_history, character_data],
989
- outputs=save_status
990
- )
991
-
992
- update_chat_button.click(
993
- fn=update_chat,
994
- inputs=[selected_chat_id, chat_history],
995
- outputs=save_status
996
- )
997
-
998
- # Search Chats
999
- chat_search_button.click(
1000
- fn=search_existing_chats,
1001
- inputs=[chat_search_query],
1002
- outputs=[chat_search_dropdown, save_status]
1003
-         )  # Dropdown visibility is handled by the gr.update returned from search_existing_chats
1008
-
1009
- # Load Selected Chat from Search
1010
- load_chat_button.click(
1011
- fn=load_selected_chat_from_search,
1012
- inputs=[chat_search_dropdown, user_name_input],
1013
- outputs=[character_data, chat_history, character_image, save_status]
1014
- )
1015
-
1016
- # Show Load Chat Button when a chat is selected
1017
- chat_search_dropdown.change(
1018
- fn=lambda selected: gr.update(visible=True) if selected else gr.update(visible=False),
1019
- inputs=[chat_search_dropdown],
1020
- outputs=[load_chat_button]
1021
- )
1022
-
1023
-
1024
- return character_data, chat_history, user_input, user_name, character_image
1025
-
1026
-
1027
- def create_character_chat_mgmt_tab():
1028
- with gr.TabItem("Character and Chat Management", visible=True):
1029
- gr.Markdown("# Character and Chat Management")
1030
-
1031
- with gr.Row():
1032
- # Left Column: Character Import and Chat Management
1033
- with gr.Column(scale=1):
1034
- gr.Markdown("## Import Characters")
1035
- character_files = gr.File(
1036
- label="Upload Character Files (PNG, WEBP, JSON)",
1037
- file_types=[".png", ".webp", ".json"],
1038
- file_count="multiple"
1039
- )
1040
- import_characters_button = gr.Button("Import Characters")
1041
- import_status = gr.Markdown("")
1042
-
1043
- # Right Column: Character Selection and Image Display
1044
- with gr.Column(scale=2):
1045
- gr.Markdown("## Select Character")
1046
- characters = get_character_cards()
1047
- character_choices = [f"{char['name']} (ID: {char['id']})" for char in characters]
1048
- load_characters_button = gr.Button("Load Existing Characters")
1049
- select_character = gr.Dropdown(label="Select Character", choices=character_choices, interactive=True)
1050
- character_image = gr.Image(label="Character Image", type="pil", interactive=False)
1051
-
1052
- gr.Markdown("## Search Conversations")
1053
- search_query = gr.Textbox(label="Search Conversations", placeholder="Enter search keywords")
1054
- search_button = gr.Button("Search")
1055
- search_results = gr.Dropdown(label="Search Results", choices=[], visible=False)
1056
- search_status = gr.Markdown("", visible=True)
1057
-
1058
- with gr.Row():
1059
- gr.Markdown("## Chat Management")
1060
- select_chat = gr.Dropdown(label="Select Chat", choices=[], visible=False, interactive=True)
1061
- load_chat_button = gr.Button("Load Selected Chat", visible=False)
1062
- conversation_list = gr.Dropdown(label="Select Conversation or Character", choices=[])
1063
- conversation_mapping = gr.State({})
1064
-
1065
- with gr.Tabs():
1066
- with gr.TabItem("Edit", visible=True):
1067
- chat_content = gr.TextArea(label="Chat/Character Content (JSON)", lines=20, max_lines=50)
1068
- save_button = gr.Button("Save Changes")
1069
- delete_button = gr.Button("Delete Conversation/Character", variant="stop")
1070
-
1071
- with gr.TabItem("Preview", visible=True):
1072
- chat_preview = gr.HTML(label="Chat/Character Preview")
1073
- result_message = gr.Markdown("")
1074
-
1075
- # Callback Functions
1076
-
1077
- def load_character_image(character_selection):
1078
- if not character_selection:
1079
- return None
1080
-
1081
- try:
1082
- character_id = int(character_selection.split('(ID: ')[1].rstrip(')'))
1083
- character = get_character_card_by_id(character_id)
1084
- if character and 'image' in character:
1085
- image_data = base64.b64decode(character['image'])
1086
- img = Image.open(io.BytesIO(image_data))
1087
- return img
1088
- except Exception as e:
1089
- logging.error(f"Error loading character image: {e}")
1090
-
1091
- return None
1092
-
1093
- def search_conversations_or_characters(query, selected_character):
1094
- if not query.strip():
1095
-                 return gr.update(choices=[], visible=False), "Please enter a search query.", {}
1096
-
1097
- try:
1098
- # Extract character ID from the selected character
1099
- character_id = None
1100
- if selected_character:
1101
- character_id = int(selected_character.split('(ID: ')[1].rstrip(')'))
1102
-
1103
- # Search Chats using FTS5, filtered by character_id if provided
1104
- chat_results, chat_message = search_character_chats(query, character_id)
1105
-
1106
- # Format chat results
1107
- formatted_chat_results = [
1108
- f"Chat: {chat['conversation_name']} (ID: {chat['id']})" for chat in chat_results
1109
- ]
1110
-
1111
- # If no character is selected, also search for characters
1112
- if not character_id:
1113
- characters = get_character_cards()
1114
- filtered_characters = [
1115
- char for char in characters
1116
- if query.lower() in char['name'].lower()
1117
- ]
1118
- formatted_character_results = [
1119
- f"Character: {char['name']} (ID: {char['id']})" for char in filtered_characters
1120
- ]
1121
- else:
1122
- formatted_character_results = []
1123
-
1124
- # Combine results
1125
- all_choices = formatted_chat_results + formatted_character_results
1126
-
1127
-                 if all_choices:
1128
-                     # Build the label -> ID mapping consumed by the load/save/delete callbacks
-                     mapping = {c: int(c.rsplit('(ID: ', 1)[1].rstrip(')')) for c in all_choices}
-                     return gr.update(choices=all_choices, visible=True), chat_message, mapping
1129
-                 else:
1130
-                     return gr.update(choices=[], visible=False), f"No results found for '{query}'.", {}
1131
-
1132
- except Exception as e:
1133
- logging.error(f"Error during search: {e}")
1134
-                 return gr.update(choices=[], visible=False), f"Error occurred during search: {e}", {}
1135
-
1136
- def load_conversation_or_character(selected, conversation_mapping):
1137
- if not selected or selected not in conversation_mapping:
1138
- return "", "<p>No selection made.</p>"
1139
-
1140
- selected_id = conversation_mapping[selected]
1141
- if selected.startswith("Chat:"):
1142
- chat = get_character_chat_by_id(selected_id)
1143
- if chat:
1144
- json_content = json.dumps({
1145
- "conversation_id": chat['id'],
1146
- "conversation_name": chat['conversation_name'],
1147
- "messages": chat['chat_history']
1148
- }, indent=2)
1149
-
1150
- html_preview = create_chat_preview_html(chat['chat_history'])
1151
- return json_content, html_preview
1152
- elif selected.startswith("Character:"):
1153
- character = get_character_card_by_id(selected_id)
1154
- if character:
1155
- json_content = json.dumps({
1156
- "id": character['id'],
1157
- "name": character['name'],
1158
- "description": character['description'],
1159
- "personality": character['personality'],
1160
- "scenario": character['scenario'],
1161
- "post_history_instructions": character['post_history_instructions'],
1162
- "first_mes": character['first_mes'],
1163
- "mes_example": character['mes_example'],
1164
- "creator_notes": character.get('creator_notes', ''),
1165
- "system_prompt": character.get('system_prompt', ''),
1166
- "tags": character.get('tags', []),
1167
- "creator": character.get('creator', ''),
1168
- "character_version": character.get('character_version', ''),
1169
- "extensions": character.get('extensions', {})
1170
- }, indent=2)
1171
-
1172
- html_preview = create_character_preview_html(character)
1173
- return json_content, html_preview
1174
-
1175
- return "", "<p>Unable to load the selected item.</p>"
1176
-
1177
- def validate_content(selected, content):
1178
- try:
1179
- data = json.loads(content)
1180
- if selected.startswith("Chat:"):
1181
- assert "conversation_id" in data and "messages" in data
1182
- elif selected.startswith("Character:"):
1183
- assert "id" in data and "name" in data
1184
- return True, data
1185
- except Exception as e:
1186
- return False, f"Invalid JSON: {e}"
1187
-
1188
- def save_conversation_or_character(selected, conversation_mapping, content):
1189
- if not selected or selected not in conversation_mapping:
1190
- return "Please select an item to save.", "<p>No changes made.</p>"
1191
-
1192
- is_valid, result = validate_content(selected, content)
1193
- if not is_valid:
1194
- return f"Error: {result}", "<p>No changes made due to validation error.</p>"
1195
-
1196
- selected_id = conversation_mapping[selected]
1197
-
1198
- if selected.startswith("Chat:"):
1199
- success = update_character_chat(selected_id, result['messages'])
1200
- return ("Chat updated successfully." if success else "Failed to update chat."), ("<p>Chat updated.</p>" if success else "<p>Failed to update chat.</p>")
1201
- elif selected.startswith("Character:"):
1202
- success = update_character_card(selected_id, result)
1203
- return ("Character updated successfully." if success else "Failed to update character."), ("<p>Character updated.</p>" if success else "<p>Failed to update character.</p>")
1204
-
1205
- return "Unknown item type.", "<p>No changes made.</p>"
1206
-
1207
- def delete_conversation_or_character(selected, conversation_mapping):
1208
- if not selected or selected not in conversation_mapping:
1209
- return "Please select an item to delete.", "<p>No changes made.</p>", gr.update(choices=[])
1210
-
1211
- selected_id = conversation_mapping[selected]
1212
-
1213
- if selected.startswith("Chat:"):
1214
- success = delete_character_chat(selected_id)
1215
- elif selected.startswith("Character:"):
1216
- success = delete_character_card(selected_id)
1217
- else:
1218
- return "Unknown item type.", "<p>No changes made.</p>", gr.update()
1219
-
1220
- if success:
1221
- updated_choices = [choice for choice in conversation_mapping.keys() if choice != selected]
1222
- conversation_mapping.value.pop(selected, None)
1223
- return f"{selected.split(':')[0]} deleted successfully.", f"<p>{selected.split(':')[0]} deleted.</p>", gr.update(choices=updated_choices)
1224
- else:
1225
- return f"Failed to delete {selected.split(':')[0].lower()}.", f"<p>Failed to delete {selected.split(':')[0].lower()}.</p>", gr.update()
1226
-
1227
- def populate_chats(character_selection):
1228
- if not character_selection:
1229
- return gr.update(choices=[], visible=False), "Please select a character first."
1230
-
1231
- try:
1232
- character_id = int(character_selection.split('(ID: ')[1].rstrip(')'))
1233
- chats = get_character_chats(character_id=character_id)
1234
-
1235
- if not chats:
1236
- return gr.update(choices=[], visible=False), f"No chats found for the selected character."
1237
-
1238
- formatted_chats = [f"{chat['conversation_name']} (ID: {chat['id']})" for chat in chats]
1239
- return gr.update(choices=formatted_chats, visible=True), f"Found {len(formatted_chats)} chat(s)."
1240
- except Exception as e:
1241
- logging.error(f"Error populating chats: {e}")
1242
- return gr.update(choices=[], visible=False), f"Error occurred: {e}"
1243
-
1244
- def load_chat_from_character(selected_chat):
1245
- if not selected_chat:
1246
- return "", "<p>No chat selected.</p>"
1247
-
1248
- try:
1249
- chat_id = int(selected_chat.split('(ID: ')[1].rstrip(')'))
1250
- chat = get_character_chat_by_id(chat_id)
1251
- if not chat:
1252
- return "", "<p>Selected chat not found.</p>"
1253
-
1254
- json_content = json.dumps({
1255
- "conversation_id": chat['id'],
1256
- "conversation_name": chat['conversation_name'],
1257
- "messages": chat['chat_history']
1258
- }, indent=2)
1259
-
1260
- html_preview = create_chat_preview_html(chat['chat_history'])
1261
- return json_content, html_preview
1262
- except Exception as e:
1263
- logging.error(f"Error loading chat: {e}")
1264
- return "", f"<p>Error loading chat: {e}</p>"
1265
-
1266
- def create_chat_preview_html(chat_history):
1267
- html_preview = "<div style='max-height: 500px; overflow-y: auto;'>"
1268
- for user_msg, bot_msg in chat_history:
1269
- user_style = "background-color: #e6f3ff; padding: 10px; border-radius: 5px; margin-bottom: 5px;"
1270
- bot_style = "background-color: #f0f0f0; padding: 10px; border-radius: 5px; margin-bottom: 10px;"
1271
- html_preview += f"<div style='{user_style}'><strong>User:</strong> {user_msg}</div>"
1272
- html_preview += f"<div style='{bot_style}'><strong>Bot:</strong> {bot_msg}</div>"
1273
- html_preview += "</div>"
1274
- return html_preview
1275
-
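
create_chat_preview_html interpolates raw message text into markup, so a message containing HTML could distort the preview. A hardened variant, offered as a sketch, escapes each message first:

```python
import html

def create_chat_preview_html_safe(chat_history):
    parts = ["<div style='max-height: 500px; overflow-y: auto;'>"]
    for user_msg, bot_msg in chat_history:
        parts.append(f"<div><strong>User:</strong> {html.escape(str(user_msg))}</div>")
        parts.append(f"<div><strong>Bot:</strong> {html.escape(str(bot_msg))}</div>")
    parts.append("</div>")
    return "".join(parts)
```
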
1276
- def create_character_preview_html(character):
1277
- return f"""
1278
- <div>
1279
- <h2>{character['name']}</h2>
1280
- <p><strong>Description:</strong> {character['description']}</p>
1281
- <p><strong>Personality:</strong> {character['personality']}</p>
1282
- <p><strong>Scenario:</strong> {character['scenario']}</p>
1283
- <p><strong>First Message:</strong> {character['first_mes']}</p>
1284
- <p><strong>Example Message:</strong> {character['mes_example']}</p>
1285
- <p><strong>Post History Instructions:</strong> {character['post_history_instructions']}</p>
1286
- <p><strong>System Prompt:</strong> {character.get('system_prompt', 'N/A')}</p>
1287
- <p><strong>Tags:</strong> {', '.join(character.get('tags', []))}</p>
1288
- <p><strong>Creator:</strong> {character.get('creator', 'N/A')}</p>
1289
- <p><strong>Version:</strong> {character.get('character_version', 'N/A')}</p>
1290
- </div>
1291
- """
1292
- def import_multiple_characters(files):
1293
- if not files:
1294
- return "No files provided for character import."
1295
-
1296
- results = []
1297
- for file in files:
1298
- result, _, message = import_character_card(file)
1299
- if result:
1300
- results.append(f"Imported: {result['name']}")
1301
- else:
1302
- results.append(f"Failed: {file.name} - {message}")
1303
-             # The character dropdown is refreshed by the .then() chained on
-             # import_characters_button below; mutating select_character.choices
-             # here has no effect at runtime in Gradio.
1307
- select_character.choices = character_choices
1308
-
1309
- return "Import results:\n" + "\n".join(results)
1310
-
1311
- # Register new callback for character import
1312
- import_characters_button.click(
1313
- fn=import_multiple_characters,
1314
- inputs=[character_files],
1315
- outputs=[import_status]
1316
- ).then(
1317
- fn=lambda: gr.update(choices=[f"{char['name']} (ID: {char['id']})" for char in get_character_cards()]),
1318
- outputs=select_character
1319
- )
1320
-
1321
- # Register Callback Functions with Gradio Components
1322
- search_button.click(
1323
- fn=search_conversations_or_characters,
1324
- inputs=[search_query, select_character],
1325
-             outputs=[search_results, search_status, conversation_mapping]
1326
- )
1327
-
1328
- search_results.change(
1329
- fn=load_conversation_or_character,
1330
- inputs=[search_results, conversation_mapping],
1331
- outputs=[chat_content, chat_preview]
1332
- )
1333
-
1334
- save_button.click(
1335
- fn=save_conversation_or_character,
1336
-             inputs=[search_results, conversation_mapping, chat_content],
1337
- outputs=[result_message, chat_preview]
1338
- )
1339
-
1340
- delete_button.click(
1341
- fn=delete_conversation_or_character,
1342
-             inputs=[search_results, conversation_mapping],
1343
-             outputs=[result_message, chat_preview, search_results]
1344
- )
1345
-
1346
- select_character.change(
1347
- fn=load_character_image,
1348
- inputs=[select_character],
1349
- outputs=[character_image]
1350
- ).then(
1351
- fn=populate_chats,
1352
- inputs=[select_character],
1353
- outputs=[select_chat, search_status]
1354
- )
1355
-
1356
- select_chat.change(
1357
- fn=load_chat_from_character,
1358
- inputs=[select_chat],
1359
- outputs=[chat_content, chat_preview]
1360
- )
1361
-
1362
- load_chat_button.click(
1363
- fn=load_chat_from_character,
1364
- inputs=[select_chat],
1365
- outputs=[chat_content, chat_preview]
1366
- )
1367
-
1368
- load_characters_button.click(
1369
- fn=lambda: gr.update(choices=[f"{char['name']} (ID: {char['id']})" for char in get_character_cards()]),
1370
- outputs=select_character
1371
- )
1372
-
1373
- return (
1374
- character_files, import_characters_button, import_status,
1375
- search_query, search_button, search_results, search_status,
1376
- select_character, select_chat, load_chat_button,
1377
- conversation_list, conversation_mapping,
1378
- chat_content, save_button, delete_button,
1379
- chat_preview, result_message, character_image
1380
- )
1381
-
1382
- def create_custom_character_card_tab():
1383
- with gr.TabItem("Create a New Character Card", visible=True):
1384
- gr.Markdown("# Create a New Character Card (v2)")
1385
-
1386
- with gr.Row():
1387
- with gr.Column():
1388
- # Input fields for character card data
1389
- name_input = gr.Textbox(label="Name", placeholder="Enter character name")
1390
- description_input = gr.TextArea(label="Description", placeholder="Enter character description")
1391
- personality_input = gr.TextArea(label="Personality", placeholder="Enter character personality")
1392
- scenario_input = gr.TextArea(label="Scenario", placeholder="Enter character scenario")
1393
- first_mes_input = gr.TextArea(label="First Message", placeholder="Enter the first message")
1394
- mes_example_input = gr.TextArea(label="Example Messages", placeholder="Enter example messages")
1395
- creator_notes_input = gr.TextArea(label="Creator Notes", placeholder="Enter notes for the creator")
1396
- system_prompt_input = gr.TextArea(label="System Prompt", placeholder="Enter system prompt")
1397
- post_history_instructions_input = gr.TextArea(label="Post History Instructions", placeholder="Enter post history instructions")
1398
- alternate_greetings_input = gr.TextArea(
1399
- label="Alternate Greetings (one per line)",
1400
- placeholder="Enter alternate greetings, one per line"
1401
- )
1402
- tags_input = gr.Textbox(label="Tags", placeholder="Enter tags, separated by commas")
1403
- creator_input = gr.Textbox(label="Creator", placeholder="Enter creator name")
1404
- character_version_input = gr.Textbox(label="Character Version", placeholder="Enter character version")
1405
- extensions_input = gr.TextArea(
1406
- label="Extensions (JSON)",
1407
- placeholder="Enter extensions as JSON (optional)"
1408
- )
1409
- image_input = gr.Image(label="Character Image", type="pil")
1410
-
1411
- # Buttons
1412
- save_button = gr.Button("Save Character Card")
1413
- download_button = gr.Button("Download Character Card")
1414
- download_image_button = gr.Button("Download Character Card as Image")
1415
-
1416
- # Output status and outputs
1417
- save_status = gr.Markdown("")
1418
- download_output = gr.File(label="Download Character Card", interactive=False)
1419
- download_image_output = gr.File(label="Download Character Card as Image", interactive=False)
1420
-
1421
1424
- # Callback Functions
1425
- def build_character_card(
1426
- name, description, personality, scenario, first_mes, mes_example,
1427
- creator_notes, system_prompt, post_history_instructions,
1428
- alternate_greetings_str, tags_str, creator, character_version,
1429
- extensions_str
1430
- ):
1431
- # Parse alternate_greetings from multiline string
1432
- alternate_greetings = [line.strip() for line in alternate_greetings_str.strip().split('\n') if line.strip()]
1433
-
1434
- # Parse tags from comma-separated string
1435
- tags = [tag.strip() for tag in tags_str.strip().split(',') if tag.strip()]
1436
-
1437
- # Parse extensions from JSON string
1438
- try:
1439
- extensions = json.loads(extensions_str) if extensions_str.strip() else {}
1440
- except json.JSONDecodeError as e:
1441
- extensions = {}
1442
- logging.error(f"Error parsing extensions JSON: {e}")
1443
-
1444
- # Build the character card dictionary according to V2 spec
1445
- character_card = {
1446
- 'spec': 'chara_card_v2',
1447
- 'spec_version': '2.0',
1448
- 'data': {
1449
- 'name': name,
1450
- 'description': description,
1451
- 'personality': personality,
1452
- 'scenario': scenario,
1453
- 'first_mes': first_mes,
1454
- 'mes_example': mes_example,
1455
- 'creator_notes': creator_notes,
1456
- 'system_prompt': system_prompt,
1457
- 'post_history_instructions': post_history_instructions,
1458
- 'alternate_greetings': alternate_greetings,
1459
- 'tags': tags,
1460
- 'creator': creator,
1461
- 'character_version': character_version,
1462
- 'extensions': extensions,
1463
- }
1464
- }
1465
- return character_card
1466
-
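
The free-text fields above are normalized by simple splitting and stripping; for example:

```python
alternate_greetings_str = "Hey!\nGood evening.\n"
tags_str = "friendly, fantasy , "
print([l.strip() for l in alternate_greetings_str.strip().split('\n') if l.strip()])
# -> ['Hey!', 'Good evening.']
print([t.strip() for t in tags_str.strip().split(',') if t.strip()])
# -> ['friendly', 'fantasy']
```
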
1467
- def validate_character_card_data(character_card):
1468
- """
1469
- Validates the character card data using the extended validation logic.
1470
- """
1471
- is_valid, validation_messages = validate_v2_card(character_card)
1472
- return is_valid, validation_messages
1473
-
1474
- def save_character_card(
1475
- name, description, personality, scenario, first_mes, mes_example,
1476
- creator_notes, system_prompt, post_history_instructions,
1477
- alternate_greetings_str, tags_str, creator, character_version,
1478
- extensions_str, image
1479
- ):
1480
- # Build the character card
1481
- character_card = build_character_card(
1482
- name, description, personality, scenario, first_mes, mes_example,
1483
- creator_notes, system_prompt, post_history_instructions,
1484
- alternate_greetings_str, tags_str, creator, character_version,
1485
- extensions_str
1486
- )
1487
-
1488
- # Validate the character card
1489
- is_valid, validation_messages = validate_character_card_data(character_card)
1490
- if not is_valid:
1491
- # Return validation errors
1492
- validation_output = "Character card validation failed:\n"
1493
- validation_output += "\n".join(validation_messages)
1494
- return validation_output
1495
-
1496
- # If image is provided, encode it to base64
1497
- if image:
1498
- img_byte_arr = io.BytesIO()
1499
- image.save(img_byte_arr, format='PNG')
1500
- character_card['data']['image'] = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
1501
-
1502
- # Save character card to database
1503
- character_id = add_character_card(character_card['data'])
1504
- if character_id:
1505
- return f"Character card '{name}' saved successfully."
1506
- else:
1507
- return f"Failed to save character card '{name}'. It may already exist."
1508
-
1509
- def download_character_card(
1510
- name, description, personality, scenario, first_mes, mes_example,
1511
- creator_notes, system_prompt, post_history_instructions,
1512
- alternate_greetings_str, tags_str, creator, character_version,
1513
- extensions_str, image
1514
- ):
1515
- # Build the character card
1516
- character_card = build_character_card(
1517
- name, description, personality, scenario, first_mes, mes_example,
1518
- creator_notes, system_prompt, post_history_instructions,
1519
- alternate_greetings_str, tags_str, creator, character_version,
1520
- extensions_str
1521
- )
1522
-
1523
- # Validate the character card
1524
- is_valid, validation_messages = validate_character_card_data(character_card)
1525
- if not is_valid:
1526
- # Return validation errors
1527
- validation_output = "Character card validation failed:\n"
1528
- validation_output += "\n".join(validation_messages)
1529
- return gr.update(value=None), validation_output # Return None for the file output
1530
-
1531
- # If image is provided, include it as base64
1532
- if image:
1533
- img_byte_arr = io.BytesIO()
1534
- image.save(img_byte_arr, format='PNG')
1535
- character_card['data']['image'] = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
1536
-
1537
- # Convert to JSON string
1538
- json_str = json.dumps(character_card, indent=2)
1539
-
1540
- # Write the JSON to a temporary file
1541
- with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', encoding='utf-8') as temp_file:
1542
- temp_file.write(json_str)
1543
- temp_file_path = temp_file.name
1544
-
1545
- # Return the file path and clear validation output
1546
- return temp_file_path, ""
1547
-
1548
- def download_character_card_as_image(
1549
- name, description, personality, scenario, first_mes, mes_example,
1550
- creator_notes, system_prompt, post_history_instructions,
1551
- alternate_greetings_str, tags_str, creator, character_version,
1552
- extensions_str, image
1553
- ):
1554
- # Build the character card
1555
- character_card = build_character_card(
1556
- name, description, personality, scenario, first_mes, mes_example,
1557
- creator_notes, system_prompt, post_history_instructions,
1558
- alternate_greetings_str, tags_str, creator, character_version,
1559
- extensions_str
1560
- )
1561
-
1562
- # Validate the character card
1563
- is_valid, validation_messages = validate_character_card_data(character_card)
1564
- if not is_valid:
1565
- # Return validation errors
1566
- validation_output = "Character card validation failed:\n"
1567
- validation_output += "\n".join(validation_messages)
1568
- return gr.update(value=None), validation_output # Return None for the file output
1569
-
1570
- # Convert the character card JSON to a string
1571
- json_str = json.dumps(character_card, indent=2)
1572
-
1573
- # Encode the JSON string to base64
1574
- chara_content = base64.b64encode(json_str.encode('utf-8')).decode('utf-8')
1575
-
1576
- # Create PNGInfo object to hold metadata
1577
- png_info = PngInfo()
1578
- png_info.add_text('chara', chara_content)
1579
-
1580
- # If image is provided, use it; otherwise, create a blank image
1581
- if image:
1582
- img = image.copy()
1583
- else:
1584
- # Create a default blank image
1585
- img = Image.new('RGB', (512, 512), color='white')
1586
-
1587
- # Save the image to a temporary file with metadata
1588
- with tempfile.NamedTemporaryFile(mode='wb', delete=False, suffix='.png') as temp_file:
1589
- img.save(temp_file, format='PNG', pnginfo=png_info)
1590
- temp_file_path = temp_file.name
1591
-
1592
- # Return the file path and clear validation output
1593
- return temp_file_path, ""
1594
-
1595
- # Include the validate_v2_card function here (from previous code)
1596
-
1597
- # Button Callbacks
1598
- save_button.click(
1599
- fn=save_character_card,
1600
- inputs=[
1601
- name_input, description_input, personality_input, scenario_input,
1602
- first_mes_input, mes_example_input, creator_notes_input, system_prompt_input,
1603
- post_history_instructions_input, alternate_greetings_input, tags_input,
1604
- creator_input, character_version_input, extensions_input, image_input
1605
- ],
1606
- outputs=[save_status]
1607
- )
1608
-
1609
- download_button.click(
1610
- fn=download_character_card,
1611
- inputs=[
1612
- name_input, description_input, personality_input, scenario_input,
1613
- first_mes_input, mes_example_input, creator_notes_input, system_prompt_input,
1614
- post_history_instructions_input, alternate_greetings_input, tags_input,
1615
- creator_input, character_version_input, extensions_input, image_input
1616
- ],
1617
- outputs=[download_output, save_status]
1618
- )
1619
-
1620
- download_image_button.click(
1621
- fn=download_character_card_as_image,
1622
- inputs=[
1623
- name_input, description_input, personality_input, scenario_input,
1624
- first_mes_input, mes_example_input, creator_notes_input, system_prompt_input,
1625
- post_history_instructions_input, alternate_greetings_input, tags_input,
1626
- creator_input, character_version_input, extensions_input, image_input
1627
- ],
1628
- outputs=[download_image_output, save_status]
1629
- )
1630
-
1631
- #v1
1632
- def create_character_card_validation_tab():
1633
- with gr.TabItem("Validate Character Card", visible=True):
1634
- gr.Markdown("# Validate Character Card (v2)")
1635
- gr.Markdown("Upload a character card (PNG, WEBP, or JSON) to validate whether it conforms to the Character Card V2 specification.")
1636
-
1637
- with gr.Row():
1638
- with gr.Column():
1639
- # File uploader
1640
- file_upload = gr.File(
1641
- label="Upload Character Card (PNG, WEBP, JSON)",
1642
- file_types=[".png", ".webp", ".json"]
1643
- )
1644
- # Validation button
1645
- validate_button = gr.Button("Validate Character Card")
1646
- # Output area for validation results
1647
- validation_output = gr.Markdown("")
1648
-
1649
- # Callback Functions
1650
- def validate_character_card(file):
1651
- if file is None:
1652
- return "No file provided for validation."
1653
-
1654
- try:
1655
- if file.name.lower().endswith(('.png', '.webp')):
1656
- json_data = extract_json_from_image(file)
1657
- if not json_data:
1658
- return "Failed to extract JSON data from the image. The image might not contain embedded character card data."
1659
- elif file.name.lower().endswith('.json'):
1660
- with open(file.name, 'r', encoding='utf-8') as f:
1661
- json_data = f.read()
1662
- else:
1663
- return "Unsupported file type. Please upload a PNG, WEBP, or JSON file."
1664
-
1665
- # Parse the JSON content
1666
- try:
1667
- card_data = json.loads(json_data)
1668
- except json.JSONDecodeError as e:
1669
- return f"JSON decoding error: {e}"
1670
-
1671
- # Validate the character card
1672
- is_valid, validation_messages = validate_v2_card(card_data)
1673
-
1674
- # Prepare the validation output
1675
- if is_valid:
1676
- return "Character card is valid according to the V2 specification."
1677
- else:
1678
- # Concatenate all validation error messages
1679
- validation_output = "Character card validation failed:\n"
1680
- validation_output += "\n".join(validation_messages)
1681
- return validation_output
1682
-
1683
- except Exception as e:
1684
- logging.error(f"Error validating character card: {e}")
1685
- return f"An unexpected error occurred during validation: {e}"
1686
-
1687
- def validate_v2_card(card_data):
1688
- """
1689
- Validate a character card according to the V2 specification.
1690
-
1691
- Args:
1692
- card_data (dict): The parsed character card data.
1693
-
1694
- Returns:
1695
- Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
1696
- """
1697
- validation_messages = []
1698
-
1699
- # Check top-level fields
1700
- if 'spec' not in card_data:
1701
- validation_messages.append("Missing 'spec' field.")
1702
- elif card_data['spec'] != 'chara_card_v2':
1703
- validation_messages.append(f"Invalid 'spec' value: {card_data['spec']}. Expected 'chara_card_v2'.")
1704
-
1705
- if 'spec_version' not in card_data:
1706
- validation_messages.append("Missing 'spec_version' field.")
1707
- else:
1708
- # Ensure 'spec_version' is '2.0' or higher
1709
- try:
1710
- spec_version = float(card_data['spec_version'])
1711
- if spec_version < 2.0:
1712
- validation_messages.append(f"'spec_version' must be '2.0' or higher. Found '{card_data['spec_version']}'.")
1713
- except ValueError:
1714
- validation_messages.append(f"Invalid 'spec_version' format: {card_data['spec_version']}. Must be a number as a string.")
1715
-
1716
- if 'data' not in card_data:
1717
- validation_messages.append("Missing 'data' field.")
1718
- return False, validation_messages # Cannot proceed without 'data' field
1719
-
1720
- data = card_data['data']
1721
-
1722
- # Required fields in 'data'
1723
- required_fields = ['name', 'description', 'personality', 'scenario', 'first_mes', 'mes_example']
1724
- for field in required_fields:
1725
- if field not in data:
1726
- validation_messages.append(f"Missing required field in 'data': '{field}'.")
1727
- elif not isinstance(data[field], str):
1728
- validation_messages.append(f"Field '{field}' must be a string.")
1729
- elif not data[field].strip():
1730
- validation_messages.append(f"Field '{field}' cannot be empty.")
1731
-
1732
- # Optional fields with expected types
1733
- optional_fields = {
1734
- 'creator_notes': str,
1735
- 'system_prompt': str,
1736
- 'post_history_instructions': str,
1737
- 'alternate_greetings': list,
1738
- 'tags': list,
1739
- 'creator': str,
1740
- 'character_version': str,
1741
- 'extensions': dict,
1742
- 'character_book': dict # If present, should be a dict
1743
- }
1744
-
1745
- for field, expected_type in optional_fields.items():
1746
- if field in data:
1747
- if not isinstance(data[field], expected_type):
1748
- validation_messages.append(f"Field '{field}' must be of type '{expected_type.__name__}'.")
1749
- elif field == 'extensions':
1750
- # Validate that extensions keys are properly namespaced
1751
- for key in data[field].keys():
1752
- if '/' not in key and '_' not in key:
1753
- validation_messages.append(f"Extension key '{key}' in 'extensions' should be namespaced to prevent conflicts.")
1754
-
1755
- # If 'alternate_greetings' is present, check that it's a list of non-empty strings
1756
- if 'alternate_greetings' in data and isinstance(data['alternate_greetings'], list):
1757
- for idx, greeting in enumerate(data['alternate_greetings']):
1758
- if not isinstance(greeting, str) or not greeting.strip():
1759
- validation_messages.append(f"Element {idx} in 'alternate_greetings' must be a non-empty string.")
1760
-
1761
- # If 'tags' is present, check that it's a list of non-empty strings
1762
- if 'tags' in data and isinstance(data['tags'], list):
1763
- for idx, tag in enumerate(data['tags']):
1764
- if not isinstance(tag, str) or not tag.strip():
1765
- validation_messages.append(f"Element {idx} in 'tags' must be a non-empty string.")
1766
-
1767
- # Validate 'extensions' field
1768
- if 'extensions' in data and not isinstance(data['extensions'], dict):
1769
- validation_messages.append("Field 'extensions' must be a dictionary.")
1770
-
1771
- # Validate 'character_book' if present
1772
- if 'character_book' in data:
1773
- is_valid_book, book_messages = validate_character_book(data['character_book'])
1774
- if not is_valid_book:
1775
- validation_messages.extend(book_messages)
1776
-
1777
- is_valid = len(validation_messages) == 0
1778
- return is_valid, validation_messages
1779
-
1780
- # Button Callback
1781
- validate_button.click(
1782
- fn=validate_character_card,
1783
- inputs=[file_upload],
1784
- outputs=[validation_output]
1785
- )
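
For reference, a sketch of how the validator above behaves when driven directly — assuming `validate_v2_card` were lifted to module scope; the card literal is an illustrative placeholder, not data from this repository:

    minimal_card = {
        "spec": "chara_card_v2",
        "spec_version": "2.0",
        "data": {
            "name": "Demo",
            "description": "A test character.",
            "personality": "Curious.",
            "scenario": "A unit test.",
            "first_mes": "Hello!",
            "mes_example": "<START>\nDemo: Hi there!",
        },
    }

    is_valid, messages = validate_v2_card(minimal_card)
    assert is_valid and messages == []  # all six required 'data' fields are non-empty strings

    del minimal_card["data"]["first_mes"]
    is_valid, messages = validate_v2_card(minimal_card)
    assert not is_valid  # yields "Missing required field in 'data': 'first_mes'."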
1786
-
-
- def create_export_characters_tab():
- with gr.TabItem("Export Characters", visible=True):
- gr.Markdown("# Export Characters")
- gr.Markdown("Export character cards individually as JSON files or all together as a ZIP file.")
-
- with gr.Row():
- with gr.Column(scale=1):
- # Dropdown to select a character for individual export
- characters = get_character_cards()
- character_choices = [f"{char['name']} (ID: {char['id']})" for char in characters]
- export_character_dropdown = gr.Dropdown(
- label="Select Character to Export",
- choices=character_choices
- )
- load_characters_button = gr.Button("Load Existing Characters")
- export_single_button = gr.Button("Export Selected Character")
- export_all_button = gr.Button("Export All Characters")
-
- with gr.Column(scale=1):
- # Output components
- export_output = gr.File(label="Exported Character(s)", interactive=False)
- export_status = gr.Markdown("")
-
- # FIXME
- def export_single_character_wrapper(character_selection):
- file_path, status_message = export_single_character(character_selection)
- if file_path:
- return gr.update(value=file_path), status_message
- else:
- return gr.update(value=None), status_message
-
- def export_all_characters_wrapper():
- zip_path = export_all_characters_as_zip()
- characters = get_character_cards()
- exported_characters = [char['name'] for char in characters]
- status_message = f"Exported {len(exported_characters)} characters successfully:\n" + "\n".join(exported_characters)
- return gr.update(value=zip_path), status_message
-
- # Event listeners
- load_characters_button.click(
- fn=lambda: gr.update(choices=[f"{char['name']} (ID: {char['id']})" for char in get_character_cards()]),
- outputs=export_character_dropdown
- )
-
- export_single_button.click(
- fn=export_single_character_wrapper,
- inputs=[export_character_dropdown],
- outputs=[export_output, export_status]
- )
-
- export_all_button.click(
- fn=export_all_characters_wrapper,
- inputs=[],
- outputs=[export_output, export_status]
- )
-
- return export_character_dropdown, load_characters_button, export_single_button, export_all_button, export_output, export_status
-
- #
- # End of Character_Chat_tab.py
- #######################################################################################################################
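
The download-as-image path above works because PNG tEXt chunks survive a Pillow save/load round trip. A minimal, self-contained sketch of that mechanism, assuming only Pillow is installed (the file name and card contents are placeholders):

    import base64
    import json
    from PIL import Image
    from PIL.PngImagePlugin import PngInfo

    card = {"spec": "chara_card_v2", "spec_version": "2.0", "data": {"name": "Demo"}}

    # Write: base64-encode the card JSON into a tEXt chunk named 'chara'.
    info = PngInfo()
    info.add_text("chara", base64.b64encode(json.dumps(card).encode("utf-8")).decode("utf-8"))
    Image.new("RGB", (512, 512), color="white").save("demo_card.png", pnginfo=info)

    # Read: the chunk surfaces in img.info, ready for base64 decoding and json.loads.
    with Image.open("demo_card.png") as img:
        restored = json.loads(base64.b64decode(img.info["chara"]).decode("utf-8"))
    assert restored["data"]["name"] == "Demo"
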
App_Function_Libraries/Gradio_UI/Character_Interaction_tab.py DELETED
@@ -1,837 +0,0 @@
- # Character_Interaction_tab.py
- # Description: This file contains the functions that are used for Character Interactions in the Gradio UI.
- #
- # Imports
- import base64
- import io
- import uuid
- from datetime import datetime as datetime
- import logging
- import json
- import os
- from typing import List, Dict, Tuple, Union
-
- #
- # External Imports
- import gradio as gr
- from PIL import Image
- #
- # Local Imports
- from App_Function_Libraries.Chat import chat, load_characters, save_chat_history_to_db_wrapper
- from App_Function_Libraries.Gradio_UI.Chat_ui import chat_wrapper
- from App_Function_Libraries.Gradio_UI.Writing_tab import generate_writing_feedback
- #
- ########################################################################################################################
- #
- # Single-Character chat Functions:
-
-
- def chat_with_character(user_message, history, char_data, api_name_input, api_key):
- if char_data is None:
- return history, "Please import a character card first."
-
- bot_message = generate_writing_feedback(user_message, char_data['name'], "Overall", api_name_input,
- api_key)
- history.append((user_message, bot_message))
- return history, ""
-
-
- def import_character_card(file):
- if file is None:
- logging.warning("No file provided for character card import")
- return None
- try:
- if file.name.lower().endswith(('.png', '.webp')):
- logging.info(f"Attempting to import character card from image: {file.name}")
- json_data = extract_json_from_image(file)
- if json_data:
- logging.info("JSON data extracted from image, attempting to parse")
- card_data = import_character_card_json(json_data)
- if card_data:
- # Save the image data
- with Image.open(file) as img:
- img_byte_arr = io.BytesIO()
- img.save(img_byte_arr, format='PNG')
- card_data['image'] = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
- return card_data
- else:
- logging.warning("No JSON data found in the image")
- else:
- logging.info(f"Attempting to import character card from JSON file: {file.name}")
- content = file.read().decode('utf-8')
- return import_character_card_json(content)
- except Exception as e:
- logging.error(f"Error importing character card: {e}")
- return None
-
-
- def import_character_card_json(json_content):
- try:
- # Remove any leading/trailing whitespace
- json_content = json_content.strip()
-
- # Log the first 100 characters of the content
- logging.debug(f"JSON content (first 100 chars): {json_content[:100]}...")
-
- card_data = json.loads(json_content)
- logging.debug(f"Parsed JSON data keys: {list(card_data.keys())}")
- if 'spec' in card_data and card_data['spec'] == 'chara_card_v2':
- logging.info("Detected V2 character card")
- return card_data['data']
- else:
- logging.info("Assuming V1 character card")
- return card_data
- except json.JSONDecodeError as e:
- logging.error(f"JSON decode error: {e}")
- logging.error(f"Problematic JSON content: {json_content[:500]}...")
- except Exception as e:
- logging.error(f"Unexpected error parsing JSON: {e}")
- return None
-
-
- def extract_json_from_image(image_file):
- logging.debug(f"Attempting to extract JSON from image: {image_file.name}")
- try:
- with Image.open(image_file) as img:
- logging.debug("Image opened successfully")
- metadata = img.info
- if 'chara' in metadata:
- logging.debug("Found 'chara' in image metadata")
- chara_content = metadata['chara']
- logging.debug(f"Content of 'chara' metadata (first 100 chars): {chara_content[:100]}...")
- try:
- decoded_content = base64.b64decode(chara_content).decode('utf-8')
- logging.debug(f"Decoded content (first 100 chars): {decoded_content[:100]}...")
- return decoded_content
- except Exception as e:
- logging.error(f"Error decoding base64 content: {e}")
-
- logging.debug("'chara' not found in metadata, checking for base64 encoded data")
- raw_data = img.tobytes()
- possible_json = raw_data.split(b'{', 1)[-1].rsplit(b'}', 1)[0]
- if possible_json:
- try:
- decoded = base64.b64decode(possible_json).decode('utf-8')
- if decoded.startswith('{') and decoded.endswith('}'):
- logging.debug("Found and decoded base64 JSON data")
- return '{' + decoded + '}'
- except Exception as e:
- logging.error(f"Error decoding base64 data: {e}")
-
- logging.warning("No JSON data found in the image")
- except Exception as e:
- logging.error(f"Error extracting JSON from image: {e}")
- return None
-
-
- def load_chat_history(file):
- try:
- content = file.read().decode('utf-8')
- chat_data = json.loads(content)
- return chat_data['history'], chat_data['character']
- except Exception as e:
- logging.error(f"Error loading chat history: {e}")
- return None, None
-
-
- def create_character_card_interaction_tab():
- with gr.TabItem("Chat with a Character Card"):
- gr.Markdown("# Chat with a Character Card")
- with gr.Row():
- with gr.Column(scale=1):
- character_image = gr.Image(label="Character Image", type="filepath")
- character_card_upload = gr.File(label="Upload Character Card")
- import_card_button = gr.Button("Import Character Card")
- load_characters_button = gr.Button("Load Existing Characters")
- from App_Function_Libraries.Chat import get_character_names
- character_dropdown = gr.Dropdown(label="Select Character", choices=get_character_names())
- user_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name here")
- api_name_input = gr.Dropdown(
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
- "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
- "Custom-OpenAI-API"],
- value="HuggingFace",
- # FIXME - make it so the user can't click `Send Message` without first setting an API + Chatbot
- label="API for Interaction (Mandatory)"
- )
- api_key_input = gr.Textbox(label="API Key (if not set in Config_Files/config.txt)",
- placeholder="Enter your API key here", type="password")
- temperature_slider = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.05, label="Temperature")
- import_chat_button = gr.Button("Import Chat History")
- chat_file_upload = gr.File(label="Upload Chat History JSON", visible=False)
-
- with gr.Column(scale=2):
- chat_history = gr.Chatbot(label="Conversation", height=800)
- user_input = gr.Textbox(label="Your message")
- send_message_button = gr.Button("Send Message")
- regenerate_button = gr.Button("Regenerate Last Message")
- clear_chat_button = gr.Button("Clear Chat")
- chat_media_name = gr.Textbox(label="Custom Chat Name (optional)", visible=True)
- save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
- save_status = gr.Textbox(label="Save Status", interactive=False)
-
- character_data = gr.State(None)
- user_name = gr.State("")
-
- def import_chat_history(file, current_history, char_data):
- loaded_history, char_name = load_chat_history(file)
- if loaded_history is None:
- return current_history, char_data, "Failed to load chat history."
-
- # Check if the loaded chat is for the current character
- if char_data and char_data.get('name') != char_name:
- return current_history, char_data, f"Warning: Loaded chat is for character '{char_name}', but current character is '{char_data.get('name')}'. Chat not imported."
-
- # If no character is selected, try to load the character from the chat
- if not char_data:
- new_char_data = load_character(char_name)[0]
- if new_char_data:
- char_data = new_char_data
- else:
- return current_history, char_data, f"Warning: Character '{char_name}' not found. Please select the character manually."
-
- return loaded_history, char_data, f"Chat history for '{char_name}' imported successfully."
-
- def import_character(file):
- card_data = import_character_card(file)
- if card_data:
- from App_Function_Libraries.Chat import save_character
- save_character(card_data)
- return card_data, gr.update(choices=get_character_names())
- else:
- return None, gr.update()
-
- def load_character(name):
- from App_Function_Libraries.Chat import load_characters
- characters = load_characters()
- char_data = characters.get(name)
- if char_data:
- first_message = char_data.get('first_mes', "Hello! I'm ready to chat.")
- return char_data, [(None, first_message)] if first_message else [], None
- return None, [], None
-
- def load_character_image(name):
- from App_Function_Libraries.Chat import load_characters
- characters = load_characters()
- char_data = characters.get(name)
- if char_data and 'image_path' in char_data:
- image_path = char_data['image_path']
- if os.path.exists(image_path):
- return image_path
- else:
- logging.warning(f"Image file not found: {image_path}")
- return None
-
- def load_character_and_image(name):
- char_data, chat_history, _ = load_character(name)
- image_path = load_character_image(name)
- logging.debug(f"Character: {name}")
- logging.debug(f"Character data: {char_data}")
- logging.debug(f"Image path: {image_path}")
- return char_data, chat_history, image_path
-
- def character_chat_wrapper(message, history, char_data, api_endpoint, api_key, temperature, user_name):
- logging.debug("Entered character_chat_wrapper")
- if char_data is None:
- return "Please select a character first.", history
-
- if not user_name:
- user_name = "User"
-
- char_name = char_data.get('name', 'AI Assistant')
-
- # Prepare the character's background information
- char_background = f"""
- Name: {char_name}
- Description: {char_data.get('description', 'N/A')}
- Personality: {char_data.get('personality', 'N/A')}
- Scenario: {char_data.get('scenario', 'N/A')}
- """
-
- # Prepare the system prompt for character impersonation
- system_message = f"""You are roleplaying as {char_name}, the character described below. Respond to the user's messages in character, maintaining the personality and background provided. Do not break character or refer to yourself as an AI. Always refer to yourself as "{char_name}" and refer to the user as "{user_name}".
-
- {char_background}
-
- Additional instructions: {char_data.get('post_history_instructions', '')}
- """
-
- # Prepare media_content and selected_parts
- media_content = {
- 'id': char_name,
- 'title': char_name,
- 'content': char_background,
- 'description': char_data.get('description', ''),
- 'personality': char_data.get('personality', ''),
- 'scenario': char_data.get('scenario', '')
- }
- selected_parts = ['description', 'personality', 'scenario']
-
- prompt = char_data.get('post_history_instructions', '')
-
- # Prepare the input for the chat function
- if not history:
- full_message = f"{prompt}\n\n{user_name}: {message}" if prompt else f"{user_name}: {message}"
- else:
- full_message = f"{user_name}: {message}"
-
- # Call the chat function
- bot_message = chat(
- full_message,
- history,
- media_content,
- selected_parts,
- api_endpoint,
- api_key,
- prompt,
- temperature,
- system_message
- )
-
- # Update history
- history.append((message, bot_message))
- return history
-
- def save_chat_history(history, character_name):
- # Create the Saved_Chats folder if it doesn't exist
- save_directory = "Saved_Chats"
- os.makedirs(save_directory, exist_ok=True)
-
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
- filename = f"chat_history_{character_name}_{timestamp}.json"
- filepath = os.path.join(save_directory, filename)
-
- chat_data = {
- "character": character_name,
- "timestamp": timestamp,
- "history": history
- }
-
- try:
- with open(filepath, 'w', encoding='utf-8') as f:
- json.dump(chat_data, f, ensure_ascii=False, indent=2)
- return filepath
- except Exception as e:
- return f"Error saving chat: {str(e)}"
-
- def save_current_chat(history, char_data):
- if not char_data or not history:
- return "No chat to save or character not selected."
-
- character_name = char_data.get('name', 'Unknown')
- result = save_chat_history(history, character_name)
- if result.startswith("Error"):
- return result
- return f"Chat saved successfully as {result}"
-
- def regenerate_last_message(history, char_data, api_name, api_key, temperature, user_name):
- if not history:
- return history
-
- last_user_message = history[-1][0]
- new_history = history[:-1]
-
- return character_chat_wrapper(last_user_message, new_history, char_data, api_name, api_key, temperature,
- user_name)
-
- import_chat_button.click(
- fn=lambda: gr.update(visible=True),
- outputs=chat_file_upload
- )
-
- chat_file_upload.change(
- fn=import_chat_history,
- inputs=[chat_file_upload, chat_history, character_data],
- outputs=[chat_history, character_data, save_status]
- )
-
- def update_character_info(name):
- from App_Function_Libraries.Chat import load_characters
- characters = load_characters()
- char_data = characters.get(name)
-
- image_path = char_data.get('image_path') if char_data else None
-
- logging.debug(f"Character: {name}")
- logging.debug(f"Character data: {char_data}")
- logging.debug(f"Image path: {image_path}")
-
- if image_path:
- if os.path.exists(image_path):
- logging.debug(f"Image file exists at {image_path}")
- if os.access(image_path, os.R_OK):
- logging.debug("Image file is readable")
- else:
- logging.warning(f"Image file is not readable: {image_path}")
- image_path = None
- else:
- logging.warning(f"Image file does not exist: {image_path}")
- image_path = None
- else:
- logging.warning("No image path provided for the character")
-
- return char_data, None, image_path  # Return None for chat_history
-
- def on_character_select(name):
- logging.debug(f"Character selected: {name}")
- return update_character_info_with_error_handling(name)
-
- def clear_chat_history():
- return [], None  # Return empty list for chat_history and None for character_data
-
- def update_character_info_with_error_handling(name):
- logging.debug(f"Entering update_character_info_with_error_handling for character: {name}")
- try:
- char_data, _, image_path = update_character_info(name)
- logging.debug(f"Retrieved data: char_data={bool(char_data)}, image_path={image_path}")
-
- if char_data:
- first_message = char_data.get('first_mes', "Hello! I'm ready to chat.")
- chat_history = [(None, first_message)] if first_message else []
- else:
- chat_history = []
-
- logging.debug(f"Created chat_history with length: {len(chat_history)}")
-
- if image_path and os.path.exists(image_path):
- logging.debug(f"Image file exists at {image_path}")
- return char_data, chat_history, image_path
- else:
- logging.warning(f"Image not found or invalid path: {image_path}")
- return char_data, chat_history, None
- except Exception as e:
- logging.error(f"Error updating character info: {str(e)}", exc_info=True)
- return None, [], None
- finally:
- logging.debug("Exiting update_character_info_with_error_handling")
-
- # Define States for conversation_id and media_content, which are required for saving chat history
- conversation_id = gr.State(str(uuid.uuid4()))
- media_content = gr.State({})
-
- import_card_button.click(
- fn=import_character,
- inputs=[character_card_upload],
- outputs=[character_data, character_dropdown]
- )
-
- load_characters_button.click(
- fn=lambda: gr.update(choices=get_character_names()),
- outputs=character_dropdown
- )
-
- clear_chat_button.click(
- fn=clear_chat_history,
- inputs=[],
- outputs=[chat_history, character_data]
- )
-
- character_dropdown.change(
- fn=on_character_select,
- inputs=[character_dropdown],
- outputs=[character_data, chat_history, character_image]
- )
-
- send_message_button.click(
- fn=character_chat_wrapper,
- inputs=[user_input, chat_history, character_data, api_name_input, api_key_input, temperature_slider,
- user_name_input],
- outputs=[chat_history]
- ).then(lambda: "", outputs=user_input)
-
- regenerate_button.click(
- fn=regenerate_last_message,
- inputs=[chat_history, character_data, api_name_input, api_key_input, temperature_slider, user_name_input],
- outputs=[chat_history]
- )
-
- user_name_input.change(
- fn=lambda name: name,
- inputs=[user_name_input],
- outputs=[user_name]
- )
-
- # FIXME - Implement saving chat history to database; look at Chat_UI.py for reference
- save_chat_history_to_db.click(
- save_chat_history_to_db_wrapper,
- inputs=[chat_history, conversation_id, media_content, chat_media_name],
- outputs=[conversation_id, gr.Textbox(label="Save Status")]
- )
-
- return character_data, chat_history, user_input, user_name, character_image
-
-
- #
- # End of Character chat tab
- ######################################################################################################################
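
character_chat_wrapper above folds the card's fields into a roleplay system prompt before calling chat(). A standalone sketch of just that assembly step — a condensed paraphrase of the prompt text, with a placeholder card and no Gradio or LLM call:

    def build_roleplay_system_message(char_data: dict, user_name: str = "User") -> str:
        # Mirror the background block built from the card's fields.
        char_name = char_data.get("name", "AI Assistant")
        background = (
            f"Name: {char_name}\n"
            f"Description: {char_data.get('description', 'N/A')}\n"
            f"Personality: {char_data.get('personality', 'N/A')}\n"
            f"Scenario: {char_data.get('scenario', 'N/A')}"
        )
        return (
            f"You are roleplaying as {char_name}, the character described below. "
            f'Respond in character; refer to yourself as "{char_name}" and to the user as "{user_name}".\n\n'
            f"{background}\n\n"
            f"Additional instructions: {char_data.get('post_history_instructions', '')}"
        )

    demo = {"name": "Demo", "description": "A retired sailor.",
            "personality": "Gruff but kind.", "scenario": "A quiet harbor."}
    print(build_roleplay_system_message(demo, "Alice"))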
- #
- # Multi-Character Chat Interface
-
- def character_interaction_setup():
- characters = load_characters()
- return characters, [], None, None
-
-
- def extract_character_response(response: Union[str, Tuple]) -> str:
- if isinstance(response, tuple):
- # If it's a tuple, try to extract the first string element
- for item in response:
- if isinstance(item, str):
- return item.strip()
- # If no string found, return a default message
- return "I'm not sure how to respond."
- elif isinstance(response, str):
- # If it's already a string, just return it
- return response.strip()
- else:
- # For any other type, return a default message
- return "I'm having trouble forming a response."
-
- # def process_character_response(response: str) -> str:
- #     # Remove any leading explanatory text before the first '---'
- #     parts = response.split('---')
- #     if len(parts) > 1:
- #         return '---' + '---'.join(parts[1:])
- #     return response.strip()
- def process_character_response(response: Union[str, Tuple]) -> str:
- if isinstance(response, tuple):
- response = ' '.join(str(item) for item in response if isinstance(item, str))
-
- if isinstance(response, str):
- # Remove any leading explanatory text before the first '---'
- parts = response.split('---')
- if len(parts) > 1:
- return '---' + '---'.join(parts[1:])
- return response.strip()
- else:
- return "I'm having trouble forming a response."
-
- def character_turn(characters: Dict, conversation: List[Tuple[str, str]],
- current_character: str, other_characters: List[str],
- api_endpoint: str, api_key: str, temperature: float,
- scenario: str = "") -> Tuple[List[Tuple[str, str]], str]:
- if not current_character or current_character not in characters:
- return conversation, current_character
-
- if not conversation and scenario:
- conversation.append(("Scenario", scenario))
-
- current_char = characters[current_character]
- other_chars = [characters[char] for char in other_characters if char in characters and char != current_character]
-
- prompt = f"{current_char['name']}'s personality: {current_char['personality']}\n"
- for char in other_chars:
- prompt += f"{char['name']}'s personality: {char['personality']}\n"
- prompt += "Conversation so far:\n" + "\n".join([f"{sender}: {message}" for sender, message in conversation])
- prompt += f"\n\nHow would {current_char['name']} respond?"
-
- try:
- response = chat_wrapper(prompt, conversation, {}, [], api_endpoint, api_key, "", None, False, temperature, "")
- processed_response = process_character_response(response)
- conversation.append((current_char['name'], processed_response))
- except Exception as e:
- error_message = f"Error generating response: {str(e)}"
- conversation.append((current_char['name'], error_message))
-
- return conversation, current_character
-
-
- def character_interaction(character1: str, character2: str, api_endpoint: str, api_key: str,
- num_turns: int, scenario: str, temperature: float,
- user_interjection: str = "") -> List[str]:
- characters = load_characters()
- char1 = characters[character1]
- char2 = characters[character2]
- conversation = []
- current_speaker = char1
- other_speaker = char2
-
- # Add scenario to the conversation start
- if scenario:
- conversation.append(f"Scenario: {scenario}")
-
- for turn in range(num_turns):
- # Construct the prompt for the current speaker
- prompt = f"{current_speaker['name']}'s personality: {current_speaker['personality']}\n"
- prompt += f"{other_speaker['name']}'s personality: {other_speaker['personality']}\n"
- prompt += "Conversation so far:\n" + "\n".join(
- [msg if isinstance(msg, str) else f"{msg[0]}: {msg[1]}" for msg in conversation])
-
- # Add user interjection if provided
- if user_interjection and turn == num_turns // 2:
- prompt += f"\n\nUser interjection: {user_interjection}\n"
- conversation.append(f"User: {user_interjection}")
-
- prompt += f"\n\nHow would {current_speaker['name']} respond?"
-
- # FIXME - figure out why the double print is happening
- # Get response from the LLM
- response = chat_wrapper(prompt, conversation, {}, [], api_endpoint, api_key, "", None, False, temperature, "")
-
- # Add the response to the conversation
- conversation.append((current_speaker['name'], response))
-
- # Switch speakers
- current_speaker, other_speaker = other_speaker, current_speaker
-
- # Convert the conversation to a list of strings for output
- return [f"{msg[0]}: {msg[1]}" if isinstance(msg, tuple) else msg for msg in conversation]
-
-
- def create_multiple_character_chat_tab():
- with gr.TabItem("Multi-Character Chat"):
- characters, conversation, current_character, other_character = character_interaction_setup()
-
- with gr.Blocks() as character_interaction:
- gr.Markdown("# Multi-Character Chat")
-
- with gr.Row():
- num_characters = gr.Dropdown(label="Number of Characters", choices=["2", "3", "4"], value="2")
- character_selectors = [gr.Dropdown(label=f"Character {i + 1}", choices=list(characters.keys())) for i in
- range(4)]
-
- api_endpoint = gr.Dropdown(label="API Endpoint",
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek",
- "Mistral",
- "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM",
- "ollama", "HuggingFace",
- "Custom-OpenAI-API"],
- value="HuggingFace")
- api_key = gr.Textbox(label="API Key (if required)", type="password")
- temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
- scenario = gr.Textbox(label="Scenario (optional)", lines=3)
-
- chat_display = gr.Chatbot(label="Character Interaction")
- current_index = gr.State(0)
-
- next_turn_btn = gr.Button("Next Turn")
- narrator_input = gr.Textbox(label="Narrator Input", placeholder="Add a narration or description...")
- add_narration_btn = gr.Button("Add Narration")
- error_box = gr.Textbox(label="Error Messages", visible=False)
- reset_btn = gr.Button("Reset Conversation")
- chat_media_name = gr.Textbox(label="Custom Chat Name (optional)", visible=True)
- save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
-
- def update_character_selectors(num):
- return [gr.update(visible=True) if i < int(num) else gr.update(visible=False) for i in range(4)]
-
- num_characters.change(
- update_character_selectors,
- inputs=[num_characters],
- outputs=character_selectors
- )
-
- def reset_conversation():
- return [], 0, gr.update(value=""), gr.update(value="")
-
- def take_turn(conversation, current_index, char1, char2, char3, char4, api_endpoint, api_key, temperature,
- scenario):
- char_selectors = [char for char in [char1, char2, char3, char4] if char]  # Remove None values
- num_chars = len(char_selectors)
-
- if num_chars == 0:
- return conversation, current_index  # No characters selected, return without changes
-
- if not conversation:
- conversation = []
- if scenario:
- conversation.append(("Scenario", scenario))
-
- current_character = char_selectors[current_index % num_chars]
- next_index = (current_index + 1) % num_chars
-
- prompt = f"Character speaking: {current_character}\nOther characters: {', '.join(char for char in char_selectors if char != current_character)}\n"
- prompt += "Generate the next part of the conversation, including character dialogues and actions. Characters should speak in first person."
-
- response, new_conversation, _ = chat_wrapper(prompt, conversation, {}, [], api_endpoint, api_key, "",
- None, False, temperature, "")
-
- # Format the response
- formatted_lines = []
- for line in response.split('\n'):
- if ':' in line:
- speaker, text = line.split(':', 1)
- formatted_lines.append(f"**{speaker.strip()}**: {text.strip()}")
- else:
- formatted_lines.append(line)
-
- formatted_response = '\n'.join(formatted_lines)
-
- # Update the last message in the conversation with the formatted response
- if new_conversation:
- new_conversation[-1] = (new_conversation[-1][0], formatted_response)
- else:
- new_conversation.append((current_character, formatted_response))
-
- return new_conversation, next_index
-
- def add_narration(narration, conversation):
- if narration:
- conversation.append(("Narrator", narration))
- return conversation, ""
-
- def take_turn_with_error_handling(conversation, current_index, char1, char2, char3, char4, api_endpoint,
- api_key, temperature, scenario):
- try:
- new_conversation, next_index = take_turn(conversation, current_index, char1, char2, char3, char4,
- api_endpoint, api_key, temperature, scenario)
- return new_conversation, next_index, gr.update(visible=False, value="")
- except Exception as e:
- error_message = f"An error occurred: {str(e)}"
- return conversation, current_index, gr.update(visible=True, value=error_message)
-
- # Define States for conversation_id and media_content, which are required for saving chat history
- media_content = gr.State({})
- conversation_id = gr.State(str(uuid.uuid4()))
-
- next_turn_btn.click(
- take_turn_with_error_handling,
- inputs=[chat_display, current_index] + character_selectors + [api_endpoint, api_key, temperature,
- scenario],
- outputs=[chat_display, current_index, error_box]
- )
-
- add_narration_btn.click(
- add_narration,
- inputs=[narrator_input, chat_display],
- outputs=[chat_display, narrator_input]
- )
-
- reset_btn.click(
- reset_conversation,
- outputs=[chat_display, current_index, scenario, narrator_input]
- )
-
- # FIXME - Implement saving chat history to database; look at Chat_UI.py for reference
- save_chat_history_to_db.click(
- save_chat_history_to_db_wrapper,
- inputs=[chat_display, conversation_id, media_content, chat_media_name],
- outputs=[conversation_id, gr.Textbox(label="Save Status")]
- )
-
- return character_interaction
-
- #
- # End of Multi-Character chat tab
- ########################################################################################################################
- #
- # Narrator-Controlled Conversation Tab
-
- # From `Fuzzlewumper` on Reddit.
- def create_narrator_controlled_conversation_tab():
- with gr.TabItem("Narrator-Controlled Conversation"):
- gr.Markdown("# Narrator-Controlled Conversation")
-
- with gr.Row():
- with gr.Column(scale=1):
- api_endpoint = gr.Dropdown(
- label="API Endpoint",
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
- "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
- "Custom-OpenAI-API"],
- value="HuggingFace"
- )
- api_key = gr.Textbox(label="API Key (if required)", type="password")
- temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
-
- with gr.Column(scale=2):
- narrator_input = gr.Textbox(
- label="Narrator Input",
- placeholder="Set the scene or provide context...",
- lines=3
- )
-
- character_inputs = []
- for i in range(4):  # Allow up to 4 characters
- with gr.Row():
- name = gr.Textbox(label=f"Character {i + 1} Name")
- description = gr.Textbox(label=f"Character {i + 1} Description", lines=3)
- character_inputs.append((name, description))
-
- conversation_display = gr.Chatbot(label="Conversation", height=400)
- user_input = gr.Textbox(label="Your Input (optional)", placeholder="Add your own dialogue or action...")
-
- with gr.Row():
- generate_btn = gr.Button("Generate Next Interaction")
- reset_btn = gr.Button("Reset Conversation")
- chat_media_name = gr.Textbox(label="Custom Chat Name (optional)", visible=True)
- save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
-
- error_box = gr.Textbox(label="Error Messages", visible=False)
-
- # Define States for conversation_id and media_content, which are required for saving chat history
- conversation_id = gr.State(str(uuid.uuid4()))
- media_content = gr.State({})
-
- def generate_interaction(conversation, narrator_text, user_text, api_endpoint, api_key, temperature,
- *character_data):
- try:
- characters = [{"name": name.strip(), "description": desc.strip()}
- for name, desc in zip(character_data[::2], character_data[1::2])
- if name.strip() and desc.strip()]
-
- if not characters:
- raise ValueError("At least one character must be defined.")
-
- prompt = f"Narrator: {narrator_text}\n\n"
- for char in characters:
- prompt += f"Character '{char['name']}': {char['description']}\n"
- prompt += "\nGenerate the next part of the conversation, including character dialogues and actions. "
- prompt += "Characters should speak in first person. "
- if user_text:
- prompt += f"\nIncorporate this user input: {user_text}"
- prompt += "\nResponse:"
-
- response, conversation, _ = chat_wrapper(prompt, conversation, {}, [], api_endpoint, api_key, "", None,
- False, temperature, "")
-
- # Format the response
- formatted_lines = []
- for line in response.split('\n'):
- if ':' in line:
- speaker, text = line.split(':', 1)
- formatted_lines.append(f"**{speaker.strip()}**: {text.strip()}")
- else:
- formatted_lines.append(line)
-
- formatted_response = '\n'.join(formatted_lines)
-
- # Update the last message in the conversation with the formatted response
- if conversation:
- conversation[-1] = (conversation[-1][0], formatted_response)
- else:
- conversation.append((None, formatted_response))
-
- return conversation, gr.update(value=""), gr.update(value=""), gr.update(visible=False, value="")
- except Exception as e:
- error_message = f"An error occurred: {str(e)}"
- return conversation, gr.update(), gr.update(), gr.update(visible=True, value=error_message)
-
- def reset_conversation():
- return [], gr.update(value=""), gr.update(value=""), gr.update(visible=False, value="")
-
- generate_btn.click(
- generate_interaction,
- inputs=[conversation_display, narrator_input, user_input, api_endpoint, api_key, temperature] +
- [input for char_input in character_inputs for input in char_input],
- outputs=[conversation_display, narrator_input, user_input, error_box]
- )
-
- reset_btn.click(
- reset_conversation,
- outputs=[conversation_display, narrator_input, user_input, error_box]
- )
-
- # FIXME - Implement saving chat history to database; look at Chat_UI.py for reference
- save_chat_history_to_db.click(
- save_chat_history_to_db_wrapper,
- inputs=[conversation_display, conversation_id, media_content, chat_media_name],
- outputs=[conversation_id, gr.Textbox(label="Save Status")]
- )
-
-
- return api_endpoint, api_key, temperature, narrator_input, conversation_display, user_input, generate_btn, reset_btn, error_box
-
- #
- # End of Narrator-Controlled Conversation tab
- ########################################################################################################################
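
Both take_turn and generate_interaction above apply the same inline speaker-formatting pass to the model output. Extracted as a standalone helper — a refactor sketch with pure string handling, not code from the repository:

    def format_speaker_lines(response: str) -> str:
        formatted = []
        for line in response.split("\n"):
            if ":" in line:
                speaker, text = line.split(":", 1)  # split only on the first colon
                formatted.append(f"**{speaker.strip()}**: {text.strip()}")
            else:
                formatted.append(line)
        return "\n".join(formatted)

    print(format_speaker_lines("Alice: Hello there\n(she waves)\nBob: Hi, Alice!"))
    # **Alice**: Hello there
    # (she waves)
    # **Bob**: Hi, Alice!
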
App_Function_Libraries/Gradio_UI/Character_interaction_tab.py DELETED
@@ -1,511 +0,0 @@
# Character_interaction_tab.py
# Description: This file contains the functions that are used for Character Interactions in the Gradio UI.
#
# Imports
import base64
import io
import uuid
from datetime import datetime
import logging
import json
import os
from typing import List, Dict, Tuple, Union

#
# External Imports
import gradio as gr
from PIL import Image
#
# Local Imports
from App_Function_Libraries.Chat import chat, load_characters, save_chat_history_to_db_wrapper
from App_Function_Libraries.Gradio_UI.Chat_ui import chat_wrapper
from App_Function_Libraries.Gradio_UI.Writing_tab import generate_writing_feedback
#
########################################################################################################################
#
# Single-Character chat Functions:
# FIXME - add these functions to the Personas library

def chat_with_character(user_message, history, char_data, api_name_input, api_key):
    if char_data is None:
        return history, "Please import a character card first."

    bot_message = generate_writing_feedback(user_message, char_data['name'], "Overall", api_name_input,
                                            api_key)
    history.append((user_message, bot_message))
    return history, ""


def import_character_card(file):
    if file is None:
        logging.warning("No file provided for character card import")
        return None
    try:
        if file.name.lower().endswith(('.png', '.webp')):
            logging.info(f"Attempting to import character card from image: {file.name}")
            json_data = extract_json_from_image(file)
            if json_data:
                logging.info("JSON data extracted from image, attempting to parse")
                card_data = import_character_card_json(json_data)
                if card_data:
                    # Save the image data
                    with Image.open(file) as img:
                        img_byte_arr = io.BytesIO()
                        img.save(img_byte_arr, format='PNG')
                        card_data['image'] = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
                    return card_data
            else:
                logging.warning("No JSON data found in the image")
        else:
            logging.info(f"Attempting to import character card from JSON file: {file.name}")
            content = file.read().decode('utf-8')
            return import_character_card_json(content)
    except Exception as e:
        logging.error(f"Error importing character card: {e}")
    return None


def import_character_card_json(json_content):
    try:
        # Remove any leading/trailing whitespace
        json_content = json_content.strip()

        # Log the first 100 characters of the content
        logging.debug(f"JSON content (first 100 chars): {json_content[:100]}...")

        card_data = json.loads(json_content)
        logging.debug(f"Parsed JSON data keys: {list(card_data.keys())}")
        if 'spec' in card_data and card_data['spec'] == 'chara_card_v2':
            logging.info("Detected V2 character card")
            return card_data['data']
        else:
            logging.info("Assuming V1 character card")
            return card_data
    except json.JSONDecodeError as e:
        logging.error(f"JSON decode error: {e}")
        logging.error(f"Problematic JSON content: {json_content[:500]}...")
    except Exception as e:
        logging.error(f"Unexpected error parsing JSON: {e}")
    return None


def extract_json_from_image(image_file):
    logging.debug(f"Attempting to extract JSON from image: {image_file.name}")
    try:
        with Image.open(image_file) as img:
            logging.debug("Image opened successfully")
            metadata = img.info
            if 'chara' in metadata:
                logging.debug("Found 'chara' in image metadata")
                chara_content = metadata['chara']
                logging.debug(f"Content of 'chara' metadata (first 100 chars): {chara_content[:100]}...")
                try:
                    decoded_content = base64.b64decode(chara_content).decode('utf-8')
                    logging.debug(f"Decoded content (first 100 chars): {decoded_content[:100]}...")
                    return decoded_content
                except Exception as e:
                    logging.error(f"Error decoding base64 content: {e}")

            logging.debug("'chara' not found in metadata, checking for base64 encoded data")
            raw_data = img.tobytes()
            possible_json = raw_data.split(b'{', 1)[-1].rsplit(b'}', 1)[0]
            if possible_json:
                try:
                    decoded = base64.b64decode(possible_json).decode('utf-8')
                    if decoded.startswith('{') and decoded.endswith('}'):
                        logging.debug("Found and decoded base64 JSON data")
                        return '{' + decoded + '}'
                except Exception as e:
                    logging.error(f"Error decoding base64 data: {e}")

            logging.warning("No JSON data found in the image")
    except Exception as e:
        logging.error(f"Error extracting JSON from image: {e}")
    return None
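
# Illustrative sketch only: the 'chara' lookup above follows the common character-card
# convention of stashing base64-encoded JSON in a PNG text chunk. The helper below shows
# how such a card might be written with Pillow; the paths and card fields are hypothetical
# examples, not values taken from this repo.
def _example_write_character_card(src_path, dst_path):
    from PIL.PngImagePlugin import PngInfo
    card_json = json.dumps({"spec": "chara_card_v2", "data": {"name": "Alice"}})
    png_meta = PngInfo()
    # Store base64-encoded JSON under the 'chara' text key, as extract_json_from_image() expects
    png_meta.add_text("chara", base64.b64encode(card_json.encode("utf-8")).decode("ascii"))
    with Image.open(src_path) as img:
        img.save(dst_path, pnginfo=png_meta)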


def load_chat_history(file):
    try:
        content = file.read().decode('utf-8')
        chat_data = json.loads(content)
        return chat_data['history'], chat_data['character']
    except Exception as e:
        logging.error(f"Error loading chat history: {e}")
        return None, None


#
# End of Single-Character chat Functions
######################################################################################################################
#
# Multi-Character Chat Interface

# FIXME - refactor and move these functions to the Character_Chat library so that it uses the same functions
def character_interaction_setup():
    characters = load_characters()
    return characters, [], None, None


def extract_character_response(response: Union[str, Tuple]) -> str:
    if isinstance(response, tuple):
        # If it's a tuple, try to extract the first string element
        for item in response:
            if isinstance(item, str):
                return item.strip()
        # If no string found, return a default message
        return "I'm not sure how to respond."
    elif isinstance(response, str):
        # If it's already a string, just return it
        return response.strip()
    else:
        # For any other type, return a default message
        return "I'm having trouble forming a response."

# def process_character_response(response: str) -> str:
#     # Remove any leading explanatory text before the first '---'
#     parts = response.split('---')
#     if len(parts) > 1:
#         return '---' + '---'.join(parts[1:])
#     return response.strip()
def process_character_response(response: Union[str, Tuple]) -> str:
    if isinstance(response, tuple):
        response = ' '.join(str(item) for item in response if isinstance(item, str))

    if isinstance(response, str):
        # Remove any leading explanatory text before the first '---'
        parts = response.split('---')
        if len(parts) > 1:
            return '---' + '---'.join(parts[1:])
        return response.strip()
    else:
        return "I'm having trouble forming a response."

def character_turn(characters: Dict, conversation: List[Tuple[str, str]],
                   current_character: str, other_characters: List[str],
                   api_endpoint: str, api_key: str, temperature: float,
                   scenario: str = "") -> Tuple[List[Tuple[str, str]], str]:
    if not current_character or current_character not in characters:
        return conversation, current_character

    if not conversation and scenario:
        conversation.append(("Scenario", scenario))

    current_char = characters[current_character]
    other_chars = [characters[char] for char in other_characters if char in characters and char != current_character]

    prompt = f"{current_char['name']}'s personality: {current_char['personality']}\n"
    for char in other_chars:
        prompt += f"{char['name']}'s personality: {char['personality']}\n"
    prompt += "Conversation so far:\n" + "\n".join([f"{sender}: {message}" for sender, message in conversation])
    prompt += f"\n\nHow would {current_char['name']} respond?"

    try:
        response = chat_wrapper(prompt, conversation, {}, [], api_endpoint, api_key, "", None, False, temperature, "")
        processed_response = process_character_response(response)
        conversation.append((current_char['name'], processed_response))
    except Exception as e:
        error_message = f"Error generating response: {str(e)}"
        conversation.append((current_char['name'], error_message))

    return conversation, current_character


def character_interaction(character1: str, character2: str, api_endpoint: str, api_key: str,
                          num_turns: int, scenario: str, temperature: float,
                          user_interjection: str = "") -> List[str]:
    characters = load_characters()
    char1 = characters[character1]
    char2 = characters[character2]
    conversation = []
    current_speaker = char1
    other_speaker = char2

    # Add scenario to the conversation start
    if scenario:
        conversation.append(f"Scenario: {scenario}")

    for turn in range(num_turns):
        # Construct the prompt for the current speaker
        prompt = f"{current_speaker['name']}'s personality: {current_speaker['personality']}\n"
        prompt += f"{other_speaker['name']}'s personality: {other_speaker['personality']}\n"
        prompt += "Conversation so far:\n" + "\n".join(
            [msg if isinstance(msg, str) else f"{msg[0]}: {msg[1]}" for msg in conversation])

        # Add user interjection if provided
        if user_interjection and turn == num_turns // 2:
            prompt += f"\n\nUser interjection: {user_interjection}\n"
            conversation.append(f"User: {user_interjection}")

        prompt += f"\n\nHow would {current_speaker['name']} respond?"

        # FIXME - figure out why the double print is happening
        # Get response from the LLM
        response = chat_wrapper(prompt, conversation, {}, [], api_endpoint, api_key, "", None, False, temperature, "")

        # Add the response to the conversation
        conversation.append((current_speaker['name'], response))

        # Switch speakers
        current_speaker, other_speaker = other_speaker, current_speaker

    # Convert the conversation to a list of strings for output
    return [f"{msg[0]}: {msg[1]}" if isinstance(msg, tuple) else msg for msg in conversation]


def create_multiple_character_chat_tab():
    with gr.TabItem("Multi-Character Chat", visible=True):
        characters, conversation, current_character, other_character = character_interaction_setup()

        with gr.Blocks() as character_interaction:
            gr.Markdown("# Multi-Character Chat")

            with gr.Row():
                num_characters = gr.Dropdown(label="Number of Characters", choices=["2", "3", "4"], value="2")
                character_selectors = [gr.Dropdown(label=f"Character {i + 1}", choices=list(characters.keys())) for i in
                                       range(4)]

            api_endpoint = gr.Dropdown(label="API Endpoint",
                                       choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek",
                                                "Mistral",
                                                "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM",
                                                "ollama", "HuggingFace",
                                                "Custom-OpenAI-API"],
                                       value="HuggingFace")
            api_key = gr.Textbox(label="API Key (if required)", type="password")
            temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
            scenario = gr.Textbox(label="Scenario (optional)", lines=3)

            chat_display = gr.Chatbot(label="Character Interaction")
            current_index = gr.State(0)

            next_turn_btn = gr.Button("Next Turn")
            narrator_input = gr.Textbox(label="Narrator Input", placeholder="Add a narration or description...")
            add_narration_btn = gr.Button("Add Narration")
            error_box = gr.Textbox(label="Error Messages", visible=False)
            reset_btn = gr.Button("Reset Conversation")
            chat_media_name = gr.Textbox(label="Custom Chat Name (optional)", visible=True)
            save_chat_history_to_db = gr.Button("Save Chat History to Database")

            def update_character_selectors(num):
                return [gr.update(visible=True) if i < int(num) else gr.update(visible=False) for i in range(4)]

            num_characters.change(
                update_character_selectors,
                inputs=[num_characters],
                outputs=character_selectors
            )

            def reset_conversation():
                return [], 0, gr.update(value=""), gr.update(value="")

            def take_turn(conversation, current_index, char1, char2, char3, char4, api_endpoint, api_key, temperature,
                          scenario):
                char_selectors = [char for char in [char1, char2, char3, char4] if char]  # Remove None values
                num_chars = len(char_selectors)

                if num_chars == 0:
                    return conversation, current_index  # No characters selected, return without changes

                if not conversation:
                    conversation = []
                    if scenario:
                        conversation.append(("Scenario", scenario))

                current_character = char_selectors[current_index % num_chars]
                next_index = (current_index + 1) % num_chars

                prompt = f"Character speaking: {current_character}\nOther characters: {', '.join(char for char in char_selectors if char != current_character)}\n"
                prompt += "Generate the next part of the conversation, including character dialogues and actions. Characters should speak in first person."

                response, new_conversation, _ = chat_wrapper(prompt, conversation, {}, [], api_endpoint, api_key, "",
                                                             None, False, temperature, "")

                # Format the response
                formatted_lines = []
                for line in response.split('\n'):
                    if ':' in line:
                        speaker, text = line.split(':', 1)
                        formatted_lines.append(f"**{speaker.strip()}**: {text.strip()}")
                    else:
                        formatted_lines.append(line)

                formatted_response = '\n'.join(formatted_lines)

                # Update the last message in the conversation with the formatted response
                if new_conversation:
                    new_conversation[-1] = (new_conversation[-1][0], formatted_response)
                else:
                    new_conversation.append((current_character, formatted_response))

                return new_conversation, next_index

            def add_narration(narration, conversation):
                if narration:
                    conversation.append(("Narrator", narration))
                return conversation, ""

            def take_turn_with_error_handling(conversation, current_index, char1, char2, char3, char4, api_endpoint,
                                              api_key, temperature, scenario):
                try:
                    new_conversation, next_index = take_turn(conversation, current_index, char1, char2, char3, char4,
                                                             api_endpoint, api_key, temperature, scenario)
                    return new_conversation, next_index, gr.update(visible=False, value="")
                except Exception as e:
                    error_message = f"An error occurred: {str(e)}"
                    return conversation, current_index, gr.update(visible=True, value=error_message)

            # Define States for conversation_id and media_content, which are required for saving chat history
            media_content = gr.State({})
            conversation_id = gr.State(str(uuid.uuid4()))

            next_turn_btn.click(
                take_turn_with_error_handling,
                inputs=[chat_display, current_index] + character_selectors + [api_endpoint, api_key, temperature,
                                                                              scenario],
                outputs=[chat_display, current_index, error_box]
            )

            add_narration_btn.click(
                add_narration,
                inputs=[narrator_input, chat_display],
                outputs=[chat_display, narrator_input]
            )

            reset_btn.click(
                reset_conversation,
                outputs=[chat_display, current_index, scenario, narrator_input]
            )

            # FIXME - Implement saving chat history to database; look at Chat_UI.py for reference
            save_chat_history_to_db.click(
                save_chat_history_to_db_wrapper,
                inputs=[chat_display, conversation_id, media_content, chat_media_name],
                outputs=[conversation_id, gr.Textbox(label="Save Status")]
            )

        return character_interaction

#
# End of Multi-Character chat tab
########################################################################################################################
#
# Narrator-Controlled Conversation Tab

# From `Fuzzlewumper` on Reddit.
def create_narrator_controlled_conversation_tab():
    with gr.TabItem("Narrator-Controlled Conversation", visible=True):
        gr.Markdown("# Narrator-Controlled Conversation")

        with gr.Row():
            with gr.Column(scale=1):
                api_endpoint = gr.Dropdown(
                    label="API Endpoint",
                    choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
                             "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
                             "Custom-OpenAI-API"],
                    value="HuggingFace"
                )
                api_key = gr.Textbox(label="API Key (if required)", type="password")
                temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)

            with gr.Column(scale=2):
                narrator_input = gr.Textbox(
                    label="Narrator Input",
                    placeholder="Set the scene or provide context...",
                    lines=3
                )

        character_inputs = []
        for i in range(4):  # Allow up to 4 characters
            with gr.Row():
                name = gr.Textbox(label=f"Character {i + 1} Name")
                description = gr.Textbox(label=f"Character {i + 1} Description", lines=3)
                character_inputs.append((name, description))

        conversation_display = gr.Chatbot(label="Conversation", height=400)
        user_input = gr.Textbox(label="Your Input (optional)", placeholder="Add your own dialogue or action...")

        with gr.Row():
            generate_btn = gr.Button("Generate Next Interaction")
            reset_btn = gr.Button("Reset Conversation")
            chat_media_name = gr.Textbox(label="Custom Chat Name (optional)", visible=True)
            save_chat_history_to_db = gr.Button("Save Chat History to Database")

        error_box = gr.Textbox(label="Error Messages", visible=False)

        # Define States for conversation_id and media_content, which are required for saving chat history
        conversation_id = gr.State(str(uuid.uuid4()))
        media_content = gr.State({})

        def generate_interaction(conversation, narrator_text, user_text, api_endpoint, api_key, temperature,
                                 *character_data):
            try:
                characters = [{"name": name.strip(), "description": desc.strip()}
                              for name, desc in zip(character_data[::2], character_data[1::2])
                              if name.strip() and desc.strip()]

                if not characters:
                    raise ValueError("At least one character must be defined.")

                prompt = f"Narrator: {narrator_text}\n\n"
                for char in characters:
                    prompt += f"Character '{char['name']}': {char['description']}\n"
                prompt += "\nGenerate the next part of the conversation, including character dialogues and actions. "
                prompt += "Characters should speak in first person. "
                if user_text:
                    prompt += f"\nIncorporate this user input: {user_text}"
                prompt += "\nResponse:"

                response, conversation, _ = chat_wrapper(prompt, conversation, {}, [], api_endpoint, api_key, "", None,
                                                         False, temperature, "")

                # Format the response
                formatted_lines = []
                for line in response.split('\n'):
                    if ':' in line:
                        speaker, text = line.split(':', 1)
                        formatted_lines.append(f"**{speaker.strip()}**: {text.strip()}")
                    else:
                        formatted_lines.append(line)

                formatted_response = '\n'.join(formatted_lines)

                # Update the last message in the conversation with the formatted response
                if conversation:
                    conversation[-1] = (conversation[-1][0], formatted_response)
                else:
                    conversation.append((None, formatted_response))

                return conversation, gr.update(value=""), gr.update(value=""), gr.update(visible=False, value="")
            except Exception as e:
                error_message = f"An error occurred: {str(e)}"
                return conversation, gr.update(), gr.update(), gr.update(visible=True, value=error_message)

        def reset_conversation():
            return [], gr.update(value=""), gr.update(value=""), gr.update(visible=False, value="")

        generate_btn.click(
            generate_interaction,
            inputs=[conversation_display, narrator_input, user_input, api_endpoint, api_key, temperature] +
                   [input for char_input in character_inputs for input in char_input],
            outputs=[conversation_display, narrator_input, user_input, error_box]
        )

        reset_btn.click(
            reset_conversation,
            outputs=[conversation_display, narrator_input, user_input, error_box]
        )

        # FIXME - Implement saving chat history to database; look at Chat_UI.py for reference
        save_chat_history_to_db.click(
            save_chat_history_to_db_wrapper,
            inputs=[conversation_display, conversation_id, media_content, chat_media_name],
            outputs=[conversation_id, gr.Textbox(label="Save Status")]
        )


    return api_endpoint, api_key, temperature, narrator_input, conversation_display, user_input, generate_btn, reset_btn, error_box

#
# End of Narrator-Controlled Conversation tab
########################################################################################################################
App_Function_Libraries/Gradio_UI/Chat_Workflows.py DELETED
@@ -1,178 +0,0 @@
# Chat_Workflows.py
# Description: UI for Chat Workflows
#
# Imports
import json
import logging
from pathlib import Path
#
# External Imports
import gradio as gr
#
from App_Function_Libraries.Gradio_UI.Chat_ui import chat_wrapper, search_conversations, \
    load_conversation
from App_Function_Libraries.Chat import save_chat_history_to_db_wrapper
#
############################################################################################################
#
# Functions:

# Load workflows from a JSON file
json_path = Path('./Helper_Scripts/Workflows/Workflows.json')
with json_path.open('r') as f:
    workflows = json.load(f)
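
# The UI below reads each entry's 'name', 'prompts' (one prompt per step), and an
# optional 'context' string, so a minimal Workflows.json entry presumably looks like
# the following sketch (illustrative values only, not taken from the shipped file):
#
# [
#     {
#         "name": "Draft and Critique",
#         "context": "You will help the user refine a piece of writing.",
#         "prompts": [
#             "Step 1: Ask the user to paste their draft.",
#             "Step 2: Critique the draft and suggest improvements."
#         ]
#     }
# ]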


def chat_workflows_tab():
    with gr.TabItem("Chat Workflows", visible=True):
        gr.Markdown("# Workflows using LLMs")
        chat_history = gr.State([])
        media_content = gr.State({})
        selected_parts = gr.State([])
        conversation_id = gr.State(None)
        workflow_state = gr.State({"current_step": 0, "max_steps": 0, "conversation_id": None})

        with gr.Row():
            with gr.Column():
                workflow_selector = gr.Dropdown(label="Select Workflow", choices=[wf['name'] for wf in workflows])
                api_selector = gr.Dropdown(
                    label="Select API Endpoint",
                    choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
                             "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
                             "Custom-OpenAI-API"],
                    value="HuggingFace"
                )
                api_key_input = gr.Textbox(label="API Key (optional)", type="password")
                temperature = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
                save_conversation = gr.Checkbox(label="Save Conversation", value=False)
            with gr.Column():
                gr.Markdown("Placeholder")
        with gr.Row():
            with gr.Column():
                conversation_search = gr.Textbox(label="Search Conversations")
                search_conversations_btn = gr.Button("Search Conversations")
            with gr.Column():
                previous_conversations = gr.Dropdown(label="Select Conversation", choices=[], interactive=True)
                load_conversations_btn = gr.Button("Load Selected Conversation")
        with gr.Row():
            with gr.Column():
                context_input = gr.Textbox(label="Initial Context", lines=5)
                chatbot = gr.Chatbot(label="Workflow Chat")
                msg = gr.Textbox(label="Your Input")
                submit_btn = gr.Button("Submit")
                clear_btn = gr.Button("Clear Chat")
                chat_media_name = gr.Textbox(label="Custom Chat Name (optional)")
                save_btn = gr.Button("Save Chat to Database")

        def update_workflow_ui(workflow_name):
            if not workflow_name:
                return {"current_step": 0, "max_steps": 0, "conversation_id": None}, "", []
            selected_workflow = next((wf for wf in workflows if wf['name'] == workflow_name), None)
            if selected_workflow:
                num_prompts = len(selected_workflow['prompts'])
                context = selected_workflow.get('context', '')
                first_prompt = selected_workflow['prompts'][0]
                initial_chat = [(None, f"{first_prompt}")]
                logging.info(f"Initializing workflow: {workflow_name} with {num_prompts} steps")
                return {"current_step": 0, "max_steps": num_prompts, "conversation_id": None}, context, initial_chat
            else:
                logging.error(f"Selected workflow not found: {workflow_name}")
                return {"current_step": 0, "max_steps": 0, "conversation_id": None}, "", []

        def process_workflow_step(message, history, context, workflow_name, api_endpoint, api_key, workflow_state,
                                  save_conv, temp):
            logging.info(f"Process workflow step called with message: {message}")
            logging.info(f"Current workflow state: {workflow_state}")
            try:
                selected_workflow = next((wf for wf in workflows if wf['name'] == workflow_name), None)
                if not selected_workflow:
                    logging.error(f"Selected workflow not found: {workflow_name}")
                    return history, workflow_state, gr.update(interactive=True)

                current_step = workflow_state["current_step"]
                max_steps = workflow_state["max_steps"]

                logging.info(f"Current step: {current_step}, Max steps: {max_steps}")

                if current_step >= max_steps:
                    logging.info("Workflow completed, disabling input")
                    return history, workflow_state, gr.update(interactive=False)

                prompt = selected_workflow['prompts'][current_step]
                full_message = f"{context}\n\nStep {current_step + 1}: {prompt}\nUser: {message}"

                logging.info(f"Calling chat_wrapper with full_message: {full_message[:100]}...")
                bot_message, new_history, new_conversation_id = chat_wrapper(
                    full_message, history, media_content.value, selected_parts.value,
                    api_endpoint, api_key, "", workflow_state["conversation_id"],
                    save_conv, temp, "You are a helpful assistant guiding through a workflow."
                )

                logging.info(f"Received bot_message: {bot_message[:100]}...")

                next_step = current_step + 1
                new_workflow_state = {
                    "current_step": next_step,
                    "max_steps": max_steps,
                    "conversation_id": new_conversation_id
                }

                if next_step >= max_steps:
                    logging.info("Workflow completed after this step")
                    return new_history, new_workflow_state, gr.update(interactive=False)
                else:
                    next_prompt = selected_workflow['prompts'][next_step]
                    new_history.append((None, f"Step {next_step + 1}: {next_prompt}"))
                    logging.info(f"Moving to next step: {next_step}")
                    return new_history, new_workflow_state, gr.update(interactive=True)
            except Exception as e:
                logging.error(f"Error in process_workflow_step: {str(e)}")
                return history, workflow_state, gr.update(interactive=True)

        workflow_selector.change(
            update_workflow_ui,
            inputs=[workflow_selector],
            outputs=[workflow_state, context_input, chatbot]
        )

        submit_btn.click(
            process_workflow_step,
            inputs=[msg, chatbot, context_input, workflow_selector, api_selector, api_key_input, workflow_state,
                    save_conversation, temperature],
            outputs=[chatbot, workflow_state, msg]
        ).then(
            lambda: gr.update(value=""),
            outputs=[msg]
        )

        clear_btn.click(
            lambda: ([], {"current_step": 0, "max_steps": 0, "conversation_id": None}, ""),
            outputs=[chatbot, workflow_state, context_input]
        )

        save_btn.click(
            save_chat_history_to_db_wrapper,
            inputs=[chatbot, conversation_id, media_content, chat_media_name],
            outputs=[conversation_id, gr.Textbox(label="Save Status")]
        )

        search_conversations_btn.click(
            search_conversations,
            inputs=[conversation_search],
            outputs=[previous_conversations]
        )

        load_conversations_btn.click(
            lambda: ([], {"current_step": 0, "max_steps": 0, "conversation_id": None}, ""),
            outputs=[chatbot, workflow_state, context_input]
        ).then(
            load_conversation,
            inputs=[previous_conversations],
            outputs=[chatbot, conversation_id]
        )

    return workflow_selector, api_selector, api_key_input, context_input, chatbot, msg, submit_btn, clear_btn, save_btn

#
# End of script
############################################################################################################
App_Function_Libraries/Gradio_UI/Chat_ui.py DELETED
@@ -1,1185 +0,0 @@
# Chat_ui.py
# Description: Chat interface functions for Gradio
#
# Imports
import html
import json
import logging
import os
import sqlite3
from datetime import datetime
#
# External Imports
import gradio as gr
#
# Local Imports
from App_Function_Libraries.Chat import chat, save_chat_history, update_chat_content, save_chat_history_to_db_wrapper
from App_Function_Libraries.DB.DB_Manager import add_chat_message, search_chat_conversations, create_chat_conversation, \
    get_chat_messages, update_chat_message, delete_chat_message, load_preset_prompts, db
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_user_prompt


#
#
########################################################################################################################
#
# Functions:


def show_edit_message(selected):
    if selected:
        return gr.update(value=selected[0], visible=True), gr.update(value=selected[1], visible=True), gr.update(
            visible=True)
    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)


def show_delete_message(selected):
    if selected:
        return gr.update(value=selected[1], visible=True), gr.update(visible=True)
    return gr.update(visible=False), gr.update(visible=False)


def debug_output(media_content, selected_parts):
    print(f"Debug - Media Content: {media_content}")
    print(f"Debug - Selected Parts: {selected_parts}")
    return ""


def update_selected_parts(use_content, use_summary, use_prompt):
    selected_parts = []
    if use_content:
        selected_parts.append("content")
    if use_summary:
        selected_parts.append("summary")
    if use_prompt:
        selected_parts.append("prompt")
    print(f"Debug - Update Selected Parts: {selected_parts}")
    return selected_parts


# Old update_user_prompt shim for backwards compatibility
def get_system_prompt(preset_name):
    # For backwards compatibility
    prompts = update_user_prompt(preset_name)
    return prompts["system_prompt"]

def clear_chat():
    """
    Return empty list for chatbot and None for conversation_id
    @return:
    """
    return gr.update(value=[]), None


def clear_chat_single():
    """
    Clears the chatbot and chat history.

    Returns:
        list: Empty list for chatbot messages.
        list: Empty list for chat history.
    """
    return [], []

# FIXME - add additional features....
def chat_wrapper(message, history, media_content, selected_parts, api_endpoint, api_key, custom_prompt, conversation_id,
                 save_conversation, temperature, system_prompt, max_tokens=None, top_p=None, frequency_penalty=None,
                 presence_penalty=None, stop_sequence=None):
    try:
        if save_conversation:
            if conversation_id is None:
                # Create a new conversation
                media_id = media_content.get('id', None)
                conversation_name = f"Chat about {media_content.get('title', 'Unknown Media')} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
                conversation_id = create_chat_conversation(media_id, conversation_name)

            # Add user message to the database
            user_message_id = add_chat_message(conversation_id, "user", message)

        # Include the selected parts and custom_prompt only for the first message
        if not history and selected_parts:
            message_body = "\n".join(selected_parts)
            full_message = f"{custom_prompt}\n\n{message}\n\n{message_body}"
        elif custom_prompt:
            full_message = f"{custom_prompt}\n\n{message}"
        else:
            full_message = message

        # Generate bot response
        bot_message = chat(full_message, history, media_content, selected_parts, api_endpoint, api_key, custom_prompt,
                           temperature, system_prompt)

        logging.debug(f"Bot message being returned: {bot_message}")

        if save_conversation:
            # Add assistant message to the database
            add_chat_message(conversation_id, "assistant", bot_message)

        # Update history
        new_history = history + [(message, bot_message)]

        return bot_message, new_history, conversation_id
    except Exception as e:
        logging.error(f"Error in chat wrapper: {str(e)}")
        return "An error occurred.", history, conversation_id
-
126
- def search_conversations(query):
127
- try:
128
- conversations = search_chat_conversations(query)
129
- if not conversations:
130
- print(f"Debug - Search Conversations - No results found for query: {query}")
131
- return gr.update(choices=[])
132
-
133
- conversation_options = [
134
- (f"{c['conversation_name']} (Media: {c['media_title']}, ID: {c['id']})", c['id'])
135
- for c in conversations
136
- ]
137
- print(f"Debug - Search Conversations - Options: {conversation_options}")
138
- return gr.update(choices=conversation_options)
139
- except Exception as e:
140
- print(f"Debug - Search Conversations - Error: {str(e)}")
141
- return gr.update(choices=[])
142
-
143
-
144
- def load_conversation(conversation_id):
145
- if not conversation_id:
146
- return [], None
147
-
148
- messages = get_chat_messages(conversation_id)
149
- history = [
150
- (msg['message'], None) if msg['sender'] == 'user' else (None, msg['message'])
151
- for msg in messages
152
- ]
153
- return history, conversation_id
154
-
155
-
156
- def update_message_in_chat(message_id, new_text, history):
157
- update_chat_message(message_id, new_text)
158
- updated_history = [(msg1, msg2) if msg1[1] != message_id and msg2[1] != message_id
159
- else ((new_text, msg1[1]) if msg1[1] == message_id else (new_text, msg2[1]))
160
- for msg1, msg2 in history]
161
- return updated_history
162
-
163
-
164
- def delete_message_from_chat(message_id, history):
165
- delete_chat_message(message_id)
166
- updated_history = [(msg1, msg2) for msg1, msg2 in history if msg1[1] != message_id and msg2[1] != message_id]
167
- return updated_history
168
-
169
-
170
- def regenerate_last_message(history, media_content, selected_parts, api_endpoint, api_key, custom_prompt, temperature, system_prompt):
171
- if not history:
172
- return history, "No messages to regenerate."
173
-
174
- last_entry = history[-1]
175
- last_user_message, last_bot_message = last_entry
176
-
177
- if last_bot_message is None:
178
- return history, "The last message is not from the bot."
179
-
180
- new_history = history[:-1]
181
-
182
- if not last_user_message:
183
- return new_history, "No user message to regenerate the bot response."
184
-
185
- full_message = last_user_message
186
-
187
- bot_message = chat(
188
- full_message,
189
- new_history,
190
- media_content,
191
- selected_parts,
192
- api_endpoint,
193
- api_key,
194
- custom_prompt,
195
- temperature,
196
- system_prompt
197
- )
198
-
199
- new_history.append((last_user_message, bot_message))
200
-
201
- return new_history, "Last message regenerated successfully."
202
-
203
- def create_chat_interface():
204
- custom_css = """
205
- .chatbot-container .message-wrap .message {
206
- font-size: 14px !important;
207
- }
208
- """
209
- with gr.TabItem("Remote LLM Chat (Horizontal)", visible=True):
210
- gr.Markdown("# Chat with a designated LLM Endpoint, using your selected item as starting context")
211
- chat_history = gr.State([])
212
- media_content = gr.State({})
213
- selected_parts = gr.State([])
214
- conversation_id = gr.State(None)
215
-
216
- with gr.Row():
217
- with gr.Column(scale=1):
218
- search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
219
- search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title",
220
- label="Search By")
221
- search_button = gr.Button("Search")
222
- items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
223
- item_mapping = gr.State({})
224
- with gr.Row():
225
- use_content = gr.Checkbox(label="Use Content")
226
- use_summary = gr.Checkbox(label="Use Summary")
227
- use_prompt = gr.Checkbox(label="Use Prompt")
228
- save_conversation = gr.Checkbox(label="Save Conversation", value=False, visible=True)
229
- with gr.Row():
230
- temperature = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
231
- with gr.Row():
232
- conversation_search = gr.Textbox(label="Search Conversations")
233
- with gr.Row():
234
- search_conversations_btn = gr.Button("Search Conversations")
235
- with gr.Row():
236
- previous_conversations = gr.Dropdown(label="Select Conversation", choices=[], interactive=True)
237
- with gr.Row():
238
- load_conversations_btn = gr.Button("Load Selected Conversation")
239
-
240
- api_endpoint = gr.Dropdown(label="Select API Endpoint",
241
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek",
242
- "Mistral", "OpenRouter",
243
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama",
244
- "HuggingFace"])
245
- api_key = gr.Textbox(label="API Key (if required)", type="password")
246
- custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
247
- value=False,
248
- visible=True)
249
- preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
250
- value=False,
251
- visible=True)
252
- preset_prompt = gr.Dropdown(label="Select Preset Prompt",
253
- choices=load_preset_prompts(),
254
- visible=False)
255
- user_prompt = gr.Textbox(label="Custom Prompt",
256
- placeholder="Enter custom prompt here",
257
- lines=3,
258
- visible=False)
259
- system_prompt_input = gr.Textbox(label="System Prompt",
260
- value="You are a helpful AI assitant",
261
- lines=3,
262
- visible=False)
263
- with gr.Column(scale=2):
264
- chatbot = gr.Chatbot(height=600, elem_classes="chatbot-container")
265
- msg = gr.Textbox(label="Enter your message")
266
- submit = gr.Button("Submit")
267
- regenerate_button = gr.Button("Regenerate Last Message")
268
- clear_chat_button = gr.Button("Clear Chat")
269
-
270
- edit_message_id = gr.Number(label="Message ID to Edit", visible=False)
271
- edit_message_text = gr.Textbox(label="Edit Message", visible=False)
272
- update_message_button = gr.Button("Update Message", visible=False)
273
-
274
- delete_message_id = gr.Number(label="Message ID to Delete", visible=False)
275
- delete_message_button = gr.Button("Delete Message", visible=False)
276
-
277
- chat_media_name = gr.Textbox(label="Custom Chat Name(optional)")
278
- save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
279
- save_chat_history_as_file = gr.Button("Save Chat History as File")
280
- download_file = gr.File(label="Download Chat History")
281
- save_status = gr.Textbox(label="Save Status", interactive=False)
282
-
283
- # Restore original functionality
284
- search_button.click(
285
- fn=update_dropdown,
286
- inputs=[search_query_input, search_type_input],
287
- outputs=[items_output, item_mapping]
288
- )
289
-
290
- def save_chat_wrapper(history, conversation_id, media_content):
291
- file_path = save_chat_history(history, conversation_id, media_content)
292
- if file_path:
293
- return file_path, f"Chat history saved successfully as {os.path.basename(file_path)}!"
294
- else:
295
- return None, "Error saving chat history. Please check the logs and try again."
296
-
297
- save_chat_history_as_file.click(
298
- save_chat_wrapper,
299
- inputs=[chatbot, conversation_id, media_content],
300
- outputs=[download_file, save_status]
301
- )
302
-
303
- def update_prompts(preset_name):
304
- prompts = update_user_prompt(preset_name)
305
- return (
306
- gr.update(value=prompts["user_prompt"], visible=True),
307
- gr.update(value=prompts["system_prompt"], visible=True)
308
- )
309
-
310
- def clear_chat():
311
- return [], None # Return empty list for chatbot and None for conversation_id
312
-
313
- clear_chat_button.click(
314
- clear_chat,
315
- outputs=[chatbot, conversation_id]
316
- )
317
- preset_prompt.change(
318
- update_prompts,
319
- inputs=preset_prompt,
320
- outputs=[user_prompt, system_prompt_input]
321
- )
322
- custom_prompt_checkbox.change(
323
- fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
324
- inputs=[custom_prompt_checkbox],
325
- outputs=[user_prompt, system_prompt_input]
326
- )
327
- preset_prompt_checkbox.change(
328
- fn=lambda x: gr.update(visible=x),
329
- inputs=[preset_prompt_checkbox],
330
- outputs=[preset_prompt]
331
- )
332
- submit.click(
333
- chat_wrapper,
334
- inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, conversation_id,
335
- save_conversation, temperature, system_prompt_input],
336
- outputs=[msg, chatbot, conversation_id]
337
- ).then( # Clear the message box after submission
338
- lambda x: gr.update(value=""),
339
- inputs=[chatbot],
340
- outputs=[msg]
341
- ).then( # Clear the user prompt after the first message
342
- lambda: (gr.update(value=""), gr.update(value="")),
343
- outputs=[user_prompt, system_prompt_input]
344
- )
345
-
346
- items_output.change(
347
- update_chat_content,
348
- inputs=[items_output, use_content, use_summary, use_prompt, item_mapping],
349
- outputs=[media_content, selected_parts]
350
- )
351
- use_content.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
352
- outputs=[selected_parts])
353
- use_summary.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
354
- outputs=[selected_parts])
355
- use_prompt.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
356
- outputs=[selected_parts])
357
- items_output.change(debug_output, inputs=[media_content, selected_parts], outputs=[])
358
-
359
- search_conversations_btn.click(
360
- search_conversations,
361
- inputs=[conversation_search],
362
- outputs=[previous_conversations]
363
- )
364
-
365
- load_conversations_btn.click(
366
- clear_chat,
367
- outputs=[chatbot, chat_history]
368
- ).then(
369
- load_conversation,
370
- inputs=[previous_conversations],
371
- outputs=[chatbot, conversation_id]
372
- )
373
-
374
- previous_conversations.change(
375
- load_conversation,
376
- inputs=[previous_conversations],
377
- outputs=[chat_history]
378
- )
379
-
380
- update_message_button.click(
381
- update_message_in_chat,
382
- inputs=[edit_message_id, edit_message_text, chat_history],
383
- outputs=[chatbot]
384
- )
385
-
386
- delete_message_button.click(
387
- delete_message_from_chat,
388
- inputs=[delete_message_id, chat_history],
389
- outputs=[chatbot]
390
- )
391
-
392
- save_chat_history_as_file.click(
393
- save_chat_history,
394
- inputs=[chatbot, conversation_id],
395
- outputs=[download_file]
396
- )
397
-
398
- save_chat_history_to_db.click(
399
- save_chat_history_to_db_wrapper,
400
- inputs=[chatbot, conversation_id, media_content, chat_media_name],
401
- outputs=[conversation_id, gr.Textbox(label="Save Status")]
402
- )
403
-
404
- regenerate_button.click(
405
- regenerate_last_message,
406
- inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temperature, system_prompt_input],
407
- outputs=[chatbot, save_status]
408
- )
409
-
410
- chatbot.select(show_edit_message, None, [edit_message_text, edit_message_id, update_message_button])
411
- chatbot.select(show_delete_message, None, [delete_message_id, delete_message_button])
412
-
413
-
414
- def create_chat_interface_stacked():
415
- custom_css = """
416
- .chatbot-container .message-wrap .message {
417
- font-size: 14px !important;
418
- }
419
- """
420
- with gr.TabItem("Remote LLM Chat - Stacked", visible=True):
421
- gr.Markdown("# Stacked Chat")
422
- chat_history = gr.State([])
423
- media_content = gr.State({})
424
- selected_parts = gr.State([])
425
- conversation_id = gr.State(None)
426
-
427
- with gr.Row():
428
- with gr.Column():
429
- search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
430
- search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title",
431
- label="Search By")
432
- search_button = gr.Button("Search")
433
- items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
434
- item_mapping = gr.State({})
435
- with gr.Row():
436
- use_content = gr.Checkbox(label="Use Content")
437
- use_summary = gr.Checkbox(label="Use Summary")
438
- use_prompt = gr.Checkbox(label="Use Prompt")
439
- save_conversation = gr.Checkbox(label="Save Conversation", value=False, visible=True)
440
- temp = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
441
- with gr.Row():
442
- conversation_search = gr.Textbox(label="Search Conversations")
443
- with gr.Row():
444
- previous_conversations = gr.Dropdown(label="Select Conversation", choices=[], interactive=True)
445
- with gr.Row():
446
- search_conversations_btn = gr.Button("Search Conversations")
447
- load_conversations_btn = gr.Button("Load Selected Conversation")
448
- with gr.Column():
449
- api_endpoint = gr.Dropdown(label="Select API Endpoint",
450
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek",
451
- "OpenRouter", "Mistral", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi",
452
- "VLLM", "ollama", "HuggingFace"])
453
- api_key = gr.Textbox(label="API Key (if required)", type="password")
454
- preset_prompt = gr.Dropdown(label="Select Preset Prompt",
455
- choices=load_preset_prompts(),
456
- visible=True)
457
- system_prompt = gr.Textbox(label="System Prompt",
458
- value="You are a helpful AI assistant.",
459
- lines=3,
460
- visible=True)
461
- user_prompt = gr.Textbox(label="Custom User Prompt",
462
- placeholder="Enter custom prompt here",
463
- lines=3,
464
- visible=True)
465
- gr.Markdown("Scroll down for the chat window...")
466
- with gr.Row():
467
- with gr.Column(scale=1):
468
- chatbot = gr.Chatbot(height=600, elem_classes="chatbot-container")
469
- msg = gr.Textbox(label="Enter your message")
470
- with gr.Row():
471
- with gr.Column():
472
- submit = gr.Button("Submit")
473
- regenerate_button = gr.Button("Regenerate Last Message")
474
- clear_chat_button = gr.Button("Clear Chat")
475
- chat_media_name = gr.Textbox(label="Custom Chat Name(optional)", visible=True)
476
- save_chat_history_to_db = gr.Button("Save Chat History to DataBase")
477
- save_chat_history_as_file = gr.Button("Save Chat History as File")
478
- with gr.Column():
479
- download_file = gr.File(label="Download Chat History")
480
-
481
- # Restore original functionality
482
- search_button.click(
483
- fn=update_dropdown,
484
- inputs=[search_query_input, search_type_input],
485
- outputs=[items_output, item_mapping]
486
- )
487
-
488
- def update_prompts(preset_name):
489
- prompts = update_user_prompt(preset_name)
490
- return (
491
- gr.update(value=prompts["user_prompt"], visible=True),
492
- gr.update(value=prompts["system_prompt"], visible=True)
493
- )
494
-
495
- clear_chat_button.click(
496
- clear_chat,
497
- outputs=[chatbot, conversation_id]
498
- )
499
- preset_prompt.change(
500
- update_prompts,
501
- inputs=preset_prompt,
502
- outputs=[user_prompt, system_prompt]
503
- )
504
-
505
- submit.click(
506
- chat_wrapper,
507
- inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt,
508
- conversation_id, save_conversation, temp, system_prompt],
509
- outputs=[msg, chatbot, conversation_id]
510
- ).then( # Clear the message box after submission
511
- lambda x: gr.update(value=""),
512
- inputs=[chatbot],
513
- outputs=[msg]
514
- ).then( # Clear the user prompt after the first message
515
- lambda: gr.update(value=""),
516
- outputs=[user_prompt, system_prompt]
517
- )
518
-
519
- items_output.change(
520
- update_chat_content,
521
- inputs=[items_output, use_content, use_summary, use_prompt, item_mapping],
522
- outputs=[media_content, selected_parts]
523
- )
524
- use_content.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
525
- outputs=[selected_parts])
526
- use_summary.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
527
- outputs=[selected_parts])
528
- use_prompt.change(update_selected_parts, inputs=[use_content, use_summary, use_prompt],
529
- outputs=[selected_parts])
530
- items_output.change(debug_output, inputs=[media_content, selected_parts], outputs=[])
531
-
532
- search_conversations_btn.click(
533
- search_conversations,
534
- inputs=[conversation_search],
535
- outputs=[previous_conversations]
536
- )
537
-
538
- load_conversations_btn.click(
539
- clear_chat,
540
- outputs=[chatbot, chat_history]
541
- ).then(
542
- load_conversation,
543
- inputs=[previous_conversations],
544
- outputs=[chatbot, conversation_id]
545
- )
546
-
547
- previous_conversations.change(
548
- load_conversation,
549
- inputs=[previous_conversations],
550
- outputs=[chat_history]
551
- )
552
-
553
- save_chat_history_as_file.click(
554
- save_chat_history,
555
- inputs=[chatbot, conversation_id],
556
- outputs=[download_file]
557
- )
558
-
559
- save_chat_history_to_db.click(
560
- save_chat_history_to_db_wrapper,
561
- inputs=[chatbot, conversation_id, media_content, chat_media_name],
562
- outputs=[conversation_id, gr.Textbox(label="Save Status")]
563
- )
564
-
565
- regenerate_button.click(
566
- regenerate_last_message,
567
- inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temp, system_prompt],
568
- outputs=[chatbot, gr.Textbox(label="Regenerate Status")]
569
- )
570
-
571
-
572
- # FIXME - System prompts
573
- def create_chat_interface_multi_api():
574
- custom_css = """
575
- .chatbot-container .message-wrap .message {
576
- font-size: 14px !important;
577
- }
578
- .chat-window {
579
- height: 400px;
580
- overflow-y: auto;
581
- }
582
- """
583
- with gr.TabItem("One Prompt - Multiple APIs", visible=True):
584
- gr.Markdown("# One Prompt but Multiple APIs Chat Interface")
585
-
586
- with gr.Row():
587
- with gr.Column(scale=1):
588
- search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
589
- search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title",
590
- label="Search By")
591
- search_button = gr.Button("Search")
592
- items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
593
- item_mapping = gr.State({})
594
- with gr.Row():
595
- use_content = gr.Checkbox(label="Use Content")
596
- use_summary = gr.Checkbox(label="Use Summary")
597
- use_prompt = gr.Checkbox(label="Use Prompt")
598
- with gr.Column():
599
- preset_prompt = gr.Dropdown(label="Select Preset Prompt", choices=load_preset_prompts(), visible=True)
600
- system_prompt = gr.Textbox(label="System Prompt", value="You are a helpful AI assistant.", lines=5)
601
- user_prompt = gr.Textbox(label="Modify Prompt (Prefixed to your message every time)", lines=5, value="", visible=True)
602
-
603
- with gr.Row():
604
- chatbots = []
605
- api_endpoints = []
606
- api_keys = []
607
- temperatures = []
608
- regenerate_buttons = []
609
- for i in range(3):
610
- with gr.Column():
611
- gr.Markdown(f"### Chat Window {i + 1}")
612
- api_endpoint = gr.Dropdown(label=f"API Endpoint {i + 1}",
613
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq",
614
- "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp", "Kobold",
615
- "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"])
616
- api_key = gr.Textbox(label=f"API Key {i + 1} (if required)", type="password")
617
- temperature = gr.Slider(label=f"Temperature {i + 1}", minimum=0.0, maximum=1.0, step=0.05,
618
- value=0.7)
619
- chatbot = gr.Chatbot(height=800, elem_classes="chat-window")
620
- regenerate_button = gr.Button(f"Regenerate Last Message {i + 1}")
621
- chatbots.append(chatbot)
622
- api_endpoints.append(api_endpoint)
623
- api_keys.append(api_key)
624
- temperatures.append(temperature)
625
- regenerate_buttons.append(regenerate_button)
626
-
627
- with gr.Row():
628
- msg = gr.Textbox(label="Enter your message", scale=4)
629
- submit = gr.Button("Submit", scale=1)
630
- clear_chat_button = gr.Button("Clear All Chats")
631
-
632
- # State variables
633
- chat_history = [gr.State([]) for _ in range(3)]
634
- media_content = gr.State({})
635
- selected_parts = gr.State([])
636
- conversation_id = gr.State(None)
637
-
638
- # Event handlers
639
- search_button.click(
640
- fn=update_dropdown,
641
- inputs=[search_query_input, search_type_input],
642
- outputs=[items_output, item_mapping]
643
- )
644
-
645
- preset_prompt.change(update_user_prompt, inputs=preset_prompt, outputs=user_prompt)
646
-
647
-
648
- def clear_all_chats():
649
- return [[]] * 3 + [[]] * 3
650
-
651
- clear_chat_button.click(
652
- clear_all_chats,
653
- outputs=chatbots + chat_history
654
- )
655
- def chat_wrapper_multi(message, custom_prompt, system_prompt, *args):
656
- chat_histories = args[:3]
657
- chatbots = args[3:6]
658
- api_endpoints = args[6:9]
659
- api_keys = args[9:12]
660
- temperatures = args[12:15]
661
- media_content = args[15]
662
- selected_parts = args[16]
663
-
664
- new_chat_histories = []
665
- new_chatbots = []
666
-
667
- for i in range(3):
668
- # Call chat_wrapper with dummy values for conversation_id and save_conversation
669
- bot_message, new_history, _ = chat_wrapper(
670
- message, chat_histories[i], media_content, selected_parts,
671
- api_endpoints[i], api_keys[i], custom_prompt, None, # None for conversation_id
672
- False, # False for save_conversation
673
- temperature=temperatures[i],
674
- system_prompt=system_prompt
675
- )
676
-
677
- new_chatbot = chatbots[i] + [(message, bot_message)]
678
-
679
- new_chat_histories.append(new_history)
680
- new_chatbots.append(new_chatbot)
681
-
682
- return [gr.update(value="")] + new_chatbots + new_chat_histories
683
-
684
-
685
- def regenerate_last_message(chat_history, chatbot, media_content, selected_parts, api_endpoint, api_key, custom_prompt, temperature, system_prompt):
686
- if not chat_history:
687
- return chatbot, chat_history, "No messages to regenerate."
688
-
689
- last_entry = chat_history[-1]
690
- last_user_message, last_bot_message = last_entry
691
-
692
- if last_bot_message is None:
693
- return chatbot, chat_history, "The last message is not from the bot."
694
-
695
- new_history = chat_history[:-1]
696
-
697
- if not last_user_message:
698
- return chatbot[:-1], new_history, "No user message to regenerate the bot response."
699
-
700
- bot_message = chat(
701
- last_user_message,
702
- new_history,
703
- media_content,
704
- selected_parts,
705
- api_endpoint,
706
- api_key,
707
- custom_prompt,
708
- temperature,
709
- system_prompt
710
- )
711
-
712
- new_history.append((last_user_message, bot_message))
713
- new_chatbot = chatbot[:-1] + [(last_user_message, bot_message)]
714
-
715
- return new_chatbot, new_history, "Last message regenerated successfully."
716
-
717
- for i in range(3):
718
- regenerate_buttons[i].click(
719
- regenerate_last_message,
720
- inputs=[chat_history[i], chatbots[i], media_content, selected_parts, api_endpoints[i], api_keys[i], user_prompt, temperatures[i], system_prompt],
721
- outputs=[chatbots[i], chat_history[i], gr.Textbox(label=f"Regenerate Status {i + 1}")]
722
- )
723
-
724
- # In the create_chat_interface_multi_api function:
725
- submit.click(
726
- chat_wrapper_multi,
727
- inputs=[msg, user_prompt,
728
- system_prompt] + chat_history + chatbots + api_endpoints + api_keys + temperatures +
729
- [media_content, selected_parts],
730
- outputs=[msg] + chatbots + chat_history
731
- ).then(
732
- lambda: (gr.update(value=""), gr.update(value="")),
733
- outputs=[msg, user_prompt]
734
- )
735
-
736
- items_output.change(
737
- update_chat_content,
738
- inputs=[items_output, use_content, use_summary, use_prompt, item_mapping],
739
- outputs=[media_content, selected_parts]
740
- )
741
-
742
- for checkbox in [use_content, use_summary, use_prompt]:
743
- checkbox.change(
744
- update_selected_parts,
745
- inputs=[use_content, use_summary, use_prompt],
746
- outputs=[selected_parts]
747
- )
748
-
749
-
750
-
751
- def create_chat_interface_four():
752
- custom_css = """
753
- .chatbot-container .message-wrap .message {
754
- font-size: 14px !important;
755
- }
756
- .chat-window {
757
- height: 400px;
758
- overflow-y: auto;
759
- }
760
- """
761
-
762
- with gr.TabItem("Four Independent API Chats", visible=True):
763
- gr.Markdown("# Four Independent API Chat Interfaces")
764
-
765
- with gr.Row():
766
- with gr.Column():
767
- preset_prompt = gr.Dropdown(
768
- label="Select Preset Prompt",
769
- choices=load_preset_prompts(),
770
- visible=True
771
- )
772
- user_prompt = gr.Textbox(
773
- label="Modify Prompt",
774
- lines=3
775
- )
776
- with gr.Column():
777
- gr.Markdown("Scroll down for the chat windows...")
778
-
779
- chat_interfaces = []
780
-
781
- def create_single_chat_interface(index, user_prompt_component):
782
- with gr.Column():
783
- gr.Markdown(f"### Chat Window {index + 1}")
784
- api_endpoint = gr.Dropdown(
785
- label=f"API Endpoint {index + 1}",
786
- choices=[
787
- "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq",
788
- "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp", "Kobold",
789
- "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"
790
- ]
791
- )
792
- api_key = gr.Textbox(
793
- label=f"API Key {index + 1} (if required)",
794
- type="password"
795
- )
796
- temperature = gr.Slider(
797
- label=f"Temperature {index + 1}",
798
- minimum=0.0,
799
- maximum=1.0,
800
- step=0.05,
801
- value=0.7
802
- )
803
- chatbot = gr.Chatbot(height=400, elem_classes="chat-window")
804
- msg = gr.Textbox(label=f"Enter your message for Chat {index + 1}")
805
- submit = gr.Button(f"Submit to Chat {index + 1}")
806
- regenerate_button = gr.Button(f"Regenerate Last Message {index + 1}")
807
- clear_chat_button = gr.Button(f"Clear Chat {index + 1}")
808
-
809
- # State to maintain chat history
810
- chat_history = gr.State([])
811
-
812
- # Append to chat_interfaces list
813
- chat_interfaces.append({
814
- 'api_endpoint': api_endpoint,
815
- 'api_key': api_key,
816
- 'temperature': temperature,
817
- 'chatbot': chatbot,
818
- 'msg': msg,
819
- 'submit': submit,
820
- 'regenerate_button': regenerate_button,
821
- 'clear_chat_button': clear_chat_button,
822
- 'chat_history': chat_history
823
- })
824
-
825
- # Create four chat interfaces arranged in a 2x2 grid
826
- with gr.Row():
827
- for i in range(2):
828
- with gr.Column():
829
- for j in range(2):
830
- create_single_chat_interface(i * 2 + j, user_prompt)
831
-
832
- # Update user_prompt based on preset_prompt selection
833
- preset_prompt.change(
834
- fn=update_user_prompt,
835
- inputs=preset_prompt,
836
- outputs=user_prompt
837
- )
838
-
839
- def chat_wrapper_single(message, chat_history, api_endpoint, api_key, temperature, user_prompt):
840
- logging.debug(f"Chat Wrapper Single - Message: {message}, Chat History: {chat_history}")
841
-
842
- new_msg, new_history, _ = chat_wrapper(
843
- message,
844
- chat_history,
845
- {}, # Empty media_content
846
- [], # Empty selected_parts
847
- api_endpoint,
848
- api_key,
849
- user_prompt, # custom_prompt
850
- None, # conversation_id
851
- False, # save_conversation
852
- temperature, # temperature
853
- system_prompt="", # system_prompt
854
- max_tokens=None,
855
- top_p=None,
856
- frequency_penalty=None,
857
- presence_penalty=None,
858
- stop_sequence=None
859
- )
860
- if "API request failed" not in new_msg:
861
- chat_history.append((message, new_msg))
862
- else:
863
- logging.error(f"API request failed: {new_msg}")
864
-
865
- return "", chat_history, chat_history
866
-
867
- def regenerate_last_message(chat_history, api_endpoint, api_key, temperature, user_prompt):
868
- if not chat_history:
869
- return chat_history, chat_history, "No messages to regenerate."
870
-
871
- last_user_message, _ = chat_history[-1]
872
-
873
- new_msg, new_history, _ = chat_wrapper(
874
- last_user_message,
875
- chat_history[:-1],
876
- {}, # Empty media_content
877
- [], # Empty selected_parts
878
- api_endpoint,
879
- api_key,
880
- user_prompt, # custom_prompt
881
- None, # conversation_id
882
- False, # save_conversation
883
- temperature, # temperature
884
- system_prompt="", # system_prompt
885
- max_tokens=None,
886
- top_p=None,
887
- frequency_penalty=None,
888
- presence_penalty=None,
889
- stop_sequence=None
890
- )
891
-
892
- if "API request failed" not in new_msg:
893
- new_history.append((last_user_message, new_msg))
894
- return new_history, new_history, "Last message regenerated successfully."
895
- else:
896
- logging.error(f"API request failed during regeneration: {new_msg}")
897
- return chat_history, chat_history, f"Failed to regenerate: {new_msg}"
898
-
899
- # Attach click events for each chat interface
900
- for interface in chat_interfaces:
901
- interface['submit'].click(
902
- chat_wrapper_single,
903
- inputs=[
904
- interface['msg'],
905
- interface['chat_history'],
906
- interface['api_endpoint'],
907
- interface['api_key'],
908
- interface['temperature'],
909
- user_prompt
910
- ],
911
- outputs=[
912
- interface['msg'],
913
- interface['chatbot'],
914
- interface['chat_history']
915
- ]
916
- )
917
-
918
- interface['regenerate_button'].click(
919
- regenerate_last_message,
920
- inputs=[
921
- interface['chat_history'],
922
- interface['api_endpoint'],
923
- interface['api_key'],
924
- interface['temperature'],
925
- user_prompt
926
- ],
927
- outputs=[
928
- interface['chatbot'],
929
- interface['chat_history'],
930
- gr.Textbox(label="Regenerate Status")
931
- ]
932
- )
933
-
934
- interface['clear_chat_button'].click(
935
- clear_chat_single,
936
- inputs=[],
937
- outputs=[interface['chatbot'], interface['chat_history']]
938
- )
939
-
940
-
941
- def chat_wrapper_single(message, chat_history, chatbot, api_endpoint, api_key, temperature, media_content,
942
- selected_parts, conversation_id, save_conversation, user_prompt):
943
- new_msg, new_history, new_conv_id = chat_wrapper(
944
- message, chat_history, media_content, selected_parts,
945
- api_endpoint, api_key, user_prompt, conversation_id,
946
- save_conversation, temperature, system_prompt=""
947
- )
948
-
949
- if new_msg:
950
- updated_chatbot = chatbot + [(message, new_msg)]
951
- else:
952
- updated_chatbot = chatbot
953
-
954
- return new_msg, updated_chatbot, new_history, new_conv_id
955
-
956
-
957
- # FIXME - Finish implementing functions + testing/validation
958
- def create_chat_management_tab():
959
- with gr.TabItem("Chat Management", visible=True):
960
- gr.Markdown("# Chat Management")
961
-
962
- with gr.Row():
963
- search_query = gr.Textbox(label="Search Conversations")
964
- search_button = gr.Button("Search")
965
-
966
- conversation_list = gr.Dropdown(label="Select Conversation", choices=[])
967
- conversation_mapping = gr.State({})
968
-
969
- with gr.Tabs():
970
- with gr.TabItem("Edit", visible=True):
971
- chat_content = gr.TextArea(label="Chat Content (JSON)", lines=20, max_lines=50)
972
- save_button = gr.Button("Save Changes")
973
- delete_button = gr.Button("Delete Conversation", variant="stop")
974
-
975
- with gr.TabItem("Preview", visible=True):
976
- chat_preview = gr.HTML(label="Chat Preview")
977
- result_message = gr.Markdown("")
978
-
979
- def search_conversations(query):
980
- conversations = search_chat_conversations(query)
981
- choices = [f"{conv['conversation_name']} (Media: {conv['media_title']}, ID: {conv['id']})" for conv in
982
- conversations]
983
- mapping = {choice: conv['id'] for choice, conv in zip(choices, conversations)}
984
- return gr.update(choices=choices), mapping
985
-
986
- def load_conversations(selected, conversation_mapping):
987
- logging.info(f"Selected: {selected}")
988
- logging.info(f"Conversation mapping: {conversation_mapping}")
989
-
990
- try:
991
- if selected and selected in conversation_mapping:
992
- conversation_id = conversation_mapping[selected]
993
- messages = get_chat_messages(conversation_id)
994
- conversation_data = {
995
- "conversation_id": conversation_id,
996
- "messages": messages
997
- }
998
- json_content = json.dumps(conversation_data, indent=2)
999
-
1000
- # Create HTML preview
1001
- html_preview = "<div style='max-height: 500px; overflow-y: auto;'>"
1002
- for msg in messages:
1003
- sender_style = "background-color: #e6f3ff;" if msg[
1004
- 'sender'] == 'user' else "background-color: #f0f0f0;"
1005
- html_preview += f"<div style='margin-bottom: 10px; padding: 10px; border-radius: 5px; {sender_style}'>"
1006
- html_preview += f"<strong>{msg['sender']}:</strong> {html.escape(msg['message'])}<br>"
1007
- html_preview += f"<small>Timestamp: {msg['timestamp']}</small>"
1008
- html_preview += "</div>"
1009
- html_preview += "</div>"
1010
-
1011
- logging.info("Returning json_content and html_preview")
1012
- return json_content, html_preview
1013
- else:
1014
- logging.warning("No conversation selected or not in mapping")
1015
- return "", "<p>No conversation selected</p>"
1016
- except Exception as e:
1017
- logging.error(f"Error in load_conversations: {str(e)}")
1018
- return f"Error: {str(e)}", "<p>Error loading conversation</p>"
1019
-
1020
- def validate_conversation_json(content):
1021
- try:
1022
- data = json.loads(content)
1023
- if not isinstance(data, dict):
1024
- return False, "Invalid JSON structure: root should be an object"
1025
- if "conversation_id" not in data or not isinstance(data["conversation_id"], int):
1026
- return False, "Missing or invalid conversation_id"
1027
- if "messages" not in data or not isinstance(data["messages"], list):
1028
- return False, "Missing or invalid messages array"
1029
- for msg in data["messages"]:
1030
- if not all(key in msg for key in ["sender", "message"]):
1031
- return False, "Invalid message structure: missing required fields"
1032
- return True, data
1033
- except json.JSONDecodeError as e:
1034
- return False, f"Invalid JSON: {str(e)}"
1035
-
1036
- def save_conversation(selected, conversation_mapping, content):
1037
- if not selected or selected not in conversation_mapping:
1038
- return "Please select a conversation before saving.", "<p>No changes made</p>"
1039
-
1040
- conversation_id = conversation_mapping[selected]
1041
- is_valid, result = validate_conversation_json(content)
1042
-
1043
- if not is_valid:
1044
- return f"Error: {result}", "<p>No changes made due to error</p>"
1045
-
1046
- conversation_data = result
1047
- if conversation_data["conversation_id"] != conversation_id:
1048
- return "Error: Conversation ID mismatch.", "<p>No changes made due to ID mismatch</p>"
1049
-
1050
- try:
1051
- with db.get_connection() as conn:
1052
- conn.execute("BEGIN TRANSACTION")
1053
- cursor = conn.cursor()
1054
-
1055
- # Backup original conversation
1056
- cursor.execute("SELECT * FROM ChatMessages WHERE conversation_id = ?", (conversation_id,))
1057
- original_messages = cursor.fetchall()
1058
- backup_data = json.dumps({"conversation_id": conversation_id, "messages": original_messages})
1059
-
1060
- # You might want to save this backup_data somewhere
1061
-
1062
- # Delete existing messages
1063
- cursor.execute("DELETE FROM ChatMessages WHERE conversation_id = ?", (conversation_id,))
1064
-
1065
- # Insert updated messages
1066
- for message in conversation_data["messages"]:
1067
- cursor.execute('''
1068
- INSERT INTO ChatMessages (conversation_id, sender, message, timestamp)
1069
- VALUES (?, ?, ?, COALESCE(?, CURRENT_TIMESTAMP))
1070
- ''', (conversation_id, message["sender"], message["message"], message.get("timestamp")))
1071
-
1072
- conn.commit()
1073
-
1074
- # Create updated HTML preview
1075
- html_preview = "<div style='max-height: 500px; overflow-y: auto;'>"
1076
- for msg in conversation_data["messages"]:
1077
- sender_style = "background-color: #e6f3ff;" if msg[
1078
- 'sender'] == 'user' else "background-color: #f0f0f0;"
1079
- html_preview += f"<div style='margin-bottom: 10px; padding: 10px; border-radius: 5px; {sender_style}'>"
1080
- html_preview += f"<strong>{msg['sender']}:</strong> {html.escape(msg['message'])}<br>"
1081
- html_preview += f"<small>Timestamp: {msg.get('timestamp', 'N/A')}</small>"
1082
- html_preview += "</div>"
1083
- html_preview += "</div>"
1084
-
1085
- return "Conversation updated successfully.", html_preview
1086
- except sqlite3.Error as e:
1087
- conn.rollback()
1088
- logging.error(f"Database error in save_conversation: {e}")
1089
- return f"Error updating conversation: {str(e)}", "<p>Error occurred while saving</p>"
1090
- except Exception as e:
1091
- conn.rollback()
1092
- logging.error(f"Unexpected error in save_conversation: {e}")
1093
- return f"Unexpected error: {str(e)}", "<p>Unexpected error occurred</p>"
1094
-
1095
- def delete_conversation(selected, conversation_mapping):
1096
- if not selected or selected not in conversation_mapping:
1097
- return "Please select a conversation before deleting.", "<p>No changes made</p>", gr.update(choices=[])
1098
-
1099
- conversation_id = conversation_mapping[selected]
1100
-
1101
- try:
1102
- with db.get_connection() as conn:
1103
- cursor = conn.cursor()
1104
-
1105
- # Delete messages associated with the conversation
1106
- cursor.execute("DELETE FROM ChatMessages WHERE conversation_id = ?", (conversation_id,))
1107
-
1108
- # Delete the conversation itself
1109
- cursor.execute("DELETE FROM ChatConversations WHERE id = ?", (conversation_id,))
1110
-
1111
- conn.commit()
1112
-
1113
- # Update the conversation list
1114
- remaining_conversations = [choice for choice in conversation_mapping.keys() if choice != selected]
1115
- updated_mapping = {choice: conversation_mapping[choice] for choice in remaining_conversations}
1116
-
1117
- return "Conversation deleted successfully.", "<p>Conversation deleted</p>", gr.update(choices=remaining_conversations)
1118
- except sqlite3.Error as e:
1119
- conn.rollback()
1120
- logging.error(f"Database error in delete_conversation: {e}")
1121
- return f"Error deleting conversation: {str(e)}", "<p>Error occurred while deleting</p>", gr.update()
1122
- except Exception as e:
1123
- conn.rollback()
1124
- logging.error(f"Unexpected error in delete_conversation: {e}")
1125
- return f"Unexpected error: {str(e)}", "<p>Unexpected error occurred</p>", gr.update()
1126
-
1127
- def parse_formatted_content(formatted_content):
1128
- lines = formatted_content.split('\n')
1129
- conversation_id = int(lines[0].split(': ')[1])
1130
- timestamp = lines[1].split(': ')[1]
1131
- history = []
1132
- current_role = None
1133
- current_content = None
1134
- for line in lines[3:]:
1135
- if line.startswith("Role: "):
1136
- if current_role is not None:
1137
- history.append({"role": current_role, "content": ["", current_content]})
1138
- current_role = line.split(': ')[1]
1139
- elif line.startswith("Content: "):
1140
- current_content = line.split(': ', 1)[1]
1141
- if current_role is not None:
1142
- history.append({"role": current_role, "content": ["", current_content]})
1143
- return json.dumps({
1144
- "conversation_id": conversation_id,
1145
- "timestamp": timestamp,
1146
- "history": history
1147
- }, indent=2)
1148
-
1149
- search_button.click(
1150
- search_conversations,
1151
- inputs=[search_query],
1152
- outputs=[conversation_list, conversation_mapping]
1153
- )
1154
-
1155
- conversation_list.change(
1156
- load_conversations,
1157
- inputs=[conversation_list, conversation_mapping],
1158
- outputs=[chat_content, chat_preview]
1159
- )
1160
-
1161
- save_button.click(
1162
- save_conversation,
1163
- inputs=[conversation_list, conversation_mapping, chat_content],
1164
- outputs=[result_message, chat_preview]
1165
- )
1166
-
1167
- delete_button.click(
1168
- delete_conversation,
1169
- inputs=[conversation_list, conversation_mapping],
1170
- outputs=[result_message, chat_preview, conversation_list]
1171
- )
1172
-
1173
- return search_query, search_button, conversation_list, conversation_mapping, chat_content, save_button, delete_button, result_message, chat_preview
1174
-
1175
-
1176
-
1177
- # Mock function to simulate LLM processing
1178
- def process_with_llm(workflow, context, prompt, api_endpoint, api_key):
1179
- api_key_snippet = api_key[:5] + "..." if api_key else "Not provided"
1180
- return f"LLM output using {api_endpoint} (API Key: {api_key_snippet}) for {workflow} with context: {context[:30]}... and prompt: {prompt[:30]}..."
1181
-
1182
-
1183
- #
1184
- # End of Chat_ui.py
1185
- #######################################################################################################################
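Note: every chat window in the deleted tabs above follows the same wiring pattern — a gr.State list of (user, bot) tuples threaded through each click handler alongside the visible Chatbot. A minimal, self-contained sketch of that pattern, with fake_llm as a hypothetical stand-in for the real chat_wrapper call:

import gradio as gr

def fake_llm(message, history):
    # Hypothetical stand-in for chat_wrapper(); echoes the input back.
    return f"Echo: {message}"

def respond(message, history):
    bot_message = fake_llm(message, history)
    history = history + [(message, bot_message)]
    # Clear the textbox, refresh the visible chat, persist the history state.
    return "", history, history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=400)
    history = gr.State([])
    msg = gr.Textbox(label="Enter your message")
    submit = gr.Button("Submit")
    submit.click(respond, inputs=[msg, history], outputs=[msg, chatbot, history])

if __name__ == "__main__":
    demo.launch()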
 
App_Function_Libraries/Gradio_UI/Config_tab.py DELETED
@@ -1,51 +0,0 @@
1
- import gradio as gr
2
- import configparser
3
-
4
- # FIXME
5
- CONFIG_PATH = './Config_Files/config.txt'
6
-
7
- def load_config():
8
- config = configparser.ConfigParser()
9
- config.read(CONFIG_PATH)
10
- return config
11
-
12
- def save_config(config):
13
- with open(CONFIG_PATH, 'w') as configfile:
14
- config.write(configfile)
15
-
16
- def get_config_as_text():
17
- with open(CONFIG_PATH, 'r') as file:
18
- content = file.read()
19
- return content, "Config refreshed successfully"
20
-
21
- def save_config_from_text(text):
22
- with open(CONFIG_PATH, 'w') as file:
23
- file.write(text)
24
- return "Config saved successfully"
25
-
26
-
27
- def create_config_editor_tab():
28
- with gr.TabItem("Edit Config", visible=True):
29
- gr.Markdown("# Edit Configuration File")
30
-
31
- with gr.Row():
32
- with gr.Column():
33
- refresh_button = gr.Button("Refresh Config")
34
-
35
- with gr.Column():
36
- config_text = gr.TextArea(label="Full Config", lines=30)
37
- save_text_button = gr.Button("Save Config")
38
-
39
- with gr.Row():
40
- output = gr.Textbox(label="Output")
41
-
42
- # Event handlers
43
- refresh_button.click(get_config_as_text, inputs=[], outputs=[config_text, output])
44
-
45
- config_text.change(lambda: None, None, None) # Dummy handler to enable changes
46
- save_text_button.click(save_config_from_text, inputs=[config_text], outputs=[output])
47
-
48
- # Initialize the interface
49
- config_text.value = get_config_as_text()[0] # Only set the config text, not the output message
50
-
51
- return refresh_button, config_text, save_text_button, output
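Note: the tab above treats the config purely as text; for programmatic edits, configparser round-trips the same file. A short sketch assuming the CONFIG_PATH above — the 'API' section and 'openai_api_key' option names are hypothetical, not taken from the real config:

import configparser

CONFIG_PATH = './Config_Files/config.txt'

config = configparser.ConfigParser()
config.read(CONFIG_PATH)

# Guard against a missing section before setting an option.
if not config.has_section('API'):
    config.add_section('API')
config.set('API', 'openai_api_key', 'sk-...')  # placeholder value

with open(CONFIG_PATH, 'w') as configfile:
    config.write(configfile)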
 
App_Function_Libraries/Gradio_UI/Embeddings_tab.py DELETED
@@ -1,508 +0,0 @@
1
- # Embeddings_tab.py
2
- # Description: This file contains the code for the Create/View/Purge Embeddings tabs in the Gradio UI
3
- #
4
- # Imports
5
- import json
6
- import logging
7
- #
8
- # External Imports
9
- import gradio as gr
10
- import numpy as np
11
- from tqdm import tqdm
12
- #
13
- # Local Imports
14
- from App_Function_Libraries.DB.DB_Manager import get_all_content_from_database
15
- from App_Function_Libraries.RAG.ChromaDB_Library import chroma_client, \
16
- store_in_chroma, situate_context
17
- from App_Function_Libraries.RAG.Embeddings_Create import create_embedding, create_embeddings_batch
18
- from App_Function_Libraries.Chunk_Lib import improved_chunking_process, chunk_for_embedding
19
- #
20
- ########################################################################################################################
21
- #
22
- # Functions:
23
-
24
- def create_embeddings_tab():
25
- with gr.TabItem("Create Embeddings", visible=True):
26
- gr.Markdown("# Create Embeddings for All Content")
27
-
28
- with gr.Row():
29
- with gr.Column():
30
- embedding_provider = gr.Radio(
31
- choices=["huggingface", "local", "openai"],
32
- label="Select Embedding Provider",
33
- value="huggingface"
34
- )
35
- gr.Markdown("Note: Local provider requires a running Llama.cpp/llamafile server.")
36
- gr.Markdown("OpenAI provider requires a valid API key.")
37
-
38
- huggingface_model = gr.Dropdown(
39
- choices=[
40
- "jinaai/jina-embeddings-v3",
41
- "Alibaba-NLP/gte-large-en-v1.5",
42
- "dunzhang/setll_en_400M_v5",
43
- "custom"
44
- ],
45
- label="Hugging Face Model",
46
- value="jinaai/jina-embeddings-v3",
47
- visible=True
48
- )
49
-
50
- openai_model = gr.Dropdown(
51
- choices=[
52
- "text-embedding-3-small",
53
- "text-embedding-3-large"
54
- ],
55
- label="OpenAI Embedding Model",
56
- value="text-embedding-3-small",
57
- visible=False
58
- )
59
-
60
- custom_embedding_model = gr.Textbox(
61
- label="Custom Embedding Model",
62
- placeholder="Enter your custom embedding model name here",
63
- visible=False
64
- )
65
-
66
- embedding_api_url = gr.Textbox(
67
- label="API URL (for local provider)",
68
- value="http://localhost:8080/embedding",
69
- visible=False
70
- )
71
-
72
- # Add chunking options
73
- chunking_method = gr.Dropdown(
74
- choices=["words", "sentences", "paragraphs", "tokens", "semantic"],
75
- label="Chunking Method",
76
- value="words"
77
- )
78
- max_chunk_size = gr.Slider(
79
- minimum=1, maximum=8000, step=1, value=500,
80
- label="Max Chunk Size"
81
- )
82
- chunk_overlap = gr.Slider(
83
- minimum=0, maximum=4000, step=1, value=200,
84
- label="Chunk Overlap"
85
- )
86
- adaptive_chunking = gr.Checkbox(
87
- label="Use Adaptive Chunking",
88
- value=False
89
- )
90
-
91
- create_button = gr.Button("Create Embeddings")
92
-
93
- with gr.Column():
94
- status_output = gr.Textbox(label="Status", lines=10)
95
-
96
- def update_provider_options(provider):
97
- if provider == "huggingface":
98
- return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
99
- elif provider == "local":
100
- return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
101
- else: # OpenAI
102
- return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
103
-
104
- def update_huggingface_options(model):
105
- if model == "custom":
106
- return gr.update(visible=True)
107
- else:
108
- return gr.update(visible=False)
109
-
110
- embedding_provider.change(
111
- fn=update_provider_options,
112
- inputs=[embedding_provider],
113
- outputs=[huggingface_model, openai_model, custom_embedding_model, embedding_api_url]
114
- )
115
-
116
- huggingface_model.change(
117
- fn=update_huggingface_options,
118
- inputs=[huggingface_model],
119
- outputs=[custom_embedding_model]
120
- )
121
-
122
- def create_all_embeddings(provider, hf_model, openai_model, custom_model, api_url, method, max_size, overlap, adaptive):
123
- try:
124
- all_content = get_all_content_from_database()
125
- if not all_content:
126
- return "No content found in the database."
127
-
128
- chunk_options = {
129
- 'method': method,
130
- 'max_size': max_size,
131
- 'overlap': overlap,
132
- 'adaptive': adaptive
133
- }
134
-
135
- collection_name = "all_content_embeddings"
136
- collection = chroma_client.get_or_create_collection(name=collection_name)
137
-
138
- # Determine the model to use
139
- if provider == "huggingface":
140
- model = custom_model if hf_model == "custom" else hf_model
141
- elif provider == "openai":
142
- model = openai_model
143
- else:
144
- model = custom_model
145
-
146
- for item in all_content:
147
- media_id = item['id']
148
- text = item['content']
149
-
150
- chunks = improved_chunking_process(text, chunk_options)
151
- for i, chunk in enumerate(chunks):
152
- chunk_text = chunk['text']
153
- chunk_id = f"doc_{media_id}_chunk_{i}"
154
-
155
- existing = collection.get(ids=[chunk_id])
156
- if existing['ids']:
157
- continue
158
-
159
- embedding = create_embedding(chunk_text, provider, model, api_url)
160
- metadata = {
161
- "media_id": str(media_id),
162
- "chunk_index": i,
163
- "total_chunks": len(chunks),
164
- "chunking_method": method,
165
- "max_chunk_size": max_size,
166
- "chunk_overlap": overlap,
167
- "adaptive_chunking": adaptive,
168
- "embedding_model": model,
169
- "embedding_provider": provider,
170
- **chunk['metadata']
171
- }
172
- store_in_chroma(collection_name, [chunk_text], [embedding], [chunk_id], [metadata])
173
-
174
- return "Embeddings created and stored successfully for all content."
175
- except Exception as e:
176
- logging.error(f"Error during embedding creation: {str(e)}")
177
- return f"Error: {str(e)}"
178
-
179
- create_button.click(
180
- fn=create_all_embeddings,
181
- inputs=[embedding_provider, huggingface_model, openai_model, custom_embedding_model, embedding_api_url,
182
- chunking_method, max_chunk_size, chunk_overlap, adaptive_chunking],
183
- outputs=status_output
184
- )
185
-
186
-
187
- def create_view_embeddings_tab():
188
- with gr.TabItem("View/Update Embeddings", visible=True):
189
- gr.Markdown("# View and Update Embeddings")
190
- item_mapping = gr.State({})
191
- with gr.Row():
192
- with gr.Column():
193
- item_dropdown = gr.Dropdown(label="Select Item", choices=[], interactive=True)
194
- refresh_button = gr.Button("Refresh Item List")
195
- embedding_status = gr.Textbox(label="Embedding Status", interactive=False)
196
- embedding_preview = gr.Textbox(label="Embedding Preview", interactive=False, lines=5)
197
- embedding_metadata = gr.Textbox(label="Embedding Metadata", interactive=False, lines=10)
198
-
199
- with gr.Column():
200
- create_new_embedding_button = gr.Button("Create New Embedding")
201
- embedding_provider = gr.Radio(
202
- choices=["huggingface", "local", "openai"],
203
- label="Select Embedding Provider",
204
- value="huggingface"
205
- )
206
- gr.Markdown("Note: Local provider requires a running Llama.cpp/llamafile server.")
207
- gr.Markdown("OpenAI provider requires a valid API key.")
208
-
209
- huggingface_model = gr.Dropdown(
210
- choices=[
211
- "jinaai/jina-embeddings-v3",
212
- "Alibaba-NLP/gte-large-en-v1.5",
213
- "dunzhang/stella_en_400M_v5",
214
- "custom"
215
- ],
216
- label="Hugging Face Model",
217
- value="jinaai/jina-embeddings-v3",
218
- visible=True
219
- )
220
-
221
- openai_model = gr.Dropdown(
222
- choices=[
223
- "text-embedding-3-small",
224
- "text-embedding-3-large"
225
- ],
226
- label="OpenAI Embedding Model",
227
- value="text-embedding-3-small",
228
- visible=False
229
- )
230
-
231
- custom_embedding_model = gr.Textbox(
232
- label="Custom Embedding Model",
233
- placeholder="Enter your custom embedding model name here",
234
- visible=False
235
- )
236
-
237
- embedding_api_url = gr.Textbox(
238
- label="API URL (for local provider)",
239
- value="http://localhost:8080/embedding",
240
- visible=False
241
- )
242
- chunking_method = gr.Dropdown(
243
- choices=["words", "sentences", "paragraphs", "tokens", "semantic"],
244
- label="Chunking Method",
245
- value="words"
246
- )
247
- max_chunk_size = gr.Slider(
248
- minimum=1, maximum=8000, step=5, value=500,
249
- label="Max Chunk Size"
250
- )
251
- chunk_overlap = gr.Slider(
252
- minimum=0, maximum=5000, step=5, value=200,
253
- label="Chunk Overlap"
254
- )
255
- adaptive_chunking = gr.Checkbox(
256
- label="Use Adaptive Chunking",
257
- value=False
258
- )
259
- contextual_api_choice = gr.Dropdown(
260
- choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
261
- label="Select API for Contextualized Embeddings",
262
- value="OpenAI"
263
- )
264
- use_contextual_embeddings = gr.Checkbox(
265
- label="Use Contextual Embeddings",
266
- value=True
267
- )
268
- contextual_api_key = gr.Textbox(label="API Key", lines=1)
269
-
270
- def get_items_with_embedding_status():
271
- try:
272
- items = get_all_content_from_database()
273
- collection = chroma_client.get_or_create_collection(name="all_content_embeddings")
274
- choices = []
275
- new_item_mapping = {}
276
- for item in items:
277
- try:
278
- result = collection.get(ids=[f"doc_{item['id']}_chunk_0"])
279
- embedding_exists = result is not None and result.get('ids') and len(result['ids']) > 0
280
- status = "Embedding exists" if embedding_exists else "No embedding"
281
- except Exception as e:
282
- print(f"Error checking embedding for item {item['id']}: {str(e)}")
283
- status = "Error checking"
284
- choice = f"{item['title']} ({status})"
285
- choices.append(choice)
286
- new_item_mapping[choice] = item['id']
287
- return gr.update(choices=choices), new_item_mapping
288
- except Exception as e:
289
- print(f"Error in get_items_with_embedding_status: {str(e)}")
290
- return gr.update(choices=["Error: Unable to fetch items"]), {}
291
-
292
- def update_provider_options(provider):
293
- if provider == "huggingface":
294
- return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
295
- elif provider == "local":
296
- return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
297
- else: # OpenAI
298
- return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
299
-
300
- def update_huggingface_options(model):
301
- if model == "custom":
302
- return gr.update(visible=True)
303
- else:
304
- return gr.update(visible=False)
305
-
306
- def check_embedding_status(selected_item, item_mapping):
307
- if not selected_item:
308
- return "Please select an item", "", ""
309
-
310
- try:
311
- item_id = item_mapping.get(selected_item)
312
- if item_id is None:
313
- return f"Invalid item selected: {selected_item}", "", ""
314
-
315
- item_title = selected_item.rsplit(' (', 1)[0]
316
- collection = chroma_client.get_or_create_collection(name="all_content_embeddings")
317
-
318
- result = collection.get(ids=[f"doc_{item_id}_chunk_0"], include=["embeddings", "metadatas"])
319
- logging.info(f"ChromaDB result for item '{item_title}' (ID: {item_id}): {result}")
320
-
321
- if not result['ids']:
322
- return f"No embedding found for item '{item_title}' (ID: {item_id})", "", ""
323
-
324
- if not result['embeddings'] or not result['embeddings'][0]:
325
- return f"Embedding data missing for item '{item_title}' (ID: {item_id})", "", ""
326
-
327
- embedding = result['embeddings'][0]
328
- metadata = result['metadatas'][0] if result['metadatas'] else {}
329
- embedding_preview = str(embedding[:50])
330
- status = f"Embedding exists for item '{item_title}' (ID: {item_id})"
331
- return status, f"First 50 elements of embedding:\n{embedding_preview}", json.dumps(metadata, indent=2)
332
-
333
- except Exception as e:
334
- logging.error(f"Error in check_embedding_status: {str(e)}")
335
- return f"Error processing item: {selected_item}. Details: {str(e)}", "", ""
336
-
337
- def create_new_embedding_for_item(selected_item, provider, hf_model, openai_model, custom_model, api_url,
338
- method, max_size, overlap, adaptive,
339
- item_mapping, use_contextual, contextual_api_choice=None):
340
- if not selected_item:
341
- return "Please select an item", "", ""
342
-
343
- try:
344
- item_id = item_mapping.get(selected_item)
345
- if item_id is None:
346
- return f"Invalid item selected: {selected_item}", "", ""
347
-
348
- items = get_all_content_from_database()
349
- item = next((item for item in items if item['id'] == item_id), None)
350
- if not item:
351
- return f"Item not found: {item_id}", "", ""
352
-
353
- chunk_options = {
354
- 'method': method,
355
- 'max_size': max_size,
356
- 'overlap': overlap,
357
- 'adaptive': adaptive
358
- }
359
-
360
- logging.info(f"Chunking content for item: {item['title']} (ID: {item_id})")
361
- chunks = chunk_for_embedding(item['content'], item['title'], chunk_options)
362
- collection_name = "all_content_embeddings"
363
- collection = chroma_client.get_or_create_collection(name=collection_name)
364
-
365
- # Delete existing embeddings for this item
366
- existing_ids = [f"doc_{item_id}_chunk_{i}" for i in range(len(chunks))]
367
- collection.delete(ids=existing_ids)
368
- logging.info(f"Deleted {len(existing_ids)} existing embeddings for item {item_id}")
369
-
370
- texts, ids, metadatas = [], [], []
371
- chunk_count = 0
372
- logging.info("Generating contextual summaries and preparing chunks for embedding")
373
- for i, chunk in enumerate(chunks):
374
- chunk_text = chunk['text']
375
- chunk_metadata = chunk['metadata']
376
- if use_contextual:
377
- logging.debug(f"Generating contextual summary for chunk {chunk_count}")
378
- context = situate_context(contextual_api_choice, item['content'], chunk_text)
379
- contextualized_text = f"{chunk_text}\n\nContextual Summary: {context}"
380
- else:
381
- contextualized_text = chunk_text
382
- context = None
383
-
384
- chunk_id = f"doc_{item_id}_chunk_{i}"
385
-
386
- # Determine the model to use
387
- if provider == "huggingface":
388
- model = custom_model if hf_model == "custom" else hf_model
389
- elif provider == "openai":
390
- model = openai_model
391
- else:
392
- model = custom_model
393
-
394
- metadata = {
395
- "media_id": str(item_id),
396
- "chunk_index": i,
397
- "total_chunks": len(chunks),
398
- "chunking_method": method,
399
- "max_chunk_size": max_size,
400
- "chunk_overlap": overlap,
401
- "adaptive_chunking": adaptive,
402
- "embedding_model": model,
403
- "embedding_provider": provider,
404
- "original_text": chunk_text,
405
- "use_contextual_embeddings": use_contextual,
406
- "contextual_summary": context,
407
- **chunk_metadata
408
- }
409
-
410
- texts.append(contextualized_text)
411
- ids.append(chunk_id)
412
- metadatas.append(metadata)
413
- chunk_count += 1
414
-
415
- # Create embeddings in batch
416
- logging.info(f"Creating embeddings for {len(texts)} chunks")
417
- embeddings = create_embeddings_batch(texts, provider, model, api_url)
418
-
419
- # Store in Chroma
420
- store_in_chroma(collection_name, texts, embeddings, ids, metadatas)
421
-
422
- # Create a preview of the first embedding
423
- if isinstance(embeddings, np.ndarray) and embeddings.size > 0:
424
- embedding_preview = str(embeddings[0][:50])
425
- elif isinstance(embeddings, list) and len(embeddings) > 0:
426
- embedding_preview = str(embeddings[0][:50])
427
- else:
428
- embedding_preview = "No embeddings created"
429
-
430
- # Return status message
431
- status = f"New embeddings created and stored for item: {item['title']} (ID: {item_id})"
432
-
433
- # Add contextual summaries to status message if enabled
434
- if use_contextual:
435
- status += " (with contextual summaries)"
436
-
437
- # Return status message, embedding preview, and metadata
438
- return status, f"First 50 elements of new embedding:\n{embedding_preview}", json.dumps(metadatas[0],
439
- indent=2)
440
- except Exception as e:
441
- logging.error(f"Error in create_new_embedding_for_item: {str(e)}", exc_info=True)
442
- return f"Error creating embedding: {str(e)}", "", ""
443
-
444
- refresh_button.click(
445
- get_items_with_embedding_status,
446
- outputs=[item_dropdown, item_mapping]
447
- )
448
- item_dropdown.change(
449
- check_embedding_status,
450
- inputs=[item_dropdown, item_mapping],
451
- outputs=[embedding_status, embedding_preview, embedding_metadata]
452
- )
453
- create_new_embedding_button.click(
454
- create_new_embedding_for_item,
455
- inputs=[item_dropdown, embedding_provider, huggingface_model, openai_model, custom_embedding_model, embedding_api_url,
456
- chunking_method, max_chunk_size, chunk_overlap, adaptive_chunking, item_mapping,
457
- use_contextual_embeddings, contextual_api_choice],
458
- outputs=[embedding_status, embedding_preview, embedding_metadata]
459
- )
460
- embedding_provider.change(
461
- update_provider_options,
462
- inputs=[embedding_provider],
463
- outputs=[huggingface_model, openai_model, custom_embedding_model, embedding_api_url]
464
- )
465
- huggingface_model.change(
466
- update_huggingface_options,
467
- inputs=[huggingface_model],
468
- outputs=[custom_embedding_model]
469
- )
470
-
471
- return (item_dropdown, refresh_button, embedding_status, embedding_preview, embedding_metadata,
472
- create_new_embedding_button, embedding_provider, huggingface_model, openai_model, custom_embedding_model, embedding_api_url,
473
- chunking_method, max_chunk_size, chunk_overlap, adaptive_chunking,
474
- use_contextual_embeddings, contextual_api_choice, contextual_api_key)
475
-
476
-
477
- def create_purge_embeddings_tab():
478
- with gr.TabItem("Purge Embeddings", visible=True):
479
- gr.Markdown("# Purge Embeddings")
480
-
481
- with gr.Row():
482
- with gr.Column():
483
- purge_button = gr.Button("Purge All Embeddings")
484
- with gr.Column():
485
- status_output = gr.Textbox(label="Status", lines=10)
486
-
487
- def purge_all_embeddings():
488
- try:
489
- # ChromaDB exposes no single purge call, so delete the collection and recreate it empty.
490
- collection_name = "all_content_embeddings"
491
- chroma_client.delete_collection(collection_name)
492
- chroma_client.create_collection(collection_name)
493
- logging.info(f"All embeddings have been purged successfully.")
494
- return "All embeddings have been purged successfully."
495
- except Exception as e:
496
- logging.error(f"Error during embedding purge: {str(e)}")
497
- return f"Error: {str(e)}"
498
-
499
- purge_button.click(
500
- fn=purge_all_embeddings,
501
- outputs=status_output
502
- )
503
-
504
-
505
-
506
- #
507
- # End of file
508
- ########################################################################################################################
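Note: stripped of the UI, the create-embeddings flow above is: chunk the text, embed each chunk, then write ids/embeddings/documents/metadatas into a ChromaDB collection. A minimal sketch assuming an in-memory client; embed() is a hypothetical stand-in for create_embedding(), and the fixed 384-dimensional zero vector is illustrative only:

import chromadb

client = chromadb.Client()  # in-memory; the app uses a shared persistent chroma_client
collection = client.get_or_create_collection(name="all_content_embeddings")

def embed(text):
    # Hypothetical stand-in for create_embedding(); returns a dummy vector.
    return [0.0] * 384

media_id = 42
chunks = ["first chunk of text", "second chunk of text"]
collection.add(
    ids=[f"doc_{media_id}_chunk_{i}" for i in range(len(chunks))],
    embeddings=[embed(c) for c in chunks],
    documents=chunks,
    metadatas=[{"media_id": str(media_id), "chunk_index": i} for i in range(len(chunks))],
)

# Retrieval mirrors the status check in create_view_embeddings_tab above.
result = collection.get(ids=[f"doc_{media_id}_chunk_0"], include=["embeddings", "metadatas"])
print(result["metadatas"])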
 
App_Function_Libraries/Gradio_UI/Evaluations_Benchmarks_tab.py DELETED
@@ -1,60 +0,0 @@
1
- ###################################################################################################
2
- # Evaluations_Benchmarks_tab.py - Gradio code for G-Eval testing
3
- # We use G-Eval (driven through an LLM API) to evaluate the quality of the generated summaries.
4
-
5
- import gradio as gr
6
- from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
7
-
8
- def create_geval_tab():
9
- with gr.Tab("G-Eval", visible=True):
10
- gr.Markdown("# G-Eval Summarization Evaluation")
11
- with gr.Row():
12
- with gr.Column():
13
- document_input = gr.Textbox(label="Source Document", lines=10)
14
- summary_input = gr.Textbox(label="Summary", lines=5)
15
- api_name_input = gr.Dropdown(
16
- choices=["OpenAI", "Anthropic", "Cohere", "Groq", "OpenRouter", "DeepSeek", "HuggingFace", "Mistral", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "Local-LLM", "Ollama"],
17
- label="Select API"
18
- )
19
- api_key_input = gr.Textbox(label="API Key (if required)", type="password")
20
- evaluate_button = gr.Button("Evaluate Summary")
21
- with gr.Column():
22
- output = gr.Textbox(label="Evaluation Results", lines=10)
23
-
24
- evaluate_button.click(
25
- fn=run_geval,
26
- inputs=[document_input, summary_input, api_name_input, api_key_input],
27
- outputs=output
28
- )
29
-
30
- return document_input, summary_input, api_name_input, api_key_input, evaluate_button, output
31
-
32
-
33
- def create_infinite_bench_tab():
34
- with gr.Tab("Infinite Bench", visible=True):
35
- gr.Markdown("# Infinite Bench Evaluation (Coming Soon)")
36
- with gr.Row():
37
- with gr.Column():
38
- api_name_input = gr.Dropdown(
39
- choices=["OpenAI", "Anthropic", "Cohere", "Groq", "OpenRouter", "DeepSeek", "HuggingFace", "Mistral", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "Local-LLM", "Ollama"],
40
- label="Select API"
41
- )
42
- api_key_input = gr.Textbox(label="API Key (if required)", type="password")
43
- evaluate_button = gr.Button("Evaluate Summary")
44
- with gr.Column():
45
- output = gr.Textbox(label="Evaluation Results", lines=10)
46
-
47
- # evaluate_button.click(
48
- # fn=run_geval,
49
- # inputs=[api_name_input, api_key_input],
50
- # outputs=output
51
- # )
52
-
53
- return api_name_input, api_key_input, evaluate_button, output
54
-
55
-
56
- # If you want to run this as a standalone Gradio app
57
- if __name__ == "__main__":
58
- with gr.Blocks() as demo:
59
- create_geval_tab()
60
- demo.launch()
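Note: because the click handler passes its inputs straight through, run_geval can also be driven without the UI. A usage sketch — the argument order follows the click wiring above, and the return value being a printable results string is an assumption:

from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval

document = "Full text of the source document..."
summary = "A short summary to be graded."
# "OpenAI" must match one of the dropdown choices; the key below is a placeholder.
result = run_geval(document, summary, "OpenAI", "sk-...")
print(result)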
 
App_Function_Libraries/Gradio_UI/Explain_summarize_tab.py DELETED
@@ -1,313 +0,0 @@
1
- # Explain_summarize_tab.py
2
- # Gradio UI for explaining and summarizing text
3
- #
4
- # Imports
5
- import logging
6
- #
7
- # External Imports
8
- import gradio as gr
9
-
10
- from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
11
- from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_user_prompt
12
- #
13
- # Local Imports
14
- from App_Function_Libraries.Summarization.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
15
- summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm, \
16
- summarize_with_ollama
17
- from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, \
18
- summarize_with_cohere, summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, \
19
- summarize_with_huggingface
20
- #
21
- #
22
- ############################################################################################################
23
- #
24
- # Functions:
25
-
26
- def create_summarize_explain_tab():
27
- with gr.TabItem("Analyze Text", visible=True):
28
- gr.Markdown("# Analyze / Explain / Summarize Text without ingesting it into the DB")
29
- with gr.Row():
30
- with gr.Column():
31
- with gr.Row():
32
- text_to_work_input = gr.Textbox(label="Text to be Explained or Summarized",
33
- placeholder="Enter the text you want explained or summarized here",
34
- lines=20)
35
- with gr.Row():
36
- explanation_checkbox = gr.Checkbox(label="Explain Text", value=True)
37
- summarization_checkbox = gr.Checkbox(label="Summarize Text", value=True)
38
- custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
39
- value=False,
40
- visible=True)
41
- preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
42
- value=False,
43
- visible=True)
44
- with gr.Row():
45
- preset_prompt = gr.Dropdown(label="Select Preset Prompt",
46
- choices=load_preset_prompts(),
47
- visible=False)
48
- with gr.Row():
49
- custom_prompt_input = gr.Textbox(label="Custom Prompt",
50
- placeholder="Enter custom prompt here",
51
- lines=3,
52
- visible=False)
53
- with gr.Row():
54
- system_prompt_input = gr.Textbox(label="System Prompt",
55
- value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
56
- **Bulleted Note Creation Guidelines**
57
-
58
- **Headings**:
59
- - Based on referenced topics, not categories like quotes or terms
60
- - Surrounded by **bold** formatting
61
- - Not listed as bullet points
62
- - No space between headings and list items underneath
63
-
64
- **Emphasis**:
65
- - **Important terms** set in bold font
66
- - **Text ending in a colon**: also bolded
67
-
68
- **Review**:
69
- - Ensure adherence to specified format
70
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
71
- """,
72
- lines=3,
73
- visible=False,
74
- interactive=True)
75
- api_endpoint = gr.Dropdown(
76
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
77
- "OpenRouter",
78
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace", "Custom-OpenAI-API"],
79
- value=None,
80
- label="API to be used for request (Mandatory)"
81
- )
82
- with gr.Row():
83
- api_key_input = gr.Textbox(label="API Key (if required)", placeholder="Enter your API key here",
84
- type="password")
85
- with gr.Row():
86
- explain_summarize_button = gr.Button("Explain/Summarize")
87
-
88
- with gr.Column():
89
- summarization_output = gr.Textbox(label="Summary:", lines=20)
90
- explanation_output = gr.Textbox(label="Explanation:", lines=20)
91
- custom_prompt_output = gr.Textbox(label="Custom Prompt:", lines=20, visible=True)
92
-
93
- custom_prompt_checkbox.change(
94
- fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
95
- inputs=[custom_prompt_checkbox],
96
- outputs=[custom_prompt_input, system_prompt_input]
97
- )
98
- preset_prompt_checkbox.change(
99
- fn=lambda x: gr.update(visible=x),
100
- inputs=[preset_prompt_checkbox],
101
- outputs=[preset_prompt]
102
- )
103
-
104
- def update_prompts(preset_name):
105
- prompts = update_user_prompt(preset_name)
106
- return (
107
- gr.update(value=prompts["user_prompt"], visible=True),
108
- gr.update(value=prompts["system_prompt"], visible=True)
109
- )
110
-
111
- preset_prompt.change(
112
- update_prompts,
113
- inputs=preset_prompt,
114
- outputs=[custom_prompt_input, system_prompt_input]
115
- )
116
-
117
- explain_summarize_button.click(
118
- fn=summarize_explain_text,
119
- inputs=[text_to_work_input, api_endpoint, api_key_input, summarization_checkbox, explanation_checkbox, custom_prompt_input, system_prompt_input],
120
- outputs=[summarization_output, explanation_output, custom_prompt_output]
121
- )
122
-
123
-
124
- def summarize_explain_text(message, api_endpoint, api_key, summarization, explanation, custom_prompt, custom_system_prompt,):
125
- custom_prompt_output = None
126
- summarization_response = None
127
- explanation_response = None
128
- temp = 0.7
129
- try:
130
- logging.info(f"Debug - summarize_explain_text Function - Message: {message}")
131
- logging.info(f"Debug - summarize_explain_text Function - API Endpoint: {api_endpoint}")
132
-
133
- # Prepare the input for the API
134
- input_data = f"User: {message}\n"
135
- # Print first 500 chars
136
- logging.info(f"Debug - Chat Function - Input Data: {input_data[:500]}...")
137
- logging.debug(f"Debug - Chat Function - API Key: {api_key[:10]}")
138
- user_prompt = " "
139
- if not api_endpoint:
140
- return "Please select an API endpoint", "Please select an API endpoint"
141
- try:
142
- if summarization:
143
- system_prompt = """<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
144
- **Bulleted Note Creation Guidelines**
145
-
146
- **Headings**:
147
- - Based on referenced topics, not categories like quotes or terms
148
- - Surrounded by **bold** formatting
149
- - Not listed as bullet points
150
- - No space between headings and list items underneath
151
-
152
- **Emphasis**:
153
- - **Important terms** set in bold font
154
- - **Text ending in a colon**: also bolded
155
-
156
- **Review**:
157
- - Ensure adherence to specified format
158
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]"""
159
-
160
- # Use the existing API request code based on the selected endpoint
161
- logging.info(f"Debug - Chat Function - API Endpoint: {api_endpoint}")
162
- if api_endpoint.lower() == 'openai':
163
- summarization_response = summarize_with_openai(api_key, input_data, user_prompt, temp,
164
- system_prompt)
165
- elif api_endpoint.lower() == "anthropic":
166
- summarization_response = summarize_with_anthropic(api_key, input_data, user_prompt, temp,
167
- system_prompt)
168
- elif api_endpoint.lower() == "cohere":
169
- summarization_response = summarize_with_cohere(api_key, input_data, user_prompt, temp,
170
- system_prompt)
171
- elif api_endpoint.lower() == "groq":
172
- summarization_response = summarize_with_groq(api_key, input_data, user_prompt, temp, system_prompt)
173
- elif api_endpoint.lower() == "openrouter":
174
- summarization_response = summarize_with_openrouter(api_key, input_data, user_prompt, temp,
175
- system_prompt)
176
- elif api_endpoint.lower() == "deepseek":
177
- summarization_response = summarize_with_deepseek(api_key, input_data, user_prompt, temp,
178
- system_prompt)
179
- elif api_endpoint.lower() == "llama.cpp":
180
- summarization_response = summarize_with_llama(input_data, user_prompt, api_key, temp, system_prompt)
181
- elif api_endpoint.lower() == "kobold":
182
- summarization_response = summarize_with_kobold(input_data, api_key, user_prompt, temp,
183
- system_prompt)
184
- elif api_endpoint.lower() == "ooba":
185
- summarization_response = summarize_with_oobabooga(input_data, api_key, user_prompt, temp,
186
- system_prompt)
187
- elif api_endpoint.lower() == "tabbyapi":
188
- summarization_response = summarize_with_tabbyapi(input_data, user_prompt, temp, system_prompt)
189
- elif api_endpoint.lower() == "vllm":
190
- summarization_response = summarize_with_vllm(input_data, user_prompt, system_prompt)
191
- elif api_endpoint.lower() == "local-llm":
192
- summarization_response = summarize_with_local_llm(input_data, user_prompt, temp, system_prompt)
193
- elif api_endpoint.lower() == "huggingface":
194
- summarization_response = summarize_with_huggingface(api_key, input_data, user_prompt,
195
- temp) # , system_prompt)
196
- elif api_endpoint.lower() == "ollama":
197
- summarization_response = summarize_with_ollama(input_data, user_prompt, None, api_key, temp, system_prompt)
198
- else:
199
- raise ValueError(f"Unsupported API endpoint: {api_endpoint}")
200
- except Exception as e:
201
- logging.error(f"Error in summarization: {str(e)}")
202
- response1 = f"An error occurred during summarization: {str(e)}"
203
-
204
- try:
205
- if explanation:
206
- system_prompt = """You are a professional teacher. Please explain the content presented in an easy to digest fashion so that a non-specialist may understand it."""
207
- # Use the existing API request code based on the selected endpoint
208
- logging.info(f"Debug - Chat Function - API Endpoint: {api_endpoint}")
209
- if api_endpoint.lower() == 'openai':
210
- explanation_response = summarize_with_openai(api_key, input_data, user_prompt, temp, system_prompt)
211
- elif api_endpoint.lower() == "anthropic":
212
- explanation_response = summarize_with_anthropic(api_key, input_data, user_prompt, temp,
213
- system_prompt)
214
- elif api_endpoint.lower() == "cohere":
215
- explanation_response = summarize_with_cohere(api_key, input_data, user_prompt, temp, system_prompt)
216
- elif api_endpoint.lower() == "groq":
217
- explanation_response = summarize_with_groq(api_key, input_data, user_prompt, temp, system_prompt)
218
- elif api_endpoint.lower() == "openrouter":
219
- explanation_response = summarize_with_openrouter(api_key, input_data, user_prompt, temp,
220
- system_prompt)
221
- elif api_endpoint.lower() == "deepseek":
222
- explanation_response = summarize_with_deepseek(api_key, input_data, user_prompt, temp,
223
- system_prompt)
224
- elif api_endpoint.lower() == "llama.cpp":
225
- explanation_response = summarize_with_llama(input_data, user_prompt, temp, system_prompt)
226
- elif api_endpoint.lower() == "kobold":
227
- explanation_response = summarize_with_kobold(input_data, api_key, user_prompt, temp, system_prompt)
228
- elif api_endpoint.lower() == "ooba":
229
- explanation_response = summarize_with_oobabooga(input_data, api_key, user_prompt, temp,
230
- system_prompt)
231
- elif api_endpoint.lower() == "tabbyapi":
232
- explanation_response = summarize_with_tabbyapi(input_data, user_prompt, temp, system_prompt)
233
- elif api_endpoint.lower() == "vllm":
234
- explanation_response = summarize_with_vllm(input_data, user_prompt, system_prompt)
235
- elif api_endpoint.lower() == "local-llm":
236
- explanation_response = summarize_with_local_llm(input_data, user_prompt, temp, system_prompt)
237
- elif api_endpoint.lower() == "huggingface":
238
- explanation_response = summarize_with_huggingface(api_key, input_data, user_prompt,
239
- temp) # , system_prompt)
240
- elif api_endpoint.lower() == "ollama":
241
- explanation_response = summarize_with_ollama(input_data, user_prompt, temp, system_prompt)
242
- else:
243
- raise ValueError(f"Unsupported API endpoint: {api_endpoint}")
244
- except Exception as e:
245
- logging.error(f"Error in summarization: {str(e)}")
246
- response2 = f"An error occurred during summarization: {str(e)}"
247
-
248
- try:
249
- if custom_prompt:
250
- system_prompt = custom_system_prompt
251
- user_prompt = custom_prompt + input_data
252
- # Use the existing API request code based on the selected endpoint
253
- logging.info(f"Debug - Chat Function - API Endpoint: {api_endpoint}")
254
- if api_endpoint.lower() == 'openai':
255
- custom_prompt_output = summarize_with_openai(api_key, input_data, user_prompt, temp, system_prompt)
256
- elif api_endpoint.lower() == "anthropic":
257
- custom_prompt_output = summarize_with_anthropic(api_key, input_data, user_prompt, temp,
258
- system_prompt)
259
- elif api_endpoint.lower() == "cohere":
260
- custom_prompt_output = summarize_with_cohere(api_key, input_data, user_prompt, temp, system_prompt)
261
- elif api_endpoint.lower() == "groq":
262
- custom_prompt_output = summarize_with_groq(api_key, input_data, user_prompt, temp, system_prompt)
263
- elif api_endpoint.lower() == "openrouter":
264
- custom_prompt_output = summarize_with_openrouter(api_key, input_data, user_prompt, temp,
265
- system_prompt)
266
- elif api_endpoint.lower() == "deepseek":
267
- custom_prompt_output = summarize_with_deepseek(api_key, input_data, user_prompt, temp,
268
- system_prompt)
269
- elif api_endpoint.lower() == "llama.cpp":
270
- custom_prompt_output = summarize_with_llama(input_data, user_prompt, temp, system_prompt)
271
- elif api_endpoint.lower() == "kobold":
272
- custom_prompt_output = summarize_with_kobold(input_data, api_key, user_prompt, temp, system_prompt)
273
- elif api_endpoint.lower() == "ooba":
274
- custom_prompt_output = summarize_with_oobabooga(input_data, api_key, user_prompt, temp,
275
- system_prompt)
276
- elif api_endpoint.lower() == "tabbyapi":
277
- custom_prompt_output = summarize_with_tabbyapi(input_data, user_prompt, temp, system_prompt)
278
- elif api_endpoint.lower() == "vllm":
279
- custom_prompt_output = summarize_with_vllm(input_data, user_prompt, system_prompt)
280
- elif api_endpoint.lower() == "local-llm":
281
- custom_prompt_output = summarize_with_local_llm(input_data, user_prompt, temp, system_prompt)
282
- elif api_endpoint.lower() == "huggingface":
283
- custom_prompt_output = summarize_with_huggingface(api_key, input_data, user_prompt,
284
- temp) # , system_prompt)
285
- elif api_endpoint.lower() == "ollama":
286
- custom_prompt_output = summarize_with_ollama(input_data, user_prompt, temp, system_prompt)
287
- else:
288
- raise ValueError(f"Unsupported API endpoint: {api_endpoint}")
289
- except Exception as e:
290
- logging.error(f"Error in summarization: {str(e)}")
291
- response2 = f"An error occurred during summarization: {str(e)}"
292
-
293
-
294
- if summarization_response:
295
- response1 = f"Summary: {summarization_response}"
296
- else:
297
- response1 = "Summary: No summary requested"
298
-
299
- if explanation_response:
300
- response2 = f"Explanation: {explanation_response}"
301
- else:
302
- response2 = "Explanation: No explanation requested"
303
-
304
- if custom_prompt_output:
305
- response3 = f"Custom Prompt: {custom_prompt_output}"
306
- else:
307
- response3 = "Custom Prompt: No custom prompt requested"
308
-
309
- return response1, response2, response3
310
-
311
- except Exception as e:
312
- logging.error(f"Error in chat function: {str(e)}")
313
- return f"An error occurred: {str(e)}"
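Editor's note: the per-endpoint `if`/`elif` chain above is repeated verbatim for the summary, explanation, and custom-prompt passes. A minimal dispatch-table sketch, assuming the `summarize_with_*` helpers keep the signatures shown in this diff (the `run_endpoint` name and the lambda wrappers are hypothetical, not part of the original module):

```python
# Hypothetical refactor sketch; wraps each handler so the argument order is uniform.
from typing import Callable, Dict

API_DISPATCH: Dict[str, Callable] = {
    "openai": lambda key, data, up, t, sp: summarize_with_openai(key, data, up, t, sp),
    "anthropic": lambda key, data, up, t, sp: summarize_with_anthropic(key, data, up, t, sp),
    "llama.cpp": lambda key, data, up, t, sp: summarize_with_llama(data, up, t, sp),
    "vllm": lambda key, data, up, t, sp: summarize_with_vllm(data, up, sp),
    # ...the remaining endpoints follow the same pattern.
}

def run_endpoint(api_endpoint, api_key, input_data, user_prompt, temp, system_prompt):
    handler = API_DISPATCH.get(api_endpoint.lower())
    if handler is None:
        raise ValueError(f"Unsupported API endpoint: {api_endpoint}")
    return handler(api_key, input_data, user_prompt, temp, system_prompt)
```

Each of the three passes could then be a single `run_endpoint(...)` call, which also keeps the endpoint list in one place.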
App_Function_Libraries/Gradio_UI/Export_Functionality.py DELETED
@@ -1,266 +0,0 @@
- # Export_Functionality.py
- # Functionality for exporting items as markdown files
- import os
- import json
- import math
- import logging
- import shutil
- import tempfile
- from typing import List, Dict, Optional, Tuple
- import gradio as gr
- from App_Function_Libraries.DB.DB_Manager import DatabaseError
- from App_Function_Libraries.Gradio_UI.Gradio_Shared import fetch_item_details, fetch_items_by_keyword, browse_items
-
- logger = logging.getLogger(__name__)
-
- def export_item_as_markdown(media_id: int) -> Tuple[Optional[str], str]:
-     try:
-         content, prompt, summary = fetch_item_details(media_id)
-         title = f"Item {media_id}"  # You might want to fetch the actual title
-         markdown_content = f"# {title}\n\n## Prompt\n{prompt}\n\n## Summary\n{summary}\n\n## Content\n{content}"
-
-         filename = f"export_item_{media_id}.md"
-         with open(filename, "w", encoding='utf-8') as f:
-             f.write(markdown_content)
-
-         logger.info(f"Successfully exported item {media_id} to {filename}")
-         return filename, f"Successfully exported item {media_id} to {filename}"
-     except Exception as e:
-         error_message = f"Error exporting item {media_id}: {str(e)}"
-         logger.error(error_message)
-         return None, error_message
-
-
- def export_items_by_keyword(keyword: str) -> Tuple[Optional[str], str]:
-     # Returns a (file_path, status_message) pair so it can share the same two
-     # Gradio outputs as the other export handlers.
-     try:
-         items = fetch_items_by_keyword(keyword)
-         if not items:
-             logger.warning(f"No items found for keyword: {keyword}")
-             return None, f"No items found for keyword: {keyword}"
-
-         # Create a temporary directory to store individual markdown files
-         with tempfile.TemporaryDirectory() as temp_dir:
-             folder_name = f"export_keyword_{keyword}"
-             export_folder = os.path.join(temp_dir, folder_name)
-             os.makedirs(export_folder)
-
-             # fetch_items_by_keyword returns (id, title, url) tuples
-             for item_id, item_title, _item_url in items:
-                 content, prompt, summary = fetch_item_details(item_id)
-                 markdown_content = f"# {item_title}\n\n## Prompt\n{prompt}\n\n## Summary\n{summary}\n\n## Content\n{content}"
-
-                 # Create an individual markdown file for each item
-                 file_name = f"{item_id}_{item_title[:50]}.md"  # Limit filename length
-                 file_path = os.path.join(export_folder, file_name)
-                 with open(file_path, "w", encoding='utf-8') as f:
-                     f.write(markdown_content)
-
-             # Create a zip file containing all markdown files
-             zip_filename = f"{folder_name}.zip"
-             shutil.make_archive(os.path.join(temp_dir, folder_name), 'zip', export_folder)
-
-             # Move the zip file to a location accessible by Gradio
-             final_zip_path = os.path.join(os.getcwd(), zip_filename)
-             shutil.move(os.path.join(temp_dir, zip_filename), final_zip_path)
-
-         logger.info(f"Successfully exported {len(items)} items for keyword '{keyword}' to {zip_filename}")
-         return final_zip_path, f"Successfully exported {len(items)} items for keyword '{keyword}' to {zip_filename}"
-     except Exception as e:
-         error_message = f"Error exporting items for keyword '{keyword}': {str(e)}"
-         logger.error(error_message)
-         return None, error_message
-
-
- def export_selected_items(selected_items: List[Dict]) -> Tuple[Optional[str], str]:
-     try:
-         logger.debug(f"Received selected_items: {selected_items}")
-         if not selected_items:
-             logger.warning("No items selected for export")
-             return None, "No items selected for export"
-
-         markdown_content = "# Selected Items\n\n"
-         for item in selected_items:
-             logger.debug(f"Processing item: {item}")
-             try:
-                 # Check if 'value' is a string (JSON) or already a dictionary
-                 if isinstance(item, str):
-                     item_data = json.loads(item)
-                 elif isinstance(item, dict) and 'value' in item:
-                     item_data = item['value'] if isinstance(item['value'], dict) else json.loads(item['value'])
-                 else:
-                     item_data = item
-
-                 logger.debug(f"Item data after processing: {item_data}")
-
-                 if 'id' not in item_data:
-                     logger.error(f"'id' not found in item data: {item_data}")
-                     continue
-
-                 content, prompt, summary = fetch_item_details(item_data['id'])
-                 markdown_content += f"## {item_data.get('title', 'Item {}'.format(item_data['id']))}\n\n### Prompt\n{prompt}\n\n### Summary\n{summary}\n\n### Content\n{content}\n\n---\n\n"
-             except Exception as e:
-                 logger.error(f"Error processing item {item}: {str(e)}")
-                 markdown_content += "## Error\n\nUnable to process this item.\n\n---\n\n"
-
-         filename = "export_selected_items.md"
-         with open(filename, "w", encoding='utf-8') as f:
-             f.write(markdown_content)
-
-         logger.info(f"Successfully exported {len(selected_items)} selected items to {filename}")
-         return filename, f"Successfully exported {len(selected_items)} items to {filename}"
-     except Exception as e:
-         error_message = f"Error exporting selected items: {str(e)}"
-         logger.error(error_message)
-         return None, error_message
-
-
- def display_search_results_export_tab(search_query: str, search_type: str, page: int = 1, items_per_page: int = 10):
-     logger.info(f"Searching with query: '{search_query}', type: '{search_type}', page: {page}")
-     try:
-         results = browse_items(search_query, search_type)
-         logger.info(f"browse_items returned {len(results)} results")
-
-         if not results:
-             return [], f"No results found for query: '{search_query}'", 1, 1
-
-         total_pages = math.ceil(len(results) / items_per_page)
-         start_index = (page - 1) * items_per_page
-         end_index = start_index + items_per_page
-         paginated_results = results[start_index:end_index]
-
-         checkbox_data = [
-             {
-                 "name": f"Name: {item[1]}\nURL: {item[2]}",
-                 "value": {"id": item[0], "title": item[1], "url": item[2]}
-             }
-             for item in paginated_results
-         ]
-
-         logger.info(f"Returning {len(checkbox_data)} items for checkbox (page {page} of {total_pages})")
-         return checkbox_data, f"Found {len(results)} results (showing page {page} of {total_pages})", page, total_pages
-
-     except DatabaseError as e:
-         error_message = f"Error in display_search_results_export_tab: {str(e)}"
-         logger.error(error_message)
-         return [], error_message, 1, 1
-     except Exception as e:
-         error_message = f"Unexpected error in display_search_results_export_tab: {str(e)}"
-         logger.error(error_message)
-         return [], error_message, 1, 1
-
-
- def create_export_tab():
-     with gr.Tab("Search and Export"):
-         with gr.Row():
-             with gr.Column():
-                 gr.Markdown("# Search and Export Items")
-                 gr.Markdown("Search for items and export them as markdown files")
-                 gr.Markdown("You can also export items by keyword")
-                 search_query = gr.Textbox(label="Search Query")
-                 search_type = gr.Radio(["Title", "URL", "Keyword", "Content"], label="Search By")
-                 search_button = gr.Button("Search")
-
-             with gr.Column():
-                 prev_button = gr.Button("Previous Page")
-                 next_button = gr.Button("Next Page")
-
-         current_page = gr.State(1)
-         total_pages = gr.State(1)
-
-         search_results = gr.CheckboxGroup(label="Search Results", choices=[])
-         export_selected_button = gr.Button("Export Selected Items")
-
-         keyword_input = gr.Textbox(label="Enter keyword for export")
-         export_by_keyword_button = gr.Button("Export items by keyword")
-
-         export_output = gr.File(label="Download Exported File")
-         error_output = gr.Textbox(label="Status/Error Messages", interactive=False)
-
-         def search_and_update(query, search_type, page):
-             results, message, current, total = display_search_results_export_tab(query, search_type, page)
-             logger.debug(f"search_and_update results: {results}")
-             return results, message, current, total, gr.update(choices=results)
-
-         search_button.click(
-             fn=search_and_update,
-             inputs=[search_query, search_type, current_page],
-             outputs=[search_results, error_output, current_page, total_pages, search_results],
-             show_progress="full"
-         )
-
-
-         def update_page(current, total, direction):
-             new_page = max(1, min(total, current + direction))
-             return new_page
-
-         prev_button.click(
-             fn=update_page,
-             inputs=[current_page, total_pages, gr.State(-1)],
-             outputs=[current_page]
-         ).then(
-             fn=search_and_update,
-             inputs=[search_query, search_type, current_page],
-             # Keep the outputs in sync with search_and_update's five return values
-             outputs=[search_results, error_output, current_page, total_pages, search_results],
-             show_progress=True
-         )
-
-         next_button.click(
-             fn=update_page,
-             inputs=[current_page, total_pages, gr.State(1)],
-             outputs=[current_page]
-         ).then(
-             fn=search_and_update,
-             inputs=[search_query, search_type, current_page],
-             # Keep the outputs in sync with search_and_update's five return values
-             outputs=[search_results, error_output, current_page, total_pages, search_results],
-             show_progress=True
-         )
-
-         def handle_export_selected(selected_items):
-             logger.debug(f"Exporting selected items: {selected_items}")
-             return export_selected_items(selected_items)
-
-         export_selected_button.click(
-             fn=handle_export_selected,
-             inputs=[search_results],
-             outputs=[export_output, error_output],
-             show_progress="full"
-         )
-
-         export_by_keyword_button.click(
-             fn=export_items_by_keyword,
-             inputs=[keyword_input],
-             outputs=[export_output, error_output],
-             show_progress="full"
-         )
-
-         def handle_item_selection(selected_items):
-             logger.debug(f"Selected items: {selected_items}")
-             if not selected_items:
-                 return None, "No item selected"
-
-             try:
-                 # Assuming selected_items is a list of dictionaries
-                 selected_item = selected_items[0]
-                 logger.debug(f"First selected item: {selected_item}")
-
-                 # Check if 'value' is a string (JSON) or already a dictionary
-                 if isinstance(selected_item['value'], str):
-                     item_data = json.loads(selected_item['value'])
-                 else:
-                     item_data = selected_item['value']
-
-                 logger.debug(f"Item data: {item_data}")
-
-                 item_id = item_data['id']
-                 return export_item_as_markdown(item_id)
-             except Exception as e:
-                 error_message = f"Error processing selected item: {str(e)}"
-                 logger.error(error_message)
-                 return None, error_message
-
-         search_results.select(
-             fn=handle_item_selection,
-             inputs=[search_results],
-             outputs=[export_output, error_output],
-             show_progress="full"
-         )
-
-
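Editor's note: with `export_items_by_keyword` now returning a `(file, status)` pair like the other export helpers, all three buttons can share the same two outputs. A quick sketch of how the contract behaves outside Gradio (the media ID and keyword below are made up for illustration):

```python
# Hypothetical smoke test for the export helpers.
path, status = export_item_as_markdown(42)      # 42 is a hypothetical media ID
print(status)                                   # success or error message
if path is not None:
    print(f"Markdown written to {path}")

zip_path, status = export_items_by_keyword("python")
print(status)                                   # zip_path is None if nothing matched
```

One design detail worth noting: `shutil.make_archive` appends `.zip` to the base name itself, which is why the code passes the bare `folder_name` path and then moves `folder_name + ".zip"` out of the temporary directory before it is cleaned up.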
App_Function_Libraries/Gradio_UI/Gradio_Shared.py DELETED
@@ -1,285 +0,0 @@
- # Gradio_Shared.py
- # Gradio UI functions that are shared across multiple tabs
- #
- # Imports
- import logging
- import sqlite3
- import traceback
- from functools import wraps
- from typing import List, Tuple
- #
- # External Imports
- import gradio as gr
- #
- # Local Imports
- from App_Function_Libraries.DB.DB_Manager import list_prompts, db, search_and_display, fetch_prompt_details
- from App_Function_Libraries.DB.SQLite_DB import DatabaseError
- from App_Function_Libraries.Utils.Utils import format_transcription
- #
- ##############################################################################################################
- #
- # Functions:
-
- whisper_models = ["small", "medium", "small.en", "medium.en", "large", "large-v1", "large-v2", "large-v3",
-                   "distil-large-v2", "distil-medium.en", "distil-small.en"]
-
- # Sample data
- prompts_category_1 = [
-     "What are the key points discussed in the video?",
-     "Summarize the main arguments made by the speaker.",
-     "Describe the conclusions of the study presented."
- ]
-
- prompts_category_2 = [
-     "How does the proposed solution address the problem?",
-     "What are the implications of the findings?",
-     "Can you explain the theory behind the observed phenomenon?"
- ]
-
- all_prompts = prompts_category_1 + prompts_category_2
-
-
-
- # FIXME - SQL functions that need to be addressed/added to DB manager
- def search_media(query, fields, keyword, page):
-     try:
-         results = search_and_display(query, fields, keyword, page)
-         return results
-     except Exception as e:
-         logger = logging.getLogger()
-         logger.error(f"Error searching media: {e}")
-         return str(e)
-
- def fetch_items_by_title_or_url(search_query: str, search_type: str):
-     try:
-         with db.get_connection() as conn:
-             cursor = conn.cursor()
-             if search_type == 'Title':
-                 cursor.execute("SELECT id, title, url FROM Media WHERE title LIKE ?", (f'%{search_query}%',))
-             elif search_type == 'URL':
-                 cursor.execute("SELECT id, title, url FROM Media WHERE url LIKE ?", (f'%{search_query}%',))
-             else:
-                 raise ValueError(f"Unsupported search type: {search_type}")
-             results = cursor.fetchall()
-             return results
-     except sqlite3.Error as e:
-         raise DatabaseError(f"Error fetching items by {search_type}: {e}")
-
- def fetch_items_by_keyword(search_query: str):
-     try:
-         with db.get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.execute("""
-                 SELECT m.id, m.title, m.url
-                 FROM Media m
-                 JOIN MediaKeywords mk ON m.id = mk.media_id
-                 JOIN Keywords k ON mk.keyword_id = k.id
-                 WHERE k.keyword LIKE ?
-             """, (f'%{search_query}%',))
-             results = cursor.fetchall()
-             return results
-     except sqlite3.Error as e:
-         raise DatabaseError(f"Error fetching items by keyword: {e}")
-
- # FIXME - Raw SQL not using DB_Manager...
- def fetch_items_by_content(search_query: str):
-     try:
-         with db.get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.execute("SELECT id, title, url FROM Media WHERE content LIKE ?", (f'%{search_query}%',))
-             results = cursor.fetchall()
-             return results
-     except sqlite3.Error as e:
-         raise DatabaseError(f"Error fetching items by content: {e}")
-
-
-
- # FIXME - Raw SQL not using DB_Manager...
- def fetch_item_details_single(media_id: int):
-     try:
-         with db.get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.execute("""
-                 SELECT prompt, summary
-                 FROM MediaModifications
-                 WHERE media_id = ?
-                 ORDER BY modification_date DESC
-                 LIMIT 1
-             """, (media_id,))
-             prompt_summary_result = cursor.fetchone()
-             cursor.execute("SELECT content FROM Media WHERE id = ?", (media_id,))
-             content_result = cursor.fetchone()
-
-             prompt = prompt_summary_result[0] if prompt_summary_result else ""
-             summary = prompt_summary_result[1] if prompt_summary_result else ""
-             content = content_result[0] if content_result else ""
-
-             return prompt, summary, content
-     except sqlite3.Error as e:
-         raise Exception(f"Error fetching item details: {e}")
-
-
- # FIXME - Raw SQL not using DB_Manager...
- def fetch_item_details(media_id: int):
-     try:
-         with db.get_connection() as conn:
-             cursor = conn.cursor()
-             cursor.execute("""
-                 SELECT prompt, summary
-                 FROM MediaModifications
-                 WHERE media_id = ?
-                 ORDER BY modification_date DESC
-                 LIMIT 1
-             """, (media_id,))
-             prompt_summary_result = cursor.fetchone()
-             cursor.execute("SELECT content FROM Media WHERE id = ?", (media_id,))
-             content_result = cursor.fetchone()
-
-             prompt = prompt_summary_result[0] if prompt_summary_result else ""
-             summary = prompt_summary_result[1] if prompt_summary_result else ""
-             content = content_result[0] if content_result else ""
-
-             return content, prompt, summary
-     except sqlite3.Error as e:
-         logging.error(f"Error fetching item details: {e}")
-         return "", "", ""  # Return empty strings if there's an error
-
- # Handle prompt selection
- def handle_prompt_selection(prompt):
-     return f"You selected: {prompt}"
-
-
- def update_user_prompt(preset_name):
-     details = fetch_prompt_details(preset_name)
-     if details:
-         # Return a dictionary with all details
-         return {
-             "title": details[0],
-             "author": details[1],
-             "details": details[2],
-             "system_prompt": details[3],
-             "user_prompt": details[4] if len(details) > 4 else "",
-         }
-     return {"title": "", "details": "", "system_prompt": "", "user_prompt": "", "author": ""}
-
- def browse_items(search_query, search_type):
-     if search_type == 'Keyword':
-         results = fetch_items_by_keyword(search_query)
-     elif search_type == 'Content':
-         results = fetch_items_by_content(search_query)
-     else:
-         results = fetch_items_by_title_or_url(search_query, search_type)
-     return results
-
-
- def update_dropdown(search_query, search_type):
-     results = browse_items(search_query, search_type)
-     item_options = [f"{item[1]} ({item[2]})" for item in results]
-     new_item_mapping = {f"{item[1]} ({item[2]})": item[0] for item in results}
-     print(f"Debug - Update Dropdown - New Item Mapping: {new_item_mapping}")
-     return gr.update(choices=item_options), new_item_mapping
-
-
-
- def get_media_id(selected_item, item_mapping):
-     return item_mapping.get(selected_item)
-
-
- def update_detailed_view(item, item_mapping):
-     # Function to update the detailed view based on the selected item
-     if item:
-         item_id = item_mapping.get(item)
-         if item_id:
-             content, prompt, summary = fetch_item_details(item_id)
-             if content or prompt or summary:
-                 details_html = "<h4>Details:</h4>"
-                 if prompt:
-                     formatted_prompt = format_transcription(prompt)
-                     details_html += f"<h4>Prompt:</h4><p>{formatted_prompt}</p>"
-                 if summary:
-                     formatted_summary = format_transcription(summary)
-                     details_html += f"<h4>Summary:</h4><p>{formatted_summary}</p>"
-                 # Format the transcription content for better readability
-                 formatted_content = format_transcription(content)
-                 #content_html = f"<h4>Transcription:</h4><div style='white-space: pre-wrap;'>{content}</div>"
-                 content_html = f"<h4>Transcription:</h4><div style='white-space: pre-wrap;'>{formatted_content}</div>"
-                 return details_html, content_html
-             else:
-                 return "No details available.", "No details available."
-         else:
-             return "No item selected", "No item selected"
-     else:
-         return "No item selected", "No item selected"
-
-
- def format_content(content):
-     # Format content as a fenced markdown code block
-     formatted_content = f"```\n{content}\n```"
-     return formatted_content
-
-
- def update_prompt_dropdown():
-     prompt_names = list_prompts()
-     return gr.update(choices=prompt_names)
-
-
- def display_prompt_details(selected_prompt):
-     if selected_prompt:
-         prompts = update_user_prompt(selected_prompt)
-         if prompts["title"]:  # Check if we have any details
-             details_str = f"<h4>Details:</h4><p>{prompts['details']}</p>"
-             system_str = f"<h4>System:</h4><p>{prompts['system_prompt']}</p>"
-             user_str = f"<h4>User:</h4><p>{prompts['user_prompt']}</p>" if prompts['user_prompt'] else ""
-             return details_str + system_str + user_str
-     return "No details available."
-
- def search_media_database(query: str) -> List[Tuple[int, str, str]]:
-     return browse_items(query, 'Title')
-
-
- def load_media_content(media_id: int) -> dict:
-     try:
-         print(f"Debug - Load Media Content - Media ID: {media_id}")
-         item_details = fetch_item_details(media_id)
-         print(f"Debug - Load Media Content - Item Details: \n\n{item_details}\n\n\n\n")
-
-         if isinstance(item_details, tuple) and len(item_details) == 3:
-             content, prompt, summary = item_details
-         else:
-             print(f"Debug - Load Media Content - Unexpected item_details format: \n\n{item_details}\n\n\n\n")
-             content, prompt, summary = "", "", ""
-
-         return {
-             "content": content or "No content available",
-             "prompt": prompt or "No prompt available",
-             "summary": summary or "No summary available"
-         }
-     except Exception as e:
-         print(f"Debug - Load Media Content - Error: {str(e)}")
-         return {"content": "", "prompt": "", "summary": ""}
-
-
- def error_handler(func):
-     @wraps(func)
-     def wrapper(*args, **kwargs):
-         try:
-             return func(*args, **kwargs)
-         except Exception as e:
-             error_message = f"Error in {func.__name__}: {str(e)}"
-             logging.error(f"{error_message}\n{traceback.format_exc()}")
-             return {"error": error_message, "details": traceback.format_exc()}
-     return wrapper
-
-
- def create_chunking_inputs():
-     chunk_text_by_words_checkbox = gr.Checkbox(label="Chunk Text by Words", value=False, visible=True)
-     max_words_input = gr.Number(label="Max Words", value=300, precision=0, visible=True)
-     chunk_text_by_sentences_checkbox = gr.Checkbox(label="Chunk Text by Sentences", value=False, visible=True)
-     max_sentences_input = gr.Number(label="Max Sentences", value=10, precision=0, visible=True)
-     chunk_text_by_paragraphs_checkbox = gr.Checkbox(label="Chunk Text by Paragraphs", value=False, visible=True)
-     max_paragraphs_input = gr.Number(label="Max Paragraphs", value=5, precision=0, visible=True)
-     chunk_text_by_tokens_checkbox = gr.Checkbox(label="Chunk Text by Tokens", value=False, visible=True)
-     max_tokens_input = gr.Number(label="Max Tokens", value=1000, precision=0, visible=True)
-     gr_semantic_chunk_long_file = gr.Checkbox(label="Semantic Chunking by Sentence similarity", value=False, visible=True)
-     gr_semantic_chunk_long_file_size = gr.Number(label="Max Chunk Size", value=2000, visible=True)
-     gr_semantic_chunk_long_file_overlap = gr.Number(label="Max Chunk Overlap Size", value=100, visible=True)
-     return [chunk_text_by_words_checkbox, max_words_input, chunk_text_by_sentences_checkbox, max_sentences_input,
-             chunk_text_by_paragraphs_checkbox, max_paragraphs_input, chunk_text_by_tokens_checkbox, max_tokens_input]
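Editor's note: the `error_handler` decorator near the end swallows exceptions and returns an `{"error": ..., "details": ...}` dict instead of raising, so callers have to check for that shape. A minimal sketch (the `risky_lookup` helper below is hypothetical, for illustration only):

```python
# Hypothetical example of wrapping a fragile helper with error_handler.
@error_handler
def risky_lookup(media_id: int):
    content, prompt, summary = fetch_item_details(media_id)
    if not content:
        raise ValueError(f"No content for media id {media_id}")
    return content

result = risky_lookup(-1)
if isinstance(result, dict) and "error" in result:
    print(result["error"])  # the decorator already logged the full traceback
```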
App_Function_Libraries/Gradio_UI/Import_Functionality.py DELETED
@@ -1,388 +0,0 @@
- # Import_Functionality.py
- # Functionality to import content into the DB
- #
- # Imports
- from time import sleep
- import logging
- import re
- import shutil
- import tempfile
- import os
- import traceback
- import zipfile
- #
- # External Imports
- import gradio as gr
- import yaml
- #
- # Local Imports
- from App_Function_Libraries.DB.DB_Manager import insert_prompt_to_db, load_preset_prompts, import_obsidian_note_to_db, \
-     add_media_to_database
- from App_Function_Libraries.Prompt_Handling import import_prompt_from_file, import_prompts_from_zip
- from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
-
- ###################################################################################################################
- #
- # Functions:
-
- logger = logging.getLogger()
-
-
- def import_data(file, title, author, keywords, custom_prompt, summary, auto_summarize, api_name, api_key):
-     logging.debug(f"Starting import_data with file: {file} / Title: {title} / Author: {author} / Keywords: {keywords}")
-     if file is None:
-         return "No file uploaded. Please upload a file."
-
-     try:
-         logging.debug(f"File object type: {type(file)}")
-         logging.debug(f"File object attributes: {dir(file)}")
-
-         if hasattr(file, 'name'):
-             file_name = file.name
-         else:
-             file_name = 'unknown_file'
-
-         # Create a temporary file
-         with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.txt', encoding='utf-8') as temp_file:
-             if isinstance(file, str):
-                 # If file is a string, it's likely the file content itself
-                 temp_file.write(file)
-             elif hasattr(file, 'read'):
-                 # If file has a 'read' method, it's likely a file-like object
-                 content = file.read()
-                 if isinstance(content, bytes):
-                     content = content.decode('utf-8')
-                 temp_file.write(content)
-             else:
-                 # If it's neither a string nor a file-like object, try converting it to a string
-                 temp_file.write(str(file))
-
-             temp_file.seek(0)
-             file_content = temp_file.read()
-
-         logging.debug(f"File name: {file_name}")
-         logging.debug(f"File content (first 100 chars): {file_content[:100]}")
-
-         # Create info_dict
-         info_dict = {
-             'title': title or 'Untitled',
-             'uploader': author or 'Unknown',
-         }
-
-         # FIXME - Add chunking support... I added chapter chunking specifically for this...
-         # Create segments (assuming one segment for the entire content)
-         segments = [{'Text': file_content}]
-
-         # Process keywords
-         keyword_list = [kw.strip() for kw in keywords.split(',') if kw.strip()] if keywords else []
-
-         # Handle summarization
-         if auto_summarize and api_name and api_key:
-             summary = perform_summarization(api_name, file_content, custom_prompt, api_key)
-         elif not summary:
-             summary = "No summary provided"
-
-         # Add to database
-         result = add_media_to_database(
-             url=file_name,  # Using the filename as the URL
-             info_dict=info_dict,
-             segments=segments,
-             summary=summary,
-             keywords=keyword_list,
-             custom_prompt_input=custom_prompt,
-             whisper_model="Imported",  # Indicating this was an imported file
-             media_type="document",
-             overwrite=False  # Set this to True if you want to overwrite existing entries
-         )
-
-         # Clean up the temporary file
-         os.unlink(temp_file.name)
-
-         return f"File '{file_name}' import attempt complete. Database result: {result}"
-     except Exception as e:
-         logging.exception(f"Error importing file: {str(e)}")
-         return f"Error importing file: {str(e)}"
-
-
- def process_obsidian_zip(zip_file):
-     with tempfile.TemporaryDirectory() as temp_dir:
-         try:
-             with zipfile.ZipFile(zip_file, 'r') as zip_ref:
-                 zip_ref.extractall(temp_dir)
-
-             imported_files, total_files, errors = import_obsidian_vault(temp_dir)
-
-             return imported_files, total_files, errors
-         except zipfile.BadZipFile:
-             error_msg = "The uploaded file is not a valid zip file."
-             logger.error(error_msg)
-             return 0, 0, [error_msg]
-         except Exception as e:
-             error_msg = f"Error processing zip file: {str(e)}\n{traceback.format_exc()}"
-             logger.error(error_msg)
-             return 0, 0, [error_msg]
-         finally:
-             shutil.rmtree(temp_dir, ignore_errors=True)
-
-
-
- def scan_obsidian_vault(vault_path):
-     markdown_files = []
-     for root, dirs, files in os.walk(vault_path):
-         for file in files:
-             if file.endswith('.md'):
-                 markdown_files.append(os.path.join(root, file))
-     return markdown_files
-
-
- def parse_obsidian_note(file_path):
-     with open(file_path, 'r', encoding='utf-8') as file:
-         content = file.read()
-
-     frontmatter = {}
-     frontmatter_match = re.match(r'^---\s*\n(.*?)\n---\s*\n', content, re.DOTALL)
-     if frontmatter_match:
-         frontmatter_text = frontmatter_match.group(1)
-         frontmatter = yaml.safe_load(frontmatter_text)
-         content = content[frontmatter_match.end():]
-
-     tags = re.findall(r'#(\w+)', content)
-     links = re.findall(r'\[\[(.*?)\]\]', content)
-
-     return {
-         'title': os.path.basename(file_path).replace('.md', ''),
-         'content': content,
-         'frontmatter': frontmatter,
-         'tags': tags,
-         'links': links,
-         'file_path': file_path  # Add this line
-     }
-
- def create_import_single_prompt_tab():
-     with gr.TabItem("Import a Prompt", visible=True):
-         gr.Markdown("# Import a prompt into the database")
-
-         with gr.Row():
-             with gr.Column():
-                 import_file = gr.File(label="Upload file for import", file_types=["txt", "md"])
-                 title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content")
-                 author_input = gr.Textbox(label="Author", placeholder="Enter the author's name")
-                 system_input = gr.Textbox(label="System", placeholder="Enter the system message for the prompt", lines=3)
-                 user_input = gr.Textbox(label="User", placeholder="Enter the user message for the prompt", lines=3)
-                 keywords_input = gr.Textbox(label="Keywords", placeholder="Enter keywords separated by commas")
-                 import_button = gr.Button("Import Prompt")
-
-             with gr.Column():
-                 import_output = gr.Textbox(label="Import Status")
-                 save_button = gr.Button("Save to Database")
-                 save_output = gr.Textbox(label="Save Status")
-
-         def handle_import(file):
-             result = import_prompt_from_file(file)
-             if isinstance(result, tuple) and len(result) == 5:
-                 title, author, system, user, keywords = result
-                 return gr.update(value="File successfully imported. You can now edit the content before saving."), \
-                        gr.update(value=title), gr.update(value=author), gr.update(value=system), \
-                        gr.update(value=user), gr.update(value=", ".join(keywords))
-             else:
-                 return gr.update(value=result), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
-
-         import_button.click(
-             fn=handle_import,
-             inputs=[import_file],
-             outputs=[import_output, title_input, author_input, system_input, user_input, keywords_input]
-         )
-
-         def save_prompt_to_db(title, author, system, user, keywords):
-             keyword_list = [k.strip() for k in keywords.split(',') if k.strip()]
-             return insert_prompt_to_db(title, author, system, user, keyword_list)
-
-         save_button.click(
-             fn=save_prompt_to_db,
-             inputs=[title_input, author_input, system_input, user_input, keywords_input],
-             outputs=save_output
-         )
-
-         def update_prompt_dropdown():
-             return gr.update(choices=load_preset_prompts())
-
-         save_button.click(
-             fn=update_prompt_dropdown,
-             inputs=[],
-             outputs=[gr.Dropdown(label="Select Preset Prompt")]
-         )
-
- def create_import_item_tab():
-     with gr.TabItem("Import Markdown/Text Files", visible=True):
-         gr.Markdown("# Import a markdown file or text file into the database")
-         gr.Markdown("...and have it tagged + summarized")
-         with gr.Row():
-             with gr.Column():
-                 import_file = gr.File(label="Upload file for import", file_types=["txt", "md"])
-                 title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content")
-                 author_input = gr.Textbox(label="Author", placeholder="Enter the author's name")
-                 keywords_input = gr.Textbox(label="Keywords", placeholder="Enter keywords, comma-separated")
-                 custom_prompt_input = gr.Textbox(label="Custom Prompt",
-                                                  placeholder="Enter a custom prompt for summarization (optional)")
-                 summary_input = gr.Textbox(label="Summary",
-                                            placeholder="Enter a summary or leave blank for auto-summarization", lines=3)
-                 auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
-                 api_name_input = gr.Dropdown(
-                     choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
-                              "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace", "Custom-OpenAI-API"],
-                     label="API for Auto-summarization"
-                 )
-                 api_key_input = gr.Textbox(label="API Key", type="password")
-             with gr.Column():
-                 import_button = gr.Button("Import Data")
-                 import_output = gr.Textbox(label="Import Status")
-
-         import_button.click(
-             fn=import_data,
-             inputs=[import_file, title_input, author_input, keywords_input, custom_prompt_input,
-                     summary_input, auto_summarize_checkbox, api_name_input, api_key_input],
-             outputs=import_output
-         )
-
-
- def create_import_multiple_prompts_tab():
-     with gr.TabItem("Import Multiple Prompts", visible=True):
-         gr.Markdown("# Import multiple prompts into the database")
-         gr.Markdown("Upload a zip file containing multiple prompt files (txt or md)")
-
-         with gr.Row():
-             with gr.Column():
-                 zip_file = gr.File(label="Upload zip file for import", file_types=["zip"])
-                 import_button = gr.Button("Import Prompts")
-                 prompts_dropdown = gr.Dropdown(label="Select Prompt to Edit", choices=[])
-                 title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content")
-                 author_input = gr.Textbox(label="Author", placeholder="Enter the author's name")
-                 system_input = gr.Textbox(label="System", placeholder="Enter the system message for the prompt",
-                                           lines=3)
-                 user_input = gr.Textbox(label="User", placeholder="Enter the user message for the prompt", lines=3)
-                 keywords_input = gr.Textbox(label="Keywords", placeholder="Enter keywords separated by commas")
-
-             with gr.Column():
-                 import_output = gr.Textbox(label="Import Status")
-                 save_button = gr.Button("Save to Database")
-                 save_output = gr.Textbox(label="Save Status")
-                 prompts_display = gr.Textbox(label="Identified Prompts")
-
-         def handle_zip_import(zip_file):
-             result = import_prompts_from_zip(zip_file)
-             if isinstance(result, list):
-                 prompt_titles = [prompt['title'] for prompt in result]
-                 # Update the dropdown's choices (not just its value) with the imported titles
-                 return gr.update(
-                     value="Zip file successfully imported. Select a prompt to edit from the dropdown."), gr.update(
-                     choices=prompt_titles), gr.update(value="\n".join(prompt_titles)), result
-             else:
-                 return gr.update(value=result), gr.update(choices=[]), gr.update(value=""), []
-
-         def handle_prompt_selection(selected_title, prompts):
-             selected_prompt = next((prompt for prompt in prompts if prompt['title'] == selected_title), None)
-             if selected_prompt:
-                 return (
-                     selected_prompt['title'],
-                     selected_prompt.get('author', ''),
-                     selected_prompt['system'],
-                     selected_prompt.get('user', ''),
-                     ", ".join(selected_prompt.get('keywords', []))
-                 )
-             else:
-                 return "", "", "", "", ""
-
-         zip_import_state = gr.State([])
-
-         import_button.click(
-             fn=handle_zip_import,
-             inputs=[zip_file],
-             outputs=[import_output, prompts_dropdown, prompts_display, zip_import_state]
-         )
-
-         prompts_dropdown.change(
-             fn=handle_prompt_selection,
-             inputs=[prompts_dropdown, zip_import_state],
-             outputs=[title_input, author_input, system_input, user_input, keywords_input]
-         )
-
-         def save_prompt_to_db(title, author, system, user, keywords):
-             keyword_list = [k.strip() for k in keywords.split(',') if k.strip()]
-             return insert_prompt_to_db(title, author, system, user, keyword_list)
-
-         save_button.click(
-             fn=save_prompt_to_db,
-             inputs=[title_input, author_input, system_input, user_input, keywords_input],
-             outputs=save_output
-         )
-
-         def update_prompt_dropdown():
-             return gr.update(choices=load_preset_prompts())
-
-         save_button.click(
-             fn=update_prompt_dropdown,
-             inputs=[],
-             outputs=[gr.Dropdown(label="Select Preset Prompt")]
-         )
-
-
- def create_import_obsidian_vault_tab():
-     with gr.TabItem("Import Obsidian Vault", visible=True):
-         gr.Markdown("## Import Obsidian Vault")
-         with gr.Row():
-             with gr.Column():
-                 vault_path_input = gr.Textbox(label="Obsidian Vault Path (Local)")
-                 vault_zip_input = gr.File(label="Upload Obsidian Vault (Zip)")
-             with gr.Column():
-                 import_vault_button = gr.Button("Import Obsidian Vault")
-                 import_status = gr.Textbox(label="Import Status", interactive=False)
-
-
-         def import_vault(vault_path, vault_zip):
-             if vault_zip:
-                 imported, total, errors = process_obsidian_zip(vault_zip.name)
-             elif vault_path:
-                 imported, total, errors = import_obsidian_vault(vault_path)
-             else:
-                 return "Please provide either a local vault path or upload a zip file."
-
-             status = f"Imported {imported} out of {total} files.\n"
-             if errors:
-                 status += f"Encountered {len(errors)} errors:\n" + "\n".join(errors)
-             return status
-
-
-         import_vault_button.click(
-             fn=import_vault,
-             inputs=[vault_path_input, vault_zip_input],
-             outputs=[import_status],
-         )
-
-
- def import_obsidian_vault(vault_path, progress=gr.Progress()):
-     try:
-         markdown_files = scan_obsidian_vault(vault_path)
-         total_files = len(markdown_files)
-         imported_files = 0
-         errors = []
-
-         for i, file_path in enumerate(markdown_files):
-             try:
-                 note_data = parse_obsidian_note(file_path)
-                 success, error_msg = import_obsidian_note_to_db(note_data)
-                 if success:
-                     imported_files += 1
-                 else:
-                     errors.append(error_msg)
-             except Exception as e:
-                 error_msg = f"Error processing {file_path}: {str(e)}"
-                 logger.error(error_msg)
-                 errors.append(error_msg)
-
-             progress((i + 1) / total_files, f"Imported {imported_files} of {total_files} files")
-             sleep(0.1)  # Small delay to prevent UI freezing
-
-         return imported_files, total_files, errors
-     except Exception as e:
-         error_msg = f"Error scanning vault: {str(e)}\n{traceback.format_exc()}"
-         logger.error(error_msg)
-         return 0, 0, [error_msg]
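Editor's note: `parse_obsidian_note` splits YAML frontmatter from the note body and then scrapes `#tags` and `[[wikilinks]]` with regexes. A sketch of what it returns for a small note (the vault path and note contents below are hypothetical):

```python
# Suppose /vault/Example.md (hypothetical) contains:
# ---
# status: draft
# ---
# A note about #python with a link to [[Another Note]].

note = parse_obsidian_note("/vault/Example.md")
assert note["title"] == "Example"
assert note["frontmatter"] == {"status": "draft"}
assert note["tags"] == ["python"]
assert note["links"] == ["Another Note"]
```

Note that the `#(\w+)` tag regex only matches when a word character follows the `#`, so markdown headings like `# Title` (hash plus space) are not picked up as tags.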
App_Function_Libraries/Gradio_UI/Introduction_tab.py DELETED
@@ -1,167 +0,0 @@
- # Introduction_tab.py
- # Gradio UI functions for the Introduction tab
- #
- # Imports
- #
- # External Imports
- import gradio as gr
- #
- # Local Imports
- from App_Function_Libraries.DB.DB_Manager import get_db_config
- #
- ####################################################################################################
- #
- # Functions:
-
-
-
- def create_introduction_tab():
-     with gr.TabItem("Introduction", visible=True):
-         db_config = get_db_config()
-         db_type = db_config['type']
-         gr.Markdown(f"# tldw: Your LLM-powered Research Multi-tool (Using {db_type.capitalize()} Database)")
-         with gr.Row():
-             with gr.Column():
-                 gr.Markdown("""### What can it do?
- - Transcribe and summarize videos from URLs/local files
- - Transcribe and summarize audio files/podcasts (URL/local file)
- - Summarize articles from URLs/local notes
- - Ingest and summarize books (epub/PDF)
- - Ingest and summarize research papers (PDFs - WIP)
- - Search and display ingested content + summaries
- - Create and manage custom prompts
- - Chat with an LLM of your choice to generate content using the selected item + prompts
- - Keyword support for content search and display
- - Export keywords/items to markdown/CSV (CSV is WIP)
- - Import existing notes from Obsidian to the database (Markdown/txt files or a zip containing a collection of files)
- - View and manage chat history
- - Writing Tools: Grammar & Style check, Tone Analyzer & Editor, more planned...
- - RAG (Retrieval-Augmented Generation) support for content generation (think about asking questions about your entire library of items)
- - More features planned...
- - All powered by your choice of LLM.
-     - Currently supports: Local-LLM (llamafile-server), OpenAI, Anthropic, Cohere, Groq, DeepSeek, OpenRouter, Llama.cpp, Kobold, Ooba, Tabbyapi, VLLM and more to come...
- - All data is stored locally in a SQLite database for easy access and management.
- - No trackers (Gradio has some analytics but it's disabled here...)
- - No ads, no tracking, no BS. Just you and your content.
- - Open-source and free to use. Contributions welcome!
- - If you have any thoughts or feedback, please let me know on GitHub or via email.
- """)
-                 gr.Markdown(
- """Follow this project at [tl/dw: Too Long, Didn't Watch - Your Personal Research Multi-Tool - GitHub](https://github.com/rmusser01/tldw)""")
-             with gr.Column():
-                 gr.Markdown("""### How to use:
- ##### Quick Start: Just click on the appropriate tab for what you're trying to do and fill in the required fields. Click "Process <video/audio/article/etc>" and wait for the results.
- #### Simple Instructions
- - Basic Usage:
-     - If you don't have an API key/don't know what an LLM is/don't know what an API key is, please look further down the page for information on getting started.
-     - If you want summaries/chat with an LLM, you'll need:
-         1. An API key for the LLM API service you want to use, or,
-         2. A local inference server running an LLM (like llamafile-server/llama.cpp - for instructions on how to do so see the project's README or below), or,
-         3. A "local" inference server you have access to running an LLM.
-     - If you just want transcriptions you can ignore the above.
-     - Select the tab for the task you want to perform
-     - Fill in the required fields
-     - Click the "Process" button
-     - Wait for the results to appear
-     - Download the results if needed
-     - Repeat as needed
- - As of writing this, the UI is still a work in progress.
-     - That being said, I plan to replace it all eventually. In the meantime, please have patience.
- - The UI is divided into tabs for different tasks.
-     - Each tab has a set of fields that you can fill in to perform the task.
-     - Some fields are mandatory, some are optional.
-     - The fields are mostly self-explanatory, but I will try to add more detailed instructions as I go.
- #### Detailed Usage:
- - There are 8 top-level tabs in the UI. Each tab has a specific set of tasks that you can perform by selecting one of the 'sub-tabs' made available by clicking on the top tab.
- - The tabs are as follows:
-     1. Transcription / Summarization / Ingestion - This tab is for processing videos, audio files, articles, books, and PDFs/office docs.
-     2. Search / Detailed View - This tab is for searching and displaying content from the database. You can also view detailed information about the selected item.
-     3. Chat with an LLM - This tab is for chatting with an LLM to generate content based on the selected item and prompts.
-     4. Edit Existing Items - This tab is for editing existing items in the database (Prompts + ingested items).
-     5. Writing Tools - This tab is for using various writing tools like Grammar & Style check, Tone Analyzer & Editor, etc.
-     6. Keywords - This tab is for managing keywords for content search and display.
-     7. Import/Export - This tab is for importing notes from Obsidian and exporting keywords/items to markdown/CSV.
-     8. Utilities - This tab contains some random utilities that I thought might be useful.
- - Each sub-tab is responsible for that set of functionality. This is reflected in the codebase as well, where I have split the functionality into separate files for each tab/larger goal.
- """)
-         with gr.Row():
-             gr.Markdown("""### HELP! I don't know what any of this shit is!
- ### DON'T PANIC
- #### It's ok, you're not alone; most people have no clue what any of this stuff is.
- - So let's try and fix that.
-
- #### Introduction to LLMs:
- - Non-technical introduction to Generative AI and LLMs: https://paruir.medium.com/understanding-generative-ai-and-llms-a-non-technical-overview-part-1-788c0eb0dd64
- - Google's Intro to LLMs: https://developers.google.com/machine-learning/resources/intro-llms#llm_considerations
- - LLMs 101 (coming from a tech background): https://vinija.ai/models/LLM/
- - LLM Fundamentals / LLM Scientist / LLM Engineer courses (free): https://github.com/mlabonne/llm-course
-
- #### Various Phrases & Terms to know
- - **LLM** - Large Language Model - A type of neural network that can generate human-like text.
- - **API** - Application Programming Interface - A set of rules and protocols that allows one software application to communicate with another.
-     * Think of it like a postal address for a piece of software. You can send messages to and from it.
- - **API Key** - A unique identifier that is used to authenticate a user, developer, or calling program to an API.
-     * Like the key to a post office box. You need it to access the contents.
- - **GUI** - Graphical User Interface - the thing facilitating your interaction with this application.
- - **DB** - Database
- - **Prompt Engineering** - The process of designing prompts that are used to guide the output of a language model. Is a meme but also very much not.
- - **Quantization** - The process of converting a continuous range of values into a finite range of discrete values.
-     * https://github.com/ggerganov/llama.cpp/blob/cddae4884c853b1a7ab420458236d666e2e34423/examples/quantize/README.md#L27
- - **GGUF Files** - GGUF is a binary format that is designed for fast loading and saving of models, and for ease of reading. Models are traditionally developed using PyTorch or another framework, and then converted to GGUF for use in GGML. https://github.com/ggerganov/ggml/blob/master/docs/gguf.md
- - **Inference Engine** - A software system that is designed to execute a model that has been trained by a machine learning algorithm. Llama.cpp and Kobold.cpp are examples of inference engines.
- - **Abliteration** - https://huggingface.co/blog/mlabonne/abliteration
- """)
-         with gr.Row():
-             gr.Markdown("""### Ok cool, but how do I get started? I don't have an API key or a local server running...
- #### Great, glad you asked! Getting Started:
- - **Getting an API key for a commercial services provider:**
-     - **OpenAI:**
-         * https://platform.openai.com/docs/quickstart
-     - **Anthropic:**
-         * https://docs.anthropic.com/en/api/getting-started
-     - **Cohere:**
-         * https://docs.cohere.com/
-         * They offer 1k free requests a month (up to 1 million tokens total, I think?), so you can try it out without paying.
-     - **Groq:**
-         * https://console.groq.com/keys
-         * They offer an account with free credits to try out their service. No idea how much you get.
-     - **DeepSeek:**
-         * https://platform.deepseek.com/ (Chinese-hosted/is in English)
-     - **OpenRouter:**
-         * https://openrouter.ai/
-     - **Mistral:**
-         * https://console.mistral.ai/
- - **Choosing a Model to download**
-     - You'll first need to select a model you want to use with the server.
-     - Keep in mind that the model you select will determine the quality of the output you get, and that models run fastest when offloaded fully to your GPU.
-         * So this means that you can run a large model (Command-R) on CPU+System RAM, but you're gonna see a massive performance hit. Not saying it's unusable, but it's not ideal.
-         * With that in mind, I would recommend an abliterated version of Meta's Llama3.1 model for most tasks. (Abliterated since it won't refuse requests)
-         * I say this because of the general quality of the model + its context size.
-         * You can find the model here: https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF
-         * And the Q8 quant (total size 8.6GB): https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF/resolve/main/meta-llama-3.1-8b-instruct-abliterated.Q8_0.gguf?download=true
- - **Local Inference Server:**
-     - **Llamafile-Server (wrapper for llama.cpp):**
-         * Run this script with the `--local_llm` argument next time, and you'll be walked through setting up a local instance of llamafile-server.
-     - **Llama.cpp Inference Engine:**
-         * Download the latest release for your platform here: https://github.com/ggerganov/llama.cpp/releases
-         * Windows: `llama-<release_number>-bin-win-cuda-cu<11.7.1 or 12.2.0 - version depends on installed cuda>-x64.zip`
-             * Run it: `llama-server.exe --model <path_to_model> -c 8192 -ngl 999`
-                 - `-c 8192` sets the context size to 8192 tokens, `-ngl 999` sets the number of layers to offload to the GPU to 999. (essentially ensuring we only use our GPU and not CPU for processing)
-         * macOS: `llama-<release_number>-bin-macos-arm64.zip` - for Apple Silicon / `llama-<release_number>-bin-macos-x64.zip` - for Intel Macs
-             * Run it: `llama-server --model <path_to_model> -c 8192 -ngl 999`
-                 - `-c 8192` sets the context size to 8192 tokens, `-ngl 999` sets the number of layers to offload to the GPU to 999. (essentially ensuring we only use our GPU and not CPU for processing)
-         * Linux: You can probably figure it out.
-     - **Kobold.cpp Server:**
-         1. Download from here: https://github.com/LostRuins/koboldcpp/releases/latest
-         2. `Double click KoboldCPP.exe and select model OR run "KoboldCPP.exe --help" in CMD prompt to get command line arguments for more control.`
-         3. `Generally you don't have to change much besides the Presets and GPU Layers. Run with CuBLAS or CLBlast for GPU acceleration.`
-         4. `Select your GGUF or GGML model you downloaded earlier, and connect to the displayed URL once it finishes loading.`
-     - **Linux**
-         1. `On Linux, we provide a koboldcpp-linux-x64 PyInstaller prebuilt binary on the releases page for modern systems. Simply download and run the binary.`
-             * Alternatively, you can also install koboldcpp to the current directory by running the following terminal command: `curl -fLo koboldcpp https://github.com/LostRuins/koboldcpp/releases/latest/download/koboldcpp-linux-x64 && chmod +x koboldcpp`
-         2. When you can't use the precompiled binary directly, we provide an automated build script which uses conda to obtain all dependencies, and generates (from source) a ready-to-use PyInstaller binary for Linux users. Simply execute the build script with `./koboldcpp.sh dist` and run the generated binary.
- """)
-
- #
- # End of Introduction_tab.py
- ####################################################################################################
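Editor's note: once a local `llama-server` is running as described above, recent llama.cpp builds also expose an OpenAI-compatible HTTP endpoint, which makes for an easy sanity check before pointing tldw at it. A minimal sketch, assuming the server's default address of `http://localhost:8080` (adjust host/port to your setup):

```python
# Hypothetical smoke test against a local llama-server
# (OpenAI-compatible /v1/chat/completions endpoint in recent llama.cpp builds).
import json
import urllib.request

payload = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ]
}
req = urllib.request.Request(
    "http://localhost:8080/v1/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    body = json.load(resp)
print(body["choices"][0]["message"]["content"])
```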
App_Function_Libraries/Gradio_UI/Keywords.py DELETED
@@ -1,65 +0,0 @@
1
- # Keywords.py
2
- # Purpose: This file contains the functions to create the Keywords tab in the Gradio UI.
3
- #
4
- # The Keywords tab allows the user to add, delete, view, and export keywords from the database.
5
- #
6
- # Imports:
7
-
8
- #
9
- # External Imports
10
- import gradio as gr
11
- #
12
- # Internal Imports
13
- from App_Function_Libraries.DB.DB_Manager import add_keyword, delete_keyword, keywords_browser_interface, export_keywords_to_csv
14
- #
15
- #
16
- ######################################################################################################################
17
- #
18
- # Functions:
19
-
20
-
21
- def create_export_keywords_tab():
22
- with gr.TabItem("Export Keywords", visible=True):
23
- with gr.Row():
24
- with gr.Column():
25
- export_keywords_button = gr.Button("Export Keywords")
26
- with gr.Column():
27
- export_keywords_output = gr.File(label="Download Exported Keywords")
28
- export_keywords_status = gr.Textbox(label="Export Status")
29
-
30
- export_keywords_button.click(
31
- fn=export_keywords_to_csv,
32
- outputs=[export_keywords_status, export_keywords_output]
33
- )
34
-
35
- def create_view_keywords_tab():
36
- with gr.TabItem("View Keywords", visible=True):
37
- gr.Markdown("# Browse Keywords")
38
- with gr.Column():
39
- browse_output = gr.Markdown()
40
- browse_button = gr.Button("View Existing Keywords")
41
- browse_button.click(fn=keywords_browser_interface, outputs=browse_output)
42
-
43
-
44
- def create_add_keyword_tab():
45
- with gr.TabItem("Add Keywords", visible=True):
46
- with gr.Row():
47
- with gr.Column():
48
- gr.Markdown("# Add Keywords to the Database")
49
- add_input = gr.Textbox(label="Add Keywords (comma-separated)", placeholder="Enter keywords here...")
50
- add_button = gr.Button("Add Keywords")
51
- with gr.Row():
52
- add_output = gr.Textbox(label="Result")
53
- add_button.click(fn=add_keyword, inputs=add_input, outputs=add_output)
54
-
55
-
56
- def create_delete_keyword_tab():
57
- with gr.Tab("Delete Keywords", visible=True):
58
- with gr.Row():
59
- with gr.Column():
60
- gr.Markdown("# Delete Keywords from the Database")
61
- delete_input = gr.Textbox(label="Delete Keyword", placeholder="Enter keyword to delete here...")
62
- delete_button = gr.Button("Delete Keyword")
63
- with gr.Row():
64
- delete_output = gr.Textbox(label="Result")
65
- delete_button.click(fn=delete_keyword, inputs=delete_input, outputs=delete_output)
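Each `create_*_tab()` above opens a `gr.TabItem`, which Gradio requires to live inside a `gr.Tabs` context. A minimal wiring sketch (the surrounding `Blocks` app is assumed, not part of this file):

```python
# Sketch: composing the keyword tabs into a standalone Blocks app.
import gradio as gr

from App_Function_Libraries.Gradio_UI.Keywords import (
    create_add_keyword_tab,
    create_delete_keyword_tab,
    create_view_keywords_tab,
    create_export_keywords_tab,
)

with gr.Blocks() as demo:
    with gr.Tabs():
        create_add_keyword_tab()
        create_delete_keyword_tab()
        create_view_keywords_tab()
        create_export_keywords_tab()

if __name__ == "__main__":
    demo.launch()
```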
App_Function_Libraries/Gradio_UI/Live_Recording.py DELETED
@@ -1,142 +0,0 @@
1
- # Live_Recording.py
2
- # Description: Gradio UI for live audio recording and transcription.
3
- #
4
- # Import necessary modules and functions
5
- import logging
6
- import os
7
- import time
8
-
9
- # External Imports
10
- import gradio as gr
11
- # Local Imports
12
- from App_Function_Libraries.Audio.Audio_Transcription_Lib import (record_audio, speech_to_text, save_audio_temp,
13
- stop_recording)
14
- from App_Function_Libraries.DB.DB_Manager import add_media_to_database
15
- from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
16
- #
17
- #######################################################################################################################
18
- #
19
- # Functions:
20
-
21
- whisper_models = ["small", "medium", "small.en", "medium.en", "medium", "large", "large-v1", "large-v2", "large-v3",
22
- "distil-large-v2", "distil-medium.en", "distil-small.en"]
23
-
24
- def create_live_recording_tab():
25
- with gr.Tab("Live Recording and Transcription", visible=True):
26
- gr.Markdown("# Live Audio Recording and Transcription")
27
- with gr.Row():
28
- with gr.Column():
29
- duration = gr.Slider(minimum=1, maximum=8000, value=15, label="Recording Duration (seconds)")
30
- whisper_models_input = gr.Dropdown(choices=whisper_models, value="medium", label="Whisper Model")
31
- vad_filter = gr.Checkbox(label="Use VAD Filter")
32
- save_recording = gr.Checkbox(label="Save Recording")
33
- save_to_db = gr.Checkbox(label="Save Transcription to Database(Must be checked to save - can be checked afer transcription)", value=False)
34
- custom_title = gr.Textbox(label="Custom Title (for database)", visible=False)
35
- record_button = gr.Button("Start Recording")
36
- stop_button = gr.Button("Stop Recording")
37
- with gr.Column():
38
- output = gr.Textbox(label="Transcription", lines=10)
39
- audio_output = gr.Audio(label="Recorded Audio", visible=False)
40
-
41
- recording_state = gr.State(value=None)
42
-
43
- def start_recording(duration):
44
- log_counter("live_recording_start_attempt", labels={"duration": duration})
45
- p, stream, audio_queue, stop_event, audio_thread = record_audio(duration)
46
- log_counter("live_recording_start_success", labels={"duration": duration})
47
- return (p, stream, audio_queue, stop_event, audio_thread)
48
-
49
- def end_recording_and_transcribe(recording_state, whisper_model, vad_filter, save_recording, save_to_db, custom_title):
50
- log_counter("live_recording_end_attempt", labels={"model": whisper_model})
51
- start_time = time.time()
52
-
53
- if recording_state is None:
54
- log_counter("live_recording_end_error", labels={"error": "Recording hasn't started yet"})
55
- return "Recording hasn't started yet.", None
56
-
57
- p, stream, audio_queue, stop_event, audio_thread = recording_state
58
- audio_data = stop_recording(p, stream, audio_queue, stop_event, audio_thread)
59
-
60
- temp_file = save_audio_temp(audio_data)
61
- segments = speech_to_text(temp_file, whisper_model=whisper_model, vad_filter=vad_filter)
62
- transcription = "\n".join([segment["Text"] for segment in segments])
63
-
64
- if save_recording:
65
- log_counter("live_recording_saved", labels={"model": whisper_model})
66
- else:
67
- os.remove(temp_file)
68
-
69
- end_time = time.time() - start_time
70
- log_histogram("live_recording_end_duration", end_time, labels={"model": whisper_model})
71
- log_counter("live_recording_end_success", labels={"model": whisper_model})
72
- return transcription, temp_file if save_recording else None
73
-
74
- def save_transcription_to_db(transcription, custom_title):
75
- log_counter("save_transcription_to_db_attempt")
76
- start_time = time.time()
77
- if custom_title.strip() == "":
78
- custom_title = "Self-recorded Audio"
79
-
80
- try:
81
- url = "self_recorded"
82
- info_dict = {
83
- "title": custom_title,
84
- "uploader": "self-recorded",
85
- "webpage_url": url
86
- }
87
- segments = [{"Text": transcription}]
88
- summary = ""
89
- keywords = ["self-recorded", "audio"]
90
- custom_prompt_input = ""
91
- whisper_model = "self-recorded"
92
- media_type = "audio"
93
-
94
- result = add_media_to_database(
95
- url=url,
96
- info_dict=info_dict,
97
- segments=segments,
98
- summary=summary,
99
- keywords=keywords,
100
- custom_prompt_input=custom_prompt_input,
101
- whisper_model=whisper_model,
102
- media_type=media_type
103
- )
104
- end_time = time.time() - start_time
105
- log_histogram("save_transcription_to_db_duration", end_time)
106
- log_counter("save_transcription_to_db_success")
107
- return f"Transcription saved to database successfully. {result}"
108
- except Exception as e:
109
- logging.error(f"Error saving transcription to database: {str(e)}")
110
- log_counter("save_transcription_to_db_error", labels={"error": str(e)})
111
- return f"Error saving transcription to database: {str(e)}"
112
-
113
- def update_custom_title_visibility(save_to_db):
114
- return gr.update(visible=save_to_db)
115
-
116
- record_button.click(
117
- fn=start_recording,
118
- inputs=[duration],
119
- outputs=[recording_state]
120
- )
121
-
122
- stop_button.click(
123
- fn=end_recording_and_transcribe,
124
- inputs=[recording_state, whisper_models_input, vad_filter, save_recording, save_to_db, custom_title],
125
- outputs=[output, audio_output]
126
- )
127
-
128
- save_to_db.change(
129
- fn=update_custom_title_visibility,
130
- inputs=[save_to_db],
131
- outputs=[custom_title]
132
- )
133
-
134
- gr.Button("Save to Database").click(
135
- fn=save_transcription_to_db,
136
- inputs=[output, custom_title],
137
- outputs=gr.Textbox(label="Database Save Status")
138
- )
139
-
140
- #
141
- # End of Functions
142
- ########################################################################################################################
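For reference, the recording helpers compose as follows outside the UI; the tuple shape and signatures are inferred from the call sites in `create_live_recording_tab()` above, not verified against `Audio_Transcription_Lib`:

```python
# Sketch of the record -> stop -> transcribe pipeline used by the tab above.
# Signatures are inferred from the call sites in this file.
from App_Function_Libraries.Audio.Audio_Transcription_Lib import (
    record_audio, stop_recording, save_audio_temp, speech_to_text,
)

state = record_audio(15)                 # (p, stream, audio_queue, stop_event, audio_thread)
audio_data = stop_recording(*state)      # drains the queue and closes the stream
temp_file = save_audio_temp(audio_data)  # writes a temporary audio file, returns its path
segments = speech_to_text(temp_file, whisper_model="medium", vad_filter=False)
print("\n".join(segment["Text"] for segment in segments))
```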
App_Function_Libraries/Gradio_UI/Llamafile_tab.py DELETED
@@ -1,312 +0,0 @@
1
- # Llamafile_tab.py
2
- # Description: Gradio interface for configuring and launching Llamafile with Local LLMs
3
-
4
- # Imports
5
- import os
6
- import logging
7
- from typing import Tuple, Optional
8
- import gradio as gr
9
-
10
-
11
- from App_Function_Libraries.Local_LLM.Local_LLM_Inference_Engine_Lib import (
12
- download_llm_model,
13
- llm_models,
14
- start_llamafile,
15
- get_gguf_llamafile_files
16
- )
17
- #
18
- #######################################################################################################################
19
- #
20
- # Functions:
21
-
22
- def create_chat_with_llamafile_tab():
23
- # Function to update model path based on selection
24
- def on_local_model_change(selected_model: str, search_directory: str) -> str:
25
- if selected_model and isinstance(search_directory, str):
26
- model_path = os.path.abspath(os.path.join(search_directory, selected_model))
27
- logging.debug(f"Selected model path: {model_path}") # Debug print for selected model path
28
- return model_path
29
- return "Invalid selection or directory."
30
-
31
- # Function to update the dropdown with available models
32
- def update_dropdowns(search_directory: str) -> Tuple[dict, str]:
33
- logging.debug(f"User-entered directory: {search_directory}") # Debug print for directory
34
- if not os.path.isdir(search_directory):
35
- logging.debug(f"Directory does not exist: {search_directory}") # Debug print for non-existing directory
36
- return gr.update(choices=[], value=None), "Directory does not exist."
37
-
38
- logging.debug(f"Directory exists: {search_directory}, scanning for files...") # Confirm directory exists
39
- model_files = get_gguf_llamafile_files(search_directory)
40
-
41
- if not model_files:
42
- logging.debug(f"No model files found in {search_directory}") # Debug print for no files found
43
- return gr.update(choices=[], value=None), "No model files found in the specified directory."
44
-
45
- # Update the dropdown choices with the model files found
46
- logging.debug(f"Models loaded from {search_directory}: {model_files}") # Debug: Print model files loaded
47
- return gr.update(choices=model_files, value=None), f"Models loaded from {search_directory}."
48
-
49
-
50
-
51
- def download_preset_model(selected_model: str) -> Tuple[str, str]:
52
- """
53
- Downloads the selected preset model.
54
-
55
- Args:
56
- selected_model (str): The key of the selected preset model.
57
-
58
- Returns:
59
- Tuple[str, str]: Status message and the path to the downloaded model.
60
- """
61
- model_info = llm_models.get(selected_model)
62
- if not model_info:
63
- return "Invalid model selection.", ""
64
-
65
- try:
66
- model_path = download_llm_model(
67
- model_name=model_info["name"],
68
- model_url=model_info["url"],
69
- model_filename=model_info["filename"],
70
- model_hash=model_info["hash"]
71
- )
72
- return f"Model '{model_info['name']}' downloaded successfully.", model_path
73
- except Exception as e:
74
- logging.error(f"Error downloading model: {e}")
75
- return f"Failed to download model: {e}", ""
76
-
77
- with gr.TabItem("Local LLM with Llamafile", visible=True):
78
- gr.Markdown("# Settings for Llamafile")
79
-
80
- with gr.Row():
81
- with gr.Column():
82
- am_noob = gr.Checkbox(label="Enable Sane Defaults", value=False, visible=True)
83
- advanced_mode_toggle = gr.Checkbox(label="Advanced Mode - Show All Settings", value=False)
84
- # Advanced Inputs
85
- verbose_checked = gr.Checkbox(label="Enable Verbose Output", value=False, visible=False)
86
- threads_checked = gr.Checkbox(label="Set CPU Threads", value=False, visible=False)
87
- threads_value = gr.Number(label="Number of CPU Threads", value=None, precision=0, visible=False)
88
- threads_batched_checked = gr.Checkbox(label="Enable Batched Inference", value=False, visible=False)
89
- threads_batched_value = gr.Number(label="Batch Size for Inference", value=None, precision=0, visible=False)
90
- model_alias_checked = gr.Checkbox(label="Set Model Alias", value=False, visible=False)
91
- model_alias_value = gr.Textbox(label="Model Alias", value="", visible=False)
92
- ctx_size_checked = gr.Checkbox(label="Set Prompt Context Size", value=False, visible=False)
93
- ctx_size_value = gr.Number(label="Prompt Context Size", value=8124, precision=0, visible=False)
94
- ngl_checked = gr.Checkbox(label="Enable GPU Layers", value=False, visible=True)
95
- ngl_value = gr.Number(label="Number of GPU Layers", value=None, precision=0, visible=True)
96
- batch_size_checked = gr.Checkbox(label="Set Batch Size", value=False, visible=False)
97
- batch_size_value = gr.Number(label="Batch Size", value=512, visible=False)
98
- memory_f32_checked = gr.Checkbox(label="Use 32-bit Floating Point", value=False, visible=False)
99
- numa_checked = gr.Checkbox(label="Enable NUMA", value=False, visible=False)
100
- server_timeout_value = gr.Number(label="Server Timeout", value=600, precision=0, visible=False)
101
- host_checked = gr.Checkbox(label="Set IP to Listen On", value=False, visible=False)
102
- host_value = gr.Textbox(label="Host IP Address", value="", visible=False)
103
- port_checked = gr.Checkbox(label="Set Server Port", value=False, visible=False)
104
- port_value = gr.Number(label="Port Number", value=8080, precision=0, visible=False)
105
- api_key_checked = gr.Checkbox(label="Set API Key", value=False, visible=False)
106
- api_key_value = gr.Textbox(label="API Key", value="", visible=False)
107
- http_threads_checked = gr.Checkbox(label="Set HTTP Server Threads", value=False, visible=False)
108
- http_threads_value = gr.Number(label="Number of HTTP Server Threads", value=None, precision=0, visible=False)
109
- hf_repo_checked = gr.Checkbox(label="Use Huggingface Repo Model", value=False, visible=False)
110
- hf_repo_value = gr.Textbox(label="Huggingface Repo Name", value="", visible=False)
111
- hf_file_checked = gr.Checkbox(label="Set Huggingface Model File", value=False, visible=False)
112
- hf_file_value = gr.Textbox(label="Huggingface Model File", value="", visible=False)
113
-
114
- with gr.Column():
115
- # Model Selection Section
116
- gr.Markdown("## Model Selection")
117
-
118
- # Option 1: Select from Local Filesystem
119
- with gr.Row():
120
- search_directory = gr.Textbox(label="Model Directory",
121
- placeholder="Enter directory path(currently '.\Models')",
122
- value=".\Models",
123
- interactive=True)
124
-
125
- # Initial population of local models
126
- initial_dropdown_update, _ = update_dropdowns("./Models")
127
- refresh_button = gr.Button("Refresh Models")
128
- local_model_dropdown = gr.Dropdown(label="Select Model from Directory", choices=[])
129
- # Display selected model path
130
- model_value = gr.Textbox(label="Selected Model File Path", value="", interactive=False)
131
-
132
- # Option 2: Download Preset Models
133
- gr.Markdown("## Download Preset Models")
134
-
135
- preset_model_dropdown = gr.Dropdown(
136
- label="Select a Preset Model",
137
- choices=list(llm_models.keys()),
138
- value=None,
139
- interactive=True,
140
- info="Choose a preset model to download."
141
- )
142
- download_preset_button = gr.Button("Download Selected Preset")
143
-
144
- with gr.Row():
145
- with gr.Column():
146
- start_button = gr.Button("Start Llamafile")
147
- stop_button = gr.Button("Stop Llamafile (doesn't work)")
148
- output_display = gr.Markdown()
149
-
150
-
151
- # Show/hide advanced inputs based on toggle
152
- def update_visibility(show_advanced: bool):
153
- components = [
154
- verbose_checked, threads_checked, threads_value,
155
- http_threads_checked, http_threads_value,
156
- hf_repo_checked, hf_repo_value,
157
- hf_file_checked, hf_file_value,
158
- ctx_size_checked, ctx_size_value,
159
- ngl_checked, ngl_value,
160
- host_checked, host_value,
161
- port_checked, port_value
162
- ]
163
- return [gr.update(visible=show_advanced) for _ in components]
164
-
165
- def on_start_button_click(
166
- am_noob: bool,
167
- verbose_checked: bool,
168
- threads_checked: bool,
169
- threads_value: Optional[int],
170
- threads_batched_checked: bool,
171
- threads_batched_value: Optional[int],
172
- model_alias_checked: bool,
173
- model_alias_value: str,
174
- http_threads_checked: bool,
175
- http_threads_value: Optional[int],
176
- model_value: str,
177
- hf_repo_checked: bool,
178
- hf_repo_value: str,
179
- hf_file_checked: bool,
180
- hf_file_value: str,
181
- ctx_size_checked: bool,
182
- ctx_size_value: Optional[int],
183
- ngl_checked: bool,
184
- ngl_value: Optional[int],
185
- batch_size_checked: bool,
186
- batch_size_value: Optional[int],
187
- memory_f32_checked: bool,
188
- numa_checked: bool,
189
- server_timeout_value: Optional[int],
190
- host_checked: bool,
191
- host_value: str,
192
- port_checked: bool,
193
- port_value: Optional[int],
194
- api_key_checked: bool,
195
- api_key_value: str
196
- ) -> str:
197
- """
198
- Event handler for the Start Llamafile button.
199
- """
200
- try:
201
- result = start_llamafile(
202
- am_noob,
203
- verbose_checked,
204
- threads_checked,
205
- threads_value,
206
- threads_batched_checked,
207
- threads_batched_value,
208
- model_alias_checked,
209
- model_alias_value,
210
- http_threads_checked,
211
- http_threads_value,
212
- model_value,
213
- hf_repo_checked,
214
- hf_repo_value,
215
- hf_file_checked,
216
- hf_file_value,
217
- ctx_size_checked,
218
- ctx_size_value,
219
- ngl_checked,
220
- ngl_value,
221
- batch_size_checked,
222
- batch_size_value,
223
- memory_f32_checked,
224
- numa_checked,
225
- server_timeout_value,
226
- host_checked,
227
- host_value,
228
- port_checked,
229
- port_value,
230
- api_key_checked,
231
- api_key_value
232
- )
233
- return result
234
- except Exception as e:
235
- logging.error(f"Error starting Llamafile: {e}")
236
- return f"Failed to start Llamafile: {e}"
237
-
238
- advanced_mode_toggle.change(
239
- fn=update_visibility,
240
- inputs=[advanced_mode_toggle],
241
- outputs=[
242
- verbose_checked, threads_checked, threads_value,
243
- http_threads_checked, http_threads_value,
244
- hf_repo_checked, hf_repo_value,
245
- hf_file_checked, hf_file_value,
246
- ctx_size_checked, ctx_size_value,
247
- ngl_checked, ngl_value,
248
- host_checked, host_value,
249
- port_checked, port_value
250
- ]
251
- )
252
-
253
- start_button.click(
254
- fn=on_start_button_click,
255
- inputs=[
256
- am_noob,
257
- verbose_checked,
258
- threads_checked,
259
- threads_value,
260
- threads_batched_checked,
261
- threads_batched_value,
262
- model_alias_checked,
263
- model_alias_value,
264
- http_threads_checked,
265
- http_threads_value,
266
- model_value,
267
- hf_repo_checked,
268
- hf_repo_value,
269
- hf_file_checked,
270
- hf_file_value,
271
- ctx_size_checked,
272
- ctx_size_value,
273
- ngl_checked,
274
- ngl_value,
275
- batch_size_checked,
276
- batch_size_value,
277
- memory_f32_checked,
278
- numa_checked,
279
- server_timeout_value,
280
- host_checked,
281
- host_value,
282
- port_checked,
283
- port_value,
284
- api_key_checked,
285
- api_key_value
286
- ],
287
- outputs=output_display
288
- )
289
-
290
- download_preset_button.click(
291
- fn=download_preset_model,
292
- inputs=[preset_model_dropdown],
293
- outputs=[output_display, model_value]
294
- )
295
-
296
- # Click event for refreshing models
297
- refresh_button.click(
298
- fn=update_dropdowns,
299
- inputs=[search_directory], # Ensure that the directory path (string) is passed
300
- outputs=[local_model_dropdown, output_display] # Update dropdown and status
301
- )
302
-
303
- # Event to update model_value when a model is selected from the dropdown
304
- local_model_dropdown.change(
305
- fn=on_local_model_change, # Function that calculates the model path
306
- inputs=[local_model_dropdown, search_directory], # Inputs: selected model and directory
307
- outputs=[model_value] # Output: Update the model_value textbox with the selected model path
308
- )
309
-
310
- #
311
- #
312
- #######################################################################################################################
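The refresh flow above depends on `get_gguf_llamafile_files()` returning a list of model filenames for a directory. A stand-in showing the assumed contract (the real implementation lives in `Local_LLM_Inference_Engine_Lib`):

```python
# Stand-in for the assumed contract of get_gguf_llamafile_files():
# list model files (by extension) found directly in search_directory.
import os
from typing import List

def get_gguf_llamafile_files_sketch(search_directory: str) -> List[str]:
    return sorted(
        f for f in os.listdir(search_directory)
        if f.lower().endswith((".gguf", ".llamafile"))
    )
```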
App_Function_Libraries/Gradio_UI/MMLU_Pro_tab.py DELETED
@@ -1,115 +0,0 @@
1
- # MMLU_Pro_tab.py
2
- # Contains the Gradio UI code for the MMLU-Pro benchmarking tool.
3
- #
4
- ##############################################################################################################
5
- # Imports
6
- import os
7
-
8
- import gradio as gr
9
- import logging
10
- #
11
- # External Imports
12
- from tqdm import tqdm
13
- # Local Imports
14
- from App_Function_Libraries.Benchmarks_Evaluations.MMLU_Pro.MMLU_Pro_rewritten import (
15
- load_mmlu_pro, run_mmlu_pro_benchmark, mmlu_pro_main, load_mmlu_pro_config
16
- )
17
- #
18
- ##############################################################################################################
19
- #
20
- # Functions:
21
-
22
- # Set up logging
23
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
24
- logger = logging.getLogger(__name__)
25
-
26
-
27
- def get_categories():
28
- """Fetch categories using the dataset loader from MMLU_Pro_rewritten.py"""
29
- try:
30
- test_data, _ = load_mmlu_pro() # Use the function from MMLU_Pro_rewritten.py
31
- return list(test_data.keys()) # Return the categories from the test dataset
32
- except Exception as e:
33
- logger.error(f"Failed to load categories: {e}")
34
- return ["Error loading categories"]
35
-
36
-
37
- def load_categories():
38
- """Helper function to return the categories for the Gradio dropdown."""
39
- categories = get_categories() # Fetch categories from the dataset
40
- if categories:
41
- return gr.update(choices=categories, value=categories[0]) # Update dropdown with categories
42
- else:
43
- return gr.update(choices=["Error loading categories"], value="Error loading categories")
44
-
45
-
46
- def run_benchmark_from_ui(url, api_key, model, timeout, category, parallel, verbosity, log_prompt):
47
- """Function to run the benchmark with parameters from the UI."""
48
-
49
- # Override config with UI parameters
50
- config = load_mmlu_pro_config(
51
- url=url,
52
- api_key=api_key,
53
- model=model,
54
- timeout=timeout,
55
- categories=[category] if category else None,
56
- parallel=parallel,
57
- verbosity=verbosity,
58
- log_prompt=log_prompt
59
- )
60
-
61
- # Run the benchmarking process
62
- try:
63
- # Call the main benchmarking function
64
- mmlu_pro_main()
65
-
66
- # Assume the final report is generated in "eval_results" folder
67
- report_path = os.path.join("eval_results", config["server"]["model"].replace("/", "-"), "final_report.txt")
68
-
69
- # Read the final report
70
- with open(report_path, "r") as f:
71
- report = f.read()
72
-
73
- return report
74
- except Exception as e:
75
- logger.error(f"An error occurred during benchmark execution: {e}")
76
- return f"An error occurred during benchmark execution. Please check the logs for more information. Error: {str(e)}"
77
-
78
-
79
- def create_mmlu_pro_tab():
80
- """Create the Gradio UI tab for MMLU-Pro Benchmark."""
81
- with gr.TabItem("MMLU-Pro Benchmark", visible=True):
82
- gr.Markdown("## Run MMLU-Pro Benchmark")
83
-
84
- with gr.Row():
85
- with gr.Column():
86
- # Inputs for the benchmark
87
- url = gr.Textbox(label="Server URL")
88
- api_key = gr.Textbox(label="API Key", type="password")
89
- model = gr.Textbox(label="Model Name")
90
- timeout = gr.Number(label="Timeout (seconds)", value=30)
91
- category = gr.Dropdown(label="Category", choices=["Load categories..."])
92
- load_categories_btn = gr.Button("Load Categories")
93
- parallel = gr.Slider(label="Parallel Requests", minimum=1, maximum=10, step=1, value=1)
94
- verbosity = gr.Slider(label="Verbosity Level", minimum=0, maximum=2, step=1, value=1)
95
- log_prompt = gr.Checkbox(label="Log Prompt")
96
-
97
- with gr.Column():
98
- # Run button and output display
99
- run_button = gr.Button("Run Benchmark")
100
- output = gr.Textbox(label="Benchmark Results", lines=20)
101
-
102
- # When "Load Categories" is clicked, load the categories into the dropdown
103
- load_categories_btn.click(
104
- load_categories,
105
- outputs=category
106
- )
107
-
108
- # When "Run Benchmark" is clicked, trigger the run_benchmark_from_ui function
109
- run_button.click(
110
- run_benchmark_from_ui, # Use the function defined to run the benchmark
111
- inputs=[url, api_key, model, timeout, category, parallel, verbosity, log_prompt],
112
- outputs=output
113
- )
114
-
115
- return [url, api_key, model, timeout, category, parallel, verbosity, log_prompt, run_button, output]
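The handler can also be exercised without the UI; every argument value below is a placeholder, not a tested configuration:

```python
# Direct call to the UI handler with placeholder values; substitute a real
# OpenAI-compatible endpoint, model name, and a category from load_categories().
report = run_benchmark_from_ui(
    url="http://localhost:8080/v1",   # placeholder endpoint
    api_key="",                       # placeholder
    model="my-local-model",           # placeholder
    timeout=30,
    category="computer science",      # must match a category in the dataset
    parallel=1,
    verbosity=1,
    log_prompt=False,
)
print(report)
```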
App_Function_Libraries/Gradio_UI/Media_edit.py DELETED
@@ -1,301 +0,0 @@
1
- # Media_edit.py
2
- # Functions for Gradio Media_Edit UI
3
-
4
- # Imports
5
- import logging
6
- import uuid
7
-
8
- # External Imports
9
- import gradio as gr
10
- #
11
- # Local Imports
12
- from App_Function_Libraries.DB.DB_Manager import add_prompt, update_media_content, db, add_or_update_prompt, \
13
- load_prompt_details, fetch_keywords_for_media, update_keywords_for_media
14
- from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_prompt_dropdown
15
- from App_Function_Libraries.DB.SQLite_DB import fetch_item_details
16
-
17
-
18
- def create_media_edit_tab():
19
- with gr.TabItem("Edit Existing Items", visible=True):
20
- gr.Markdown("# Search and Edit Media Items")
21
-
22
- with gr.Row():
23
- search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
24
- search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title", label="Search By")
25
- search_button = gr.Button("Search")
26
-
27
- with gr.Row():
28
- items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
29
- item_mapping = gr.State({})
30
-
31
- content_input = gr.Textbox(label="Edit Content", lines=10)
32
- prompt_input = gr.Textbox(label="Edit Prompt", lines=3)
33
- summary_input = gr.Textbox(label="Edit Summary", lines=5)
34
-
35
- # Adding keyword input box for editing
36
- keywords_input = gr.Textbox(label="Edit Keywords (comma-separated)", placeholder="Enter keywords here...")
37
-
38
- update_button = gr.Button("Update Media Content")
39
- status_message = gr.Textbox(label="Status", interactive=False)
40
-
41
- # Function to update the dropdown with search results
42
- search_button.click(
43
- fn=update_dropdown,
44
- inputs=[search_query_input, search_type_input],
45
- outputs=[items_output, item_mapping]
46
- )
47
-
48
- # Function to load selected media content including keywords
49
- def load_selected_media_content(selected_item, item_mapping):
50
- if selected_item and item_mapping and selected_item in item_mapping:
51
- media_id = item_mapping[selected_item]
52
- content, prompt, summary = fetch_item_details(media_id)
53
-
54
- # Fetch keywords for the selected item
55
- keywords = fetch_keywords_for_media(media_id)
56
- keywords_str = ", ".join(keywords) if keywords else ""
57
-
58
- return content, prompt, summary, keywords_str
59
- return "No item selected or invalid selection", "", "", ""
60
-
61
- # Load the selected media content and associated keywords
62
- items_output.change(
63
- fn=load_selected_media_content,
64
- inputs=[items_output, item_mapping],
65
- outputs=[content_input, prompt_input, summary_input, keywords_input]
66
- )
67
-
68
- # Function to update media content, prompt, summary, and keywords
69
- def update_media_with_keywords(selected_item, item_mapping, content, prompt, summary, keywords):
70
- if selected_item and item_mapping and selected_item in item_mapping:
71
- media_id = item_mapping[selected_item]
72
-
73
- # Split keywords into a list
74
- keyword_list = [kw.strip() for kw in keywords.split(",") if kw.strip()]
75
-
76
- # Update content, prompt, summary, and keywords in the database
77
- status = update_media_content(media_id, content, prompt, summary)
78
- keyword_status = update_keywords_for_media(media_id, keyword_list)
79
-
80
- return f"{status}\nKeywords: {keyword_status}"
81
- return "No item selected or invalid selection"
82
-
83
- # Update button click event
84
- update_button.click(
85
- fn=update_media_with_keywords,
86
- inputs=[items_output, item_mapping, content_input, prompt_input, summary_input, keywords_input],
87
- outputs=status_message
88
- )
89
-
90
-
91
- def create_media_edit_and_clone_tab():
92
- with gr.TabItem("Clone and Edit Existing Items", visible=True):
93
- gr.Markdown("# Search, Edit, and Clone Existing Items")
94
-
95
- with gr.Row():
96
- with gr.Column():
97
- search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
98
- search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title",
99
- label="Search By")
100
- with gr.Column():
101
- search_button = gr.Button("Search")
102
- clone_button = gr.Button("Clone Item")
103
- save_clone_button = gr.Button("Save Cloned Item", visible=False)
104
- with gr.Row():
105
- items_output = gr.Dropdown(label="Select Item", choices=[], interactive=True)
106
- item_mapping = gr.State({})
107
-
108
- content_input = gr.Textbox(label="Edit Content", lines=10)
109
- prompt_input = gr.Textbox(label="Edit Prompt", lines=3)
110
- summary_input = gr.Textbox(label="Edit Summary", lines=5)
111
- new_title_input = gr.Textbox(label="New Title (for cloning)", visible=False)
112
- status_message = gr.Textbox(label="Status", interactive=False)
113
-
114
- search_button.click(
115
- fn=update_dropdown,
116
- inputs=[search_query_input, search_type_input],
117
- outputs=[items_output, item_mapping]
118
- )
119
-
120
- def load_selected_media_content(selected_item, item_mapping):
121
- if selected_item and item_mapping and selected_item in item_mapping:
122
- media_id = item_mapping[selected_item]
123
- content, prompt, summary = fetch_item_details(media_id)
124
- return content, prompt, summary, gr.update(visible=True), gr.update(visible=False)
125
- return "No item selected or invalid selection", "", "", gr.update(visible=False), gr.update(visible=False)
126
-
127
- items_output.change(
128
- fn=load_selected_media_content,
129
- inputs=[items_output, item_mapping],
130
- outputs=[content_input, prompt_input, summary_input, clone_button, save_clone_button]
131
- )
132
-
133
- def prepare_for_cloning(selected_item):
134
- return gr.update(value=f"Copy of {selected_item}", visible=True), gr.update(visible=True)
135
-
136
- clone_button.click(
137
- fn=prepare_for_cloning,
138
- inputs=[items_output],
139
- outputs=[new_title_input, save_clone_button]
140
- )
141
-
142
- def save_cloned_item(selected_item, item_mapping, content, prompt, summary, new_title):
143
- if selected_item and item_mapping and selected_item in item_mapping:
144
- original_media_id = item_mapping[selected_item]
145
- try:
146
- with db.get_connection() as conn:
147
- cursor = conn.cursor()
148
-
149
- # Fetch the original item's details
150
- cursor.execute("SELECT type, url FROM Media WHERE id = ?", (original_media_id,))
151
- original_type, original_url = cursor.fetchone()
152
-
153
- # Generate a new unique URL
154
- new_url = f"{original_url}_clone_{uuid.uuid4().hex[:8]}"
155
-
156
- # Insert new item into Media table
157
- cursor.execute("""
158
- INSERT INTO Media (title, content, url, type)
159
- VALUES (?, ?, ?, ?)
160
- """, (new_title, content, new_url, original_type))
161
-
162
- new_media_id = cursor.lastrowid
163
-
164
- # Insert new item into MediaModifications table
165
- cursor.execute("""
166
- INSERT INTO MediaModifications (media_id, prompt, summary, modification_date)
167
- VALUES (?, ?, ?, CURRENT_TIMESTAMP)
168
- """, (new_media_id, prompt, summary))
169
-
170
- # Copy keywords from the original item
171
- cursor.execute("""
172
- INSERT INTO MediaKeywords (media_id, keyword_id)
173
- SELECT ?, keyword_id
174
- FROM MediaKeywords
175
- WHERE media_id = ?
176
- """, (new_media_id, original_media_id))
177
-
178
- # Update full-text search index
179
- cursor.execute("""
180
- INSERT INTO media_fts (rowid, title, content)
181
- VALUES (?, ?, ?)
182
- """, (new_media_id, new_title, content))
183
-
184
- conn.commit()
185
-
186
- return f"Cloned item saved successfully with ID: {new_media_id}", gr.update(
187
- visible=False), gr.update(visible=False)
188
- except Exception as e:
189
- logging.error(f"Error saving cloned item: {e}")
190
- return f"Error saving cloned item: {str(e)}", gr.update(visible=True), gr.update(visible=True)
191
- else:
192
- return "No item selected or invalid selection", gr.update(visible=True), gr.update(visible=True)
193
-
194
- save_clone_button.click(
195
- fn=save_cloned_item,
196
- inputs=[items_output, item_mapping, content_input, prompt_input, summary_input, new_title_input],
197
- outputs=[status_message, new_title_input, save_clone_button]
198
- )
199
-
200
-
201
- def create_prompt_edit_tab():
202
- with gr.TabItem("Add & Edit Prompts", visible=True):
203
- with gr.Row():
204
- with gr.Column():
205
- prompt_dropdown = gr.Dropdown(
206
- label="Select Prompt",
207
- choices=[],
208
- interactive=True
209
- )
210
- prompt_list_button = gr.Button("List Prompts")
211
-
212
- with gr.Column():
213
- title_input = gr.Textbox(label="Title", placeholder="Enter the prompt title")
214
- author_input = gr.Textbox(label="Author", placeholder="Enter the prompt's author", lines=3)
215
- description_input = gr.Textbox(label="Description", placeholder="Enter the prompt description", lines=3)
216
- system_prompt_input = gr.Textbox(label="System Prompt", placeholder="Enter the system prompt", lines=3)
217
- user_prompt_input = gr.Textbox(label="User Prompt", placeholder="Enter the user prompt", lines=3)
218
- add_prompt_button = gr.Button("Add/Update Prompt")
219
- add_prompt_output = gr.HTML()
220
-
221
- # Event handlers
222
- prompt_list_button.click(
223
- fn=update_prompt_dropdown,
224
- outputs=prompt_dropdown
225
- )
226
-
227
- add_prompt_button.click(
228
- fn=add_or_update_prompt,
229
- inputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input],
230
- outputs=add_prompt_output
231
- )
232
-
233
- # Load prompt details when selected
234
- prompt_dropdown.change(
235
- fn=load_prompt_details,
236
- inputs=[prompt_dropdown],
237
- outputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input]
238
- )
239
-
240
-
241
- def create_prompt_clone_tab():
242
- with gr.TabItem("Clone and Edit Prompts", visible=True):
243
- with gr.Row():
244
- with gr.Column():
245
- gr.Markdown("# Clone and Edit Prompts")
246
- prompt_dropdown = gr.Dropdown(
247
- label="Select Prompt",
248
- choices=[],
249
- interactive=True
250
- )
251
- prompt_list_button = gr.Button("List Prompts")
252
-
253
- with gr.Column():
254
- title_input = gr.Textbox(label="Title", placeholder="Enter the prompt title")
255
- author_input = gr.Textbox(label="Author", placeholder="Enter the prompt's author", lines=3)
256
- description_input = gr.Textbox(label="Description", placeholder="Enter the prompt description", lines=3)
257
- system_prompt_input = gr.Textbox(label="System Prompt", placeholder="Enter the system prompt", lines=3)
258
- user_prompt_input = gr.Textbox(label="User Prompt", placeholder="Enter the user prompt", lines=3)
259
- clone_prompt_button = gr.Button("Clone Selected Prompt")
260
- save_cloned_prompt_button = gr.Button("Save Cloned Prompt", visible=False)
261
- add_prompt_output = gr.HTML()
262
-
263
- # Event handlers
264
- prompt_list_button.click(
265
- fn=update_prompt_dropdown,
266
- outputs=prompt_dropdown
267
- )
268
-
269
- # Load prompt details when selected
270
- prompt_dropdown.change(
271
- fn=load_prompt_details,
272
- inputs=[prompt_dropdown],
273
- outputs=[title_input, author_input, description_input, system_prompt_input, user_prompt_input]
274
- )
275
-
276
- def prepare_for_cloning(selected_prompt):
277
- if selected_prompt:
278
- return gr.update(value=f"Copy of {selected_prompt}"), gr.update(visible=True)
279
- return gr.update(), gr.update(visible=False)
280
-
281
- clone_prompt_button.click(
282
- fn=prepare_for_cloning,
283
- inputs=[prompt_dropdown],
284
- outputs=[title_input, save_cloned_prompt_button]
285
- )
286
-
287
- def save_cloned_prompt(title, description, system_prompt, user_prompt):
288
- try:
289
- result = add_prompt(title, description, system_prompt, user_prompt)
290
- if result == "Prompt added successfully.":
291
- return result, gr.update(choices=update_prompt_dropdown())
292
- else:
293
- return result, gr.update()
294
- except Exception as e:
295
- return f"Error saving cloned prompt: {str(e)}", gr.update()
296
-
297
- save_cloned_prompt_button.click(
298
- fn=save_cloned_prompt,
299
- inputs=[title_input, description_input, system_prompt_input, user_prompt_input],
300
- outputs=[add_prompt_output, prompt_dropdown]
301
- )
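For clarity, the clone-URL scheme used in `save_cloned_item()` in isolation — the random suffix keeps the cloned row from colliding with the original `Media.url`:

```python
# The URL-uniqueness trick from save_cloned_item(), extracted for illustration.
import uuid

def make_clone_url(original_url: str) -> str:
    return f"{original_url}_clone_{uuid.uuid4().hex[:8]}"

print(make_clone_url("https://example.com/video"))  # e.g. https://example.com/video_clone_1a2b3c4d
```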
App_Function_Libraries/Gradio_UI/Media_wiki_tab.py DELETED
@@ -1,340 +0,0 @@
1
- # Media_wiki_tab.py
2
- # Description: Gradio UI snippet that allows users to import a MediaWiki XML dump file into the application.
3
- #
4
- # Imports
5
- import os
6
- from threading import Thread
7
- #
8
- # 3rd-party Imports
9
- import gradio as gr
10
- import yaml
11
- from ruamel.yaml import YAML
12
- #
13
- # Local Imports
14
- from App_Function_Libraries.MediaWiki.Media_Wiki import import_mediawiki_dump, config
15
- #
16
- #######################################################################################################################
17
- #
18
- # Create MediaWiki Import Tab
19
-
20
- def create_mediawiki_import_tab():
21
- with gr.Tab("MediaWiki Import"):
22
- gr.Markdown("# Import MediaWiki Dump")
23
- with gr.Row():
24
- with gr.Column():
25
- file_path = gr.File(label="MediaWiki XML Dump File")
26
- wiki_name = gr.Textbox(label="Wiki Name", placeholder="Enter a unique name for this wiki")
27
- namespaces = gr.Textbox(label="Namespaces (comma-separated integers, leave empty for all)")
28
- skip_redirects = gr.Checkbox(label="Skip Redirects", value=True)
29
- single_item = gr.Checkbox(label="Import as Single Item", value=False)
30
- chunk_method = gr.Dropdown(
31
- choices=["sentences", "words", "paragraphs", "tokens"],
32
- value="sentences",
33
- label="Chunking Method"
34
- )
35
- chunk_size = gr.Slider(minimum=100, maximum=2000, value=1000, step=100, label="Chunk Size")
36
- chunk_overlap = gr.Slider(minimum=0, maximum=500, value=100, step=10, label="Chunk Overlap")
37
- # FIXME - Add checkbox for 'Enable Summarization upon ingestion' for API summarization of chunks
38
- # api_endpoint = gr.Dropdown(label="Select API Endpoint",
39
- # choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek",
40
- # "Mistral", "OpenRouter",
41
- # "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama",
42
- # "HuggingFace"])
43
- # api_key = gr.Textbox(label="API Key (if required)", type="password")
44
- import_button = gr.Button("Import MediaWiki Dump")
45
- cancel_button = gr.Button("Cancel Import", visible=False)
46
- with gr.Column():
47
- output = gr.Markdown(label="Import Status")
48
- progress_bar = gr.Progress()
49
-
50
- def validate_inputs(file_path, wiki_name, namespaces):
51
- if not file_path:
52
- return "Please select a MediaWiki XML dump file."
53
- if not wiki_name:
54
- return "Please enter a name for the wiki."
55
- if namespaces:
56
- try:
57
- [int(ns.strip()) for ns in namespaces.split(',')]
58
- except ValueError:
59
- return "Invalid namespaces. Please enter comma-separated integers."
60
- return None
61
-
62
- def check_file_size(file_path):
63
- max_size_mb = 1000 # 1 GB
64
- file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
65
- if file_size_mb > max_size_mb:
66
- return f"Warning: The selected file is {file_size_mb:.2f} MB. Importing large files may take a long time."
67
- return None
68
-
69
- import_thread = None
70
- cancel_flag = False
71
-
72
- def run_import(file_path, wiki_name, namespaces, skip_redirects, single_item, chunk_method, chunk_size,
73
- chunk_overlap, progress=gr.Progress()):#, api_endpoint=None, api_key=None):
74
- validation_error = validate_inputs(file_path, wiki_name, namespaces)
75
- if validation_error:
76
- return gr.update(), gr.update(), validation_error
77
-
78
- file_size_warning = check_file_size(file_path.name)
79
- status_text = "# MediaWiki Import Process\n\n## Initializing\n- Starting import process...\n"
80
- if file_size_warning:
81
- status_text += f"- {file_size_warning}\n"
82
-
83
- chunk_options = {
84
- 'method': chunk_method,
85
- 'max_size': chunk_size,
86
- 'overlap': chunk_overlap,
87
- 'adaptive': True,
88
- 'language': 'en'
89
- }
90
- namespaces_list = [int(ns.strip()) for ns in namespaces.split(',')] if namespaces else None
91
-
92
- pages_processed = 0
93
-
94
- try:
95
- for progress_info in import_mediawiki_dump(
96
- file_path=file_path.name,
97
- wiki_name=wiki_name,
98
- namespaces=namespaces_list,
99
- skip_redirects=skip_redirects,
100
- chunk_options=chunk_options,
101
- single_item=single_item,
102
- progress_callback=progress,
103
- # api_name=api_endpoint,
104
- # api_key=api_key
105
- ):
106
- if progress_info.startswith("Found"):
107
- status_text += f"\n## Parsing\n- {progress_info}\n"
108
- elif progress_info.startswith("Processed page"):
109
- pages_processed += 1
110
- if pages_processed % 10 == 0: # Update every 10 pages to avoid too frequent updates
111
- status_text += f"- {progress_info}\n"
112
- elif progress_info.startswith("Successfully imported"):
113
- status_text += f"\n## Completed\n- {progress_info}\n- Total pages processed: {pages_processed}"
114
- else:
115
- status_text += f"- {progress_info}\n"
116
-
117
- yield gr.update(), gr.update(), status_text
118
-
119
- status_text += "\n## Import Process Completed Successfully"
120
- except Exception as e:
121
- status_text += f"\n## Error\n- An error occurred during the import process: {str(e)}"
122
-
123
- yield gr.update(visible=False), gr.update(visible=True), status_text
124
-
125
- def start_import(*args):
126
- nonlocal import_thread
127
- import_thread = Thread(target=run_import, args=args)
128
- import_thread.start()
129
- return gr.update(visible=True), gr.update(visible=False), gr.update(
130
- value="Import process started. Please wait...")
131
-
132
- def cancel_import():
133
- nonlocal cancel_flag
134
- cancel_flag = True
135
- return gr.update(visible=False), gr.update(visible=True)
136
-
137
- import_button.click(
138
- run_import,
139
- inputs=[file_path, wiki_name, namespaces, skip_redirects, single_item, chunk_method, chunk_size,
140
- chunk_overlap],#, api_endpoint, api_key],
141
- outputs=[cancel_button, import_button, output]
142
- )
143
-
144
- cancel_button.click(
145
- cancel_import,
146
- outputs=[cancel_button, import_button]
147
- )
148
-
149
- return file_path, wiki_name, namespaces, skip_redirects, single_item, chunk_method, chunk_size, chunk_overlap, import_button, output
150
-
151
-
152
- class PreservedTokenSafeDumper(yaml.SafeDumper):
153
- def represent_scalar(self, tag, value, style=None):
154
- if style is None and isinstance(value, str) and '\n' in value:
155
- style = '|'
156
- return super().represent_scalar(tag, value, style)
157
-
158
-
159
- def update_yaml_file(file_path, updates):
160
- with open(file_path, 'r') as file:
161
- lines = file.readlines()
162
-
163
- def format_value(value):
164
- if isinstance(value, bool):
165
- return str(value).lower()
166
- elif isinstance(value, (int, float)):
167
- return str(value)
168
- elif isinstance(value, list):
169
- return '[' + ', '.join(map(str, value)) + ']'
170
- else:
171
- return f"'{value}'"
172
-
173
- def update_line(line, updates, prefix=''):
174
- for key, value in updates.items():
175
- full_key = f"{prefix}{key}:" if prefix else f"{key}:"
176
- if line.strip().startswith(full_key):
177
- indentation = line[:line.index(full_key)]
178
- if isinstance(value, dict):
179
- return line # Keep the line as is for nested structures
180
- else:
181
- return f"{indentation}{full_key} {format_value(value)}\n"
182
- return line
183
-
184
- updated_lines = []
185
- current_prefix = ''
186
- for line in lines:
187
- stripped = line.strip()
188
- if stripped and not stripped.startswith('#'):
189
- indent = len(line) - len(line.lstrip())
190
- if indent == 0:
191
- current_prefix = ''
192
- elif ':' in stripped and not stripped.endswith(':'):
193
- current_prefix = '.'.join(current_prefix.split('.')[:-1]) + '.' if current_prefix else ''
194
-
195
- updated_line = update_line(line, updates, current_prefix)
196
-
197
- if updated_line == line and ':' in stripped and stripped.endswith(':'):
198
- key = stripped[:-1].strip()
199
- if current_prefix:
200
- current_prefix += f"{key}."
201
- else:
202
- current_prefix = f"{key}."
203
-
204
- updated_lines.append(updated_line)
205
- else:
206
- updated_lines.append(line)
207
-
208
- with open(file_path, 'w') as file:
209
- file.writelines(updated_lines)
210
-
211
- #
212
- #
213
- #######################################################################################################################
214
- #
215
- # Config tab
216
-
217
- yaml = YAML()
218
- yaml.preserve_quotes = True
219
- yaml.indent(mapping=2, sequence=4, offset=2)
220
-
221
- def load_config():
222
- config_path = os.path.join('Config_Files', 'mediawiki_import_config.yaml')
223
- with open(config_path, 'r') as file:
224
- return yaml.load(file)
225
-
226
- def save_config(updated_config):
227
- config_path = os.path.join('Config_Files', 'mediawiki_import_config.yaml')
228
- update_yaml_file(config_path, updated_config)  # merge UI changes into the YAML file, preserving layout
229
-
230
-
231
- def create_mediawiki_config_tab():
232
- with gr.TabItem("MediaWiki Import Configuration", visible=True):
233
- gr.Markdown("# MediaWiki Import Configuration (Broken currently/doesn't work)")
234
- with gr.Row():
235
- with gr.Column():
236
- namespaces = gr.Textbox(label="Default Namespaces (comma-separated integers)",
237
- value=','.join(map(str, config['import']['default_namespaces'])))
238
- skip_redirects = gr.Checkbox(label="Skip Redirects by Default",
239
- value=config['import']['default_skip_redirects'])
240
- single_item = gr.Checkbox(label="Import as Single Item by Default",
241
- value=config['import']['single_item_default'])
242
- batch_size = gr.Number(value=config['import']['batch_size'], label="Batch Size")
243
-
244
- chunk_method = gr.Dropdown(
245
- choices=config['chunking']['methods'],
246
- value=config['chunking']['default_method'],
247
- label="Default Chunking Method"
248
- )
249
- chunk_size = gr.Slider(minimum=100, maximum=2000, value=config['chunking']['default_size'], step=100,
250
- label="Default Chunk Size")
251
- chunk_overlap = gr.Slider(minimum=0, maximum=500, value=config['chunking']['default_overlap'], step=10,
252
- label="Default Chunk Overlap")
253
-
254
- with gr.Column():
255
- max_workers = gr.Slider(minimum=1, maximum=16, value=config['processing']['max_workers'], step=1,
256
- label="Max Worker Threads")
257
-
258
- embedding_provider = gr.Dropdown(
259
- choices=['openai', 'local', 'huggingface'],
260
- value=config['embeddings']['provider'],
261
- label="Embedding Provider"
262
- )
263
- embedding_model = gr.Textbox(label="Embedding Model", value=config['embeddings']['model'])
264
- api_key = gr.Textbox(label="API Key (if required)", type="password",
265
- value=config['embeddings'].get('api_key', ''))
266
- local_embedding_url = gr.Textbox(label="Local Embedding URL",
267
- value=config['embeddings'].get('local_url', ''))
268
-
269
- checkpoints_enabled = gr.Checkbox(label="Enable Checkpoints", value=config['checkpoints']['enabled'])
270
- checkpoint_directory = gr.Textbox(label="Checkpoint Directory", value=config['checkpoints']['directory'])
271
-
272
- max_retries = gr.Number(value=config['error_handling']['max_retries'], label="Max Retries")
273
- retry_delay = gr.Number(value=config['error_handling']['retry_delay'], label="Retry Delay (seconds)")
274
-
275
- save_config_button = gr.Button("Save Configuration")
276
- config_output = gr.Markdown(label="Configuration Status")
277
-
278
- def update_config_from_ui(namespaces, skip_redirects, single_item, batch_size, chunk_method, chunk_size,
279
- chunk_overlap, max_workers, embedding_provider, embedding_model, api_key,
280
- local_embedding_url, checkpoints_enabled, checkpoint_directory, max_retries,
281
- retry_delay):
282
- current_config = load_config()
283
- updated_config = {}
284
-
285
- if namespaces != ','.join(map(str, current_config['import']['default_namespaces'])):
286
- updated_config.setdefault('import', {})['default_namespaces'] = [int(ns.strip()) for ns in
287
- namespaces.split(',') if ns.strip()]
288
- if skip_redirects != current_config['import']['default_skip_redirects']:
289
- updated_config.setdefault('import', {})['default_skip_redirects'] = skip_redirects
290
- if single_item != current_config['import']['single_item_default']:
291
- updated_config.setdefault('import', {})['single_item_default'] = single_item
292
- if int(batch_size) != current_config['import']['batch_size']:
293
- updated_config.setdefault('import', {})['batch_size'] = int(batch_size)
294
- if chunk_method != current_config['chunking']['default_method']:
295
- updated_config.setdefault('chunking', {})['default_method'] = chunk_method
296
- if int(chunk_size) != current_config['chunking']['default_size']:
297
- updated_config.setdefault('chunking', {})['default_size'] = int(chunk_size)
298
- if int(chunk_overlap) != current_config['chunking']['default_overlap']:
299
- updated_config.setdefault('chunking', {})['default_overlap'] = int(chunk_overlap)
300
- if int(max_workers) != current_config['processing']['max_workers']:
301
- updated_config.setdefault('processing', {})['max_workers'] = int(max_workers)
302
- if embedding_provider != current_config['embeddings']['provider']:
303
- updated_config.setdefault('embeddings', {})['provider'] = embedding_provider
304
- if embedding_model != current_config['embeddings']['model']:
305
- updated_config.setdefault('embeddings', {})['model'] = embedding_model
306
- if api_key != current_config['embeddings'].get('api_key', ''):
307
- updated_config.setdefault('embeddings', {})['api_key'] = api_key
308
- if local_embedding_url != current_config['embeddings'].get('local_url', ''):
309
- updated_config.setdefault('embeddings', {})['local_url'] = local_embedding_url
310
- if checkpoints_enabled != current_config['checkpoints']['enabled']:
311
- updated_config.setdefault('checkpoints', {})['enabled'] = checkpoints_enabled
312
- if checkpoint_directory != current_config['checkpoints']['directory']:
313
- updated_config.setdefault('checkpoints', {})['directory'] = checkpoint_directory
314
- if int(max_retries) != current_config['error_handling']['max_retries']:
315
- updated_config.setdefault('error_handling', {})['max_retries'] = int(max_retries)
316
- if int(retry_delay) != current_config['error_handling']['retry_delay']:
317
- updated_config.setdefault('error_handling', {})['retry_delay'] = int(retry_delay)
318
-
319
- return updated_config
320
-
321
- def save_config_callback(*args):
322
- updated_config = update_config_from_ui(*args)
323
- save_config(updated_config)
324
- return "Configuration saved successfully."
325
-
326
- save_config_button.click(
327
- save_config_callback,
328
- inputs=[namespaces, skip_redirects, single_item, batch_size, chunk_method, chunk_size,
329
- chunk_overlap, max_workers, embedding_provider, embedding_model, api_key,
330
- local_embedding_url, checkpoints_enabled, checkpoint_directory, max_retries, retry_delay],
331
- outputs=config_output
332
- )
333
-
334
- return namespaces, skip_redirects, single_item, batch_size, chunk_method, chunk_size, chunk_overlap, max_workers, \
335
- embedding_provider, embedding_model, api_key, local_embedding_url, checkpoints_enabled, checkpoint_directory, \
336
- max_retries, retry_delay, save_config_button, config_output
337
-
338
- #
339
- # End of MediaWiki Import Tab
340
- #######################################################################################################################
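An illustrative call to `update_yaml_file()` — the values are hypothetical, and note that as written `update_line()` only matches top-level keys, so nested values like these may not be applied, consistent with the "Broken currently" note in the config tab title:

```python
# Hypothetical call shape for update_yaml_file(); keys mirror the structure
# produced by update_config_from_ui() above. Only scalar leaf lines are
# rewritten; nested mapping lines are left as-is by update_line().
import os

update_yaml_file(
    os.path.join('Config_Files', 'mediawiki_import_config.yaml'),
    {'import': {'batch_size': 200}, 'chunking': {'default_size': 800}},
)
```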
App_Function_Libraries/Gradio_UI/PDF_ingestion_tab.py DELETED
@@ -1,152 +0,0 @@
1
- # PDF_ingestion_tab.py
2
- # Gradio UI for ingesting PDFs into the database
3
- import os
4
- import shutil
5
- import tempfile
6
-
7
- # Imports
8
- #
9
- # External Imports
10
- import gradio as gr
11
- #
12
- # Local Imports
13
- from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
14
- from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
15
- from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_metadata_from_pdf, extract_text_and_format_from_pdf, \
16
- process_and_cleanup_pdf
17
- #
18
- #
19
- ########################################################################################################################
20
- #
21
- # Functions:
22
-
23
- def create_pdf_ingestion_tab():
24
- with gr.TabItem("PDF Ingestion", visible=True):
25
- # TODO - Add functionality to extract metadata from pdf as part of conversion process in marker
26
- gr.Markdown("# Ingest PDF Files and Extract Metadata")
27
- with gr.Row():
28
- with gr.Column():
29
- pdf_file_input = gr.File(label="Uploaded PDF File", file_types=[".pdf"], visible=True)
30
- pdf_upload_button = gr.UploadButton("Click to Upload PDF", file_types=[".pdf"])
31
- pdf_title_input = gr.Textbox(label="Title (Optional)")
32
- pdf_author_input = gr.Textbox(label="Author (Optional)")
33
- pdf_keywords_input = gr.Textbox(label="Keywords (Optional, comma-separated)")
34
- with gr.Row():
35
- custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
36
- value=False,
37
- visible=True)
38
- preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
39
- value=False,
40
- visible=True)
41
- with gr.Row():
42
- preset_prompt = gr.Dropdown(label="Select Preset Prompt",
43
- choices=load_preset_prompts(),
44
- visible=False)
45
- with gr.Row():
46
- custom_prompt_input = gr.Textbox(label="Custom Prompt",
47
- placeholder="Enter custom prompt here",
48
- lines=3,
49
- visible=False)
50
- with gr.Row():
51
- system_prompt_input = gr.Textbox(label="System Prompt",
52
- value="""
53
- <s>You are a bulleted notes specialist.
54
- [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
55
- **Bulleted Note Creation Guidelines**
56
-
57
- **Headings**:
58
- - Based on referenced topics, not categories like quotes or terms
59
- - Surrounded by **bold** formatting
60
- - Not listed as bullet points
61
- - No space between headings and list items underneath
62
-
63
- **Emphasis**:
64
- - **Important terms** set in bold font
65
- - **Text ending in a colon**: also bolded
66
-
67
- **Review**:
68
- - Ensure adherence to specified format
69
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]""",
70
- lines=3,
71
- visible=False)
72
-
73
- custom_prompt_checkbox.change(
74
- fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
75
- inputs=[custom_prompt_checkbox],
76
- outputs=[custom_prompt_input, system_prompt_input]
77
- )
78
- preset_prompt_checkbox.change(
79
- fn=lambda x: gr.update(visible=x),
80
- inputs=[preset_prompt_checkbox],
81
- outputs=[preset_prompt]
82
- )
83
-
84
- def update_prompts(preset_name):
85
- prompts = update_user_prompt(preset_name)
86
- return (
87
- gr.update(value=prompts["user_prompt"], visible=True),
88
- gr.update(value=prompts["system_prompt"], visible=True)
89
- )
90
-
91
- preset_prompt.change(
92
- update_prompts,
93
- inputs=preset_prompt,
94
- outputs=[custom_prompt_input, system_prompt_input]
95
- )
96
-
97
- pdf_ingest_button = gr.Button("Ingest PDF")
98
-
99
- pdf_upload_button.upload(fn=lambda file: file, inputs=pdf_upload_button, outputs=pdf_file_input)
100
- with gr.Column():
101
- pdf_result_output = gr.Textbox(label="Result")
102
-
103
- pdf_ingest_button.click(
104
- fn=process_and_cleanup_pdf,
105
- inputs=[pdf_file_input, pdf_title_input, pdf_author_input, pdf_keywords_input],
106
- outputs=pdf_result_output
107
- )
108
-
109
-
110
- def test_pdf_ingestion(pdf_file):
111
- if pdf_file is None:
112
- return "No file uploaded", ""
113
-
114
- try:
115
- # Create a temporary directory
116
- with tempfile.TemporaryDirectory() as temp_dir:
117
- # Create a path for the temporary PDF file
118
- temp_path = os.path.join(temp_dir, "temp.pdf")
119
-
120
- # Copy the contents of the uploaded file to the temporary file
121
- shutil.copy(pdf_file.name, temp_path)
122
-
123
- # Extract text and convert to Markdown
124
- markdown_text = extract_text_and_format_from_pdf(temp_path)
125
-
126
- # Extract metadata from PDF
127
- metadata = extract_metadata_from_pdf(temp_path)
128
-
129
- # Use metadata for title and author if not provided
130
- title = metadata.get('title', os.path.splitext(os.path.basename(pdf_file.name))[0])
131
- author = metadata.get('author', 'Unknown')
132
-
133
- result = f"PDF '{title}' by {author} processed successfully."
134
- return result, markdown_text
135
- except Exception as e:
136
- return f"Error ingesting PDF: {str(e)}", ""
137
-
138
- def create_pdf_ingestion_test_tab():
139
- with gr.TabItem("Test PDF Ingestion", visible=True):
140
- with gr.Row():
141
- with gr.Column():
142
- pdf_file_input = gr.File(label="Upload PDF for testing")
143
- test_button = gr.Button("Test PDF Ingestion")
144
- with gr.Column():
145
- test_output = gr.Textbox(label="Test Result")
146
- pdf_content_output = gr.Textbox(label="PDF Content", lines=200)
147
- test_button.click(
148
- fn=test_pdf_ingestion,
149
- inputs=[pdf_file_input],
150
- outputs=[test_output, pdf_content_output]
151
- )
152
-
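
For context, a minimal sketch of how these two tab factories would typically be mounted in a host app. The gr.Blocks/gr.Tabs wiring below is an assumption for illustration; only the two factory functions come from the deleted file:

    import gradio as gr
    from App_Function_Libraries.Gradio_UI.PDF_ingestion_tab import (
        create_pdf_ingestion_tab,
        create_pdf_ingestion_test_tab,
    )

    # Each factory opens its own gr.TabItem, so both must be called
    # inside a gr.Tabs() context within a Blocks app.
    with gr.Blocks() as demo:
        with gr.Tabs():
            create_pdf_ingestion_tab()
            create_pdf_ingestion_test_tab()

    demo.launch()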
App_Function_Libraries/Gradio_UI/Plaintext_tab_import.py DELETED
@@ -1,116 +0,0 @@
- # Plaintext_tab_import.py
- # Contains the code for the "Import Plain Text Files" tab in the Gradio UI.
- # This tab allows users to upload plain text files (Markdown, Text, RTF) or a zip file containing multiple files.
- # The user can provide a title, author, keywords, system prompt, custom user prompt, and select an API for auto-summarization.
- #
- #######################################################################################################################
- #
- # Import necessary libraries
- import os
- import tempfile
- import zipfile
- #
- # Import Non-Local
- import gradio as gr
- from docx2txt import docx2txt
- from pypandoc import convert_file
- #
- # Import Local libraries
- from App_Function_Libraries.Gradio_UI.Import_Functionality import import_data
- #
- #######################################################################################################################
- #
- # Functions:
-
- def create_plain_text_import_tab():
-     with gr.TabItem("Import Plain text & .docx Files", visible=True):
-         with gr.Row():
-             with gr.Column():
-                 gr.Markdown("# Import Markdown(`.md`)/Text(`.txt`)/rtf & `.docx` Files")
-                 gr.Markdown("Upload a single file or a zip file containing multiple files")
-                 import_file = gr.File(label="Upload file for import", file_types=[".md", ".txt", ".rtf", ".docx", ".zip"])
-                 title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content (for single files)")
-                 author_input = gr.Textbox(label="Author", placeholder="Enter the author's name (for single files)")
-                 keywords_input = gr.Textbox(label="Keywords", placeholder="Enter keywords, comma-separated")
-                 system_prompt_input = gr.Textbox(label="System Prompt (for Summarization)", lines=3,
-                                                  value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
- **Bulleted Note Creation Guidelines**
-
- **Headings**:
- - Based on referenced topics, not categories like quotes or terms
- - Surrounded by **bold** formatting
- - Not listed as bullet points
- - No space between headings and list items underneath
-
- **Emphasis**:
- - **Important terms** set in bold font
- - **Text ending in a colon**: also bolded
-
- **Review**:
- - Ensure adherence to specified format
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]""",
-                                                  )
-                 custom_prompt_input = gr.Textbox(label="Custom User Prompt", placeholder="Enter a custom user prompt for summarization (optional)")
-                 auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
-                 api_name_input = gr.Dropdown(
-                     choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
-                              "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
-                     label="API for Auto-summarization"
-                 )
-                 api_key_input = gr.Textbox(label="API Key", type="password")
-                 import_button = gr.Button("Import File(s)")
-             with gr.Column():
-                 import_output = gr.Textbox(label="Import Status")
-
-
-         def import_plain_text_file(file_path, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
-             try:
-                 # Determine the file type and convert if necessary
-                 file_extension = os.path.splitext(file_path)[1].lower()
-                 if file_extension == '.rtf':
-                     with tempfile.NamedTemporaryFile(suffix='.md', delete=False) as temp_file:
-                         convert_file(file_path, 'md', outputfile=temp_file.name)
-                         file_path = temp_file.name
-                     # Read the converted Markdown back so `content` is always bound
-                     # (the original .rtf branch fell through without reading its output)
-                     with open(file_path, 'r', encoding='utf-8') as file:
-                         content = file.read()
-                 elif file_extension == '.docx':
-                     content = docx2txt.process(file_path)
-                 else:
-                     with open(file_path, 'r', encoding='utf-8') as file:
-                         content = file.read()
-
-                 # Process the content
-                 return import_data(content, title, author, keywords, system_prompt,
-                                    user_prompt, auto_summarize, api_name, api_key)
-             except Exception as e:
-                 return f"Error processing file: {str(e)}"
-
-         def process_plain_text_zip_file(zip_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
-             results = []
-             with tempfile.TemporaryDirectory() as temp_dir:
-                 with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
-                     zip_ref.extractall(temp_dir)
-
-                 for filename in os.listdir(temp_dir):
-                     if filename.lower().endswith(('.md', '.txt', '.rtf', '.docx')):
-                         file_path = os.path.join(temp_dir, filename)
-                         result = import_plain_text_file(file_path, title, author, keywords, system_prompt,
-                                                         user_prompt, auto_summarize, api_name, api_key)
-                         results.append(f"File: {filename} - {result}")
-
-             return "\n".join(results)
-
-         def import_file_handler(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
-             if file.name.lower().endswith(('.md', '.txt', '.rtf', '.docx')):
-                 return import_plain_text_file(file.name, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
-             elif file.name.lower().endswith('.zip'):
-                 return process_plain_text_zip_file(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
-             else:
-                 return "Unsupported file type. Please upload a .md, .txt, .rtf, .docx file or a .zip file containing these file types."
-
-         import_button.click(
-             fn=import_file_handler,
-             inputs=[import_file, title_input, author_input, keywords_input, system_prompt_input,
-                     custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input],
-             outputs=import_output
-         )
-
-     return import_file, title_input, author_input, keywords_input, system_prompt_input, custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input, import_button, import_output
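
For context, the .rtf branch above leans on pypandoc's convert_file; a minimal standalone sketch of just that conversion step (sample.rtf is a hypothetical input path, and pypandoc requires a pandoc binary on the PATH):

    import tempfile
    from pypandoc import convert_file

    # Convert an RTF document to Markdown on disk, then read it back --
    # the same shape as the .rtf branch in import_plain_text_file above.
    with tempfile.NamedTemporaryFile(suffix='.md', delete=False) as tmp:
        convert_file('sample.rtf', 'md', outputfile=tmp.name)

    with open(tmp.name, 'r', encoding='utf-8') as f:
        markdown_text = f.read()
    print(markdown_text[:200])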
App_Function_Libraries/Gradio_UI/Podcast_tab.py DELETED
@@ -1,163 +0,0 @@
- # Podcast_tab.py
- # Description: Gradio UI for ingesting podcasts into the database
- #
- # Imports
- #
- # External Imports
- import gradio as gr
- #
- # Local Imports
- from App_Function_Libraries.Audio.Audio_Files import process_podcast
- from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
- from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
- #
- ########################################################################################################################
- #
- # Functions:
-
-
- def create_podcast_tab():
-     with gr.TabItem("Podcast", visible=True):
-         gr.Markdown("# Podcast Transcription and Ingestion", visible=True)
-         with gr.Row():
-             with gr.Column():
-                 podcast_url_input = gr.Textbox(label="Podcast URL", placeholder="Enter the podcast URL here")
-                 podcast_title_input = gr.Textbox(label="Podcast Title", placeholder="Will be auto-detected if possible")
-                 podcast_author_input = gr.Textbox(label="Podcast Author", placeholder="Will be auto-detected if possible")
-
-                 podcast_keywords_input = gr.Textbox(
-                     label="Keywords",
-                     placeholder="Enter keywords here (comma-separated, include series name if applicable)",
-                     value="podcast,audio",
-                     elem_id="podcast-keywords-input"
-                 )
-
-                 keep_timestamps_input = gr.Checkbox(label="Keep Timestamps", value=True)
-
-                 with gr.Row():
-                     podcast_custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt",
-                                                                  value=False,
-                                                                  visible=True)
-                     preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
-                                                          value=False,
-                                                          visible=True)
-                 with gr.Row():
-                     preset_prompt = gr.Dropdown(label="Select Preset Prompt",
-                                                 choices=load_preset_prompts(),
-                                                 visible=False)
-                 with gr.Row():
-                     podcast_custom_prompt_input = gr.Textbox(label="Custom Prompt",
-                                                              placeholder="Enter custom prompt here",
-                                                              lines=3,
-                                                              visible=False)
-                 with gr.Row():
-                     system_prompt_input = gr.Textbox(label="System Prompt",
-                                                      value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhere to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
- **Bulleted Note Creation Guidelines**
-
- **Headings**:
- - Based on referenced topics, not categories like quotes or terms
- - Surrounded by **bold** formatting
- - Not listed as bullet points
- - No space between headings and list items underneath
-
- **Emphasis**:
- - **Important terms** set in bold font
- - **Text ending in a colon**: also bolded
-
- **Review**:
- - Ensure adherence to specified format
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
- """,
-                                                      lines=3,
-                                                      visible=False)
-
-                 podcast_custom_prompt_checkbox.change(
-                     fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
-                     inputs=[podcast_custom_prompt_checkbox],
-                     outputs=[podcast_custom_prompt_input, system_prompt_input]
-                 )
-                 preset_prompt_checkbox.change(
-                     fn=lambda x: gr.update(visible=x),
-                     inputs=[preset_prompt_checkbox],
-                     outputs=[preset_prompt]
-                 )
-
-                 def update_prompts(preset_name):
-                     prompts = update_user_prompt(preset_name)
-                     return (
-                         gr.update(value=prompts["user_prompt"], visible=True),
-                         gr.update(value=prompts["system_prompt"], visible=True)
-                     )
-
-                 preset_prompt.change(
-                     update_prompts,
-                     inputs=preset_prompt,
-                     outputs=[podcast_custom_prompt_input, system_prompt_input]
-                 )
-
-                 podcast_api_name_input = gr.Dropdown(
-                     choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp",
-                              "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace", "Custom-OpenAI-API"],
-                     value=None,
-                     label="API Name for Summarization (Optional)"
-                 )
-                 podcast_api_key_input = gr.Textbox(label="API Key (if required)", type="password")
-                 podcast_whisper_model_input = gr.Dropdown(choices=whisper_models, value="medium", label="Whisper Model")
-
-                 keep_original_input = gr.Checkbox(label="Keep original audio file", value=False)
-                 enable_diarization_input = gr.Checkbox(label="Enable speaker diarization", value=False)
-
-                 use_cookies_input = gr.Checkbox(label="Use cookies for yt-dlp", value=False)
-                 cookies_input = gr.Textbox(
-                     label="yt-dlp Cookies",
-                     placeholder="Paste your cookies here (JSON format)",
-                     lines=3,
-                     visible=False
-                 )
-
-                 use_cookies_input.change(
-                     fn=lambda x: gr.update(visible=x),
-                     inputs=[use_cookies_input],
-                     outputs=[cookies_input]
-                 )
-
-                 chunking_options_checkbox = gr.Checkbox(label="Show Chunking Options", value=False)
-                 with gr.Row(visible=False) as chunking_options_box:
-                     gr.Markdown("### Chunking Options")
-                     with gr.Column():
-                         chunk_method = gr.Dropdown(choices=['words', 'sentences', 'paragraphs', 'tokens'], label="Chunking Method")
-                         max_chunk_size = gr.Slider(minimum=100, maximum=1000, value=300, step=50, label="Max Chunk Size")
-                         chunk_overlap = gr.Slider(minimum=0, maximum=100, value=0, step=10, label="Chunk Overlap")
-                         use_adaptive_chunking = gr.Checkbox(label="Use Adaptive Chunking")
-                         use_multi_level_chunking = gr.Checkbox(label="Use Multi-level Chunking")
-                         chunk_language = gr.Dropdown(choices=['english', 'french', 'german', 'spanish'], label="Chunking Language")
-
-                 chunking_options_checkbox.change(
-                     fn=lambda x: gr.update(visible=x),
-                     inputs=[chunking_options_checkbox],
-                     outputs=[chunking_options_box]
-                 )
-
-                 podcast_process_button = gr.Button("Process Podcast")
-
-             with gr.Column():
-                 podcast_progress_output = gr.Textbox(label="Progress")
-                 podcast_error_output = gr.Textbox(label="Error Messages")
-                 podcast_transcription_output = gr.Textbox(label="Transcription")
-                 podcast_summary_output = gr.Textbox(label="Summary")
-                 download_transcription = gr.File(label="Download Transcription as JSON")
-                 download_summary = gr.File(label="Download Summary as Text")
-
-         podcast_process_button.click(
-             fn=process_podcast,
-             inputs=[podcast_url_input, podcast_title_input, podcast_author_input,
-                     podcast_keywords_input, podcast_custom_prompt_input, podcast_api_name_input,
-                     podcast_api_key_input, podcast_whisper_model_input, keep_original_input,
-                     enable_diarization_input, use_cookies_input, cookies_input,
-                     chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
-                     use_multi_level_chunking, chunk_language, keep_timestamps_input],
-             outputs=[podcast_progress_output, podcast_transcription_output, podcast_summary_output,
-                      podcast_title_input, podcast_author_input, podcast_keywords_input, podcast_error_output,
-                      download_transcription, download_summary]
-         )
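
For context, the checkbox-driven visibility wiring repeated throughout these tabs (custom/preset prompts, yt-dlp cookies, chunking options) is the stock Gradio toggle pattern; a minimal self-contained sketch, with names invented for illustration:

    import gradio as gr

    with gr.Blocks() as demo:
        show_opts = gr.Checkbox(label="Show Chunking Options", value=False)
        with gr.Row(visible=False) as opts_box:
            gr.Markdown("### Chunking Options")

        # gr.update(visible=x) shows or hides the container whenever the checkbox flips.
        show_opts.change(
            fn=lambda x: gr.update(visible=x),
            inputs=[show_opts],
            outputs=[opts_box],
        )

    demo.launch()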