Spaces:
Running
Running
oceansweep
committed on
Commit
•
2842ee9
1
Parent(s):
370522e
Upload Summarization_General_Lib.py
Browse files
App_Function_Libraries/Summarization_General_Lib.py
CHANGED
@@ -20,6 +20,7 @@ import json
|
|
20 |
import logging
|
21 |
import os
|
22 |
import time
|
|
|
23 |
|
24 |
import requests
|
25 |
from requests import RequestException
|
@@ -30,9 +31,9 @@ from App_Function_Libraries.Chunk_Lib import semantic_chunking, rolling_summariz
|
|
30 |
from App_Function_Libraries.Diarization_Lib import combine_transcription_and_diarization
|
31 |
from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
|
32 |
summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm
|
33 |
-
from App_Function_Libraries.DB_Manager import add_media_to_database
|
34 |
# Import Local
|
35 |
-
from App_Function_Libraries.Utils import load_and_log_configs, load_comprehensive_config, sanitize_filename, \
|
36 |
clean_youtube_url, create_download_directory, is_valid_url
|
37 |
from App_Function_Libraries.Video_DL_Ingestion_Lib import download_video, extract_video_info
|
38 |
|
@@ -43,6 +44,55 @@ from App_Function_Libraries.Video_DL_Ingestion_Lib import download_video, extrac
|
|
43 |
config = load_comprehensive_config()
|
44 |
openai_api_key = config.get('API', 'openai_api_key', fallback=None)
|
45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
def extract_text_from_segments(segments):
|
47 |
logging.debug(f"Segments received: {segments}")
|
48 |
logging.debug(f"Type of segments: {type(segments)}")
|
@@ -65,18 +115,18 @@ def extract_text_from_segments(segments):
|
|
65 |
|
66 |
def summarize_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_message=None):
|
67 |
loaded_config_data = load_and_log_configs()
|
68 |
-
|
69 |
try:
|
70 |
# API key validation
|
71 |
-
if api_key
|
72 |
logging.info("OpenAI: #1 API key not provided as parameter")
|
73 |
logging.info("OpenAI: Attempting to use API key from config file")
|
74 |
api_key = loaded_config_data['api_keys']['openai']
|
75 |
|
76 |
-
if api_key
|
77 |
logging.error("OpenAI: #2 API key not found or is empty")
|
78 |
return "OpenAI: API Key Not Provided/Found in Config file or is empty"
|
79 |
|
|
|
80 |
logging.debug(f"OpenAI: Using API Key: {api_key[:5]}...{api_key[-5:]}")
|
81 |
|
82 |
# Input data handling
|
@@ -121,7 +171,6 @@ def summarize_with_openai(api_key, input_data, custom_prompt_arg, temp=None, sys
|
|
121 |
else:
|
122 |
raise ValueError(f"OpenAI: Invalid input data format: {type(data)}")
|
123 |
|
124 |
-
openai_model = loaded_config_data['models']['openai'] or "gpt-4o"
|
125 |
logging.debug(f"OpenAI: Extracted text (first 500 chars): {text[:500]}...")
|
126 |
logging.debug(f"OpenAI: Custom prompt: {custom_prompt_arg}")
|
127 |
|
@@ -205,6 +254,7 @@ def summarize_with_anthropic(api_key, input_data, custom_prompt_arg, temp=None,
|
|
205 |
if not anthropic_api_key or not anthropic_api_key.strip():
|
206 |
logging.error("Anthropic: No valid API key available")
|
207 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
|
|
208 |
# For example: raise ValueError("No valid Anthropic API key available")
|
209 |
|
210 |
|
@@ -344,6 +394,7 @@ def summarize_with_cohere(api_key, input_data, custom_prompt_arg, temp=None, sys
|
|
344 |
if not cohere_api_key or not cohere_api_key.strip():
|
345 |
logging.error("Cohere: No valid API key available")
|
346 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
|
|
347 |
# For example: raise ValueError("No valid Anthropic API key available")
|
348 |
|
349 |
if custom_prompt_arg is None:
|
@@ -455,6 +506,7 @@ def summarize_with_groq(api_key, input_data, custom_prompt_arg, temp=None, syste
|
|
455 |
if not groq_api_key or not groq_api_key.strip():
|
456 |
logging.error("Anthropic: No valid API key available")
|
457 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
|
|
458 |
# For example: raise ValueError("No valid Anthropic API key available")
|
459 |
|
460 |
logging.debug(f"Groq: Using API Key: {groq_api_key[:5]}...{groq_api_key[-5:]}")
|
@@ -683,6 +735,7 @@ def summarize_with_huggingface(api_key, input_data, custom_prompt_arg, temp=None
|
|
683 |
if not huggingface_api_key or not huggingface_api_key.strip():
|
684 |
logging.error("HuggingFace: No valid API key available")
|
685 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
|
|
686 |
# For example: raise ValueError("No valid Anthropic API key available")
|
687 |
|
688 |
|
@@ -733,7 +786,7 @@ def summarize_with_huggingface(api_key, input_data, custom_prompt_arg, temp=None
|
|
733 |
response = requests.post(API_URL, headers=headers, json=data)
|
734 |
|
735 |
if response.status_code == 200:
|
736 |
-
summary = response.json()[0]['
|
737 |
logging.debug("huggingface: Summarization successful")
|
738 |
print("Summarization successful.")
|
739 |
return summary
|
@@ -772,6 +825,7 @@ def summarize_with_deepseek(api_key, input_data, custom_prompt_arg, temp=None, s
|
|
772 |
if not deepseek_api_key or not deepseek_api_key.strip():
|
773 |
logging.error("DeepSeek: No valid API key available")
|
774 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
|
|
775 |
# For example: raise ValueError("No valid deepseek API key available")
|
776 |
|
777 |
|
@@ -877,6 +931,7 @@ def summarize_with_mistral(api_key, input_data, custom_prompt_arg, temp=None, sy
|
|
877 |
if not mistral_api_key or not mistral_api_key.strip():
|
878 |
logging.error("Mistral: No valid API key available")
|
879 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
|
|
880 |
# For example: raise ValueError("No valid deepseek API key available")
|
881 |
|
882 |
|
@@ -1145,39 +1200,14 @@ def save_transcription_and_summary(transcription_text, summary_text, download_pa
|
|
1145 |
def summarize_chunk(api_name, text, custom_prompt_input, api_key, temp=None, system_message=None):
|
1146 |
logging.debug("Entered 'summarize_chunk' function")
|
1147 |
try:
|
1148 |
-
|
1149 |
-
|
1150 |
-
|
1151 |
-
return summarize_with_anthropic(api_key, text, custom_prompt_input, temp, system_message)
|
1152 |
-
elif api_name.lower() == "cohere":
|
1153 |
-
return summarize_with_cohere(api_key, text, custom_prompt_input, temp, system_message)
|
1154 |
-
elif api_name.lower() == "groq":
|
1155 |
-
return summarize_with_groq(api_key, text, custom_prompt_input, temp, system_message)
|
1156 |
-
elif api_name.lower() == "openrouter":
|
1157 |
-
return summarize_with_openrouter(api_key, text, custom_prompt_input, temp, system_message)
|
1158 |
-
elif api_name.lower() == "deepseek":
|
1159 |
-
return summarize_with_deepseek(api_key, text, custom_prompt_input, temp, system_message)
|
1160 |
-
elif api_name.lower() == "mistral":
|
1161 |
-
return summarize_with_mistral(api_key, text, custom_prompt_input, temp, system_message)
|
1162 |
-
elif api_name.lower() == "llama.cpp":
|
1163 |
-
return summarize_with_llama(text, custom_prompt_input, temp, system_message)
|
1164 |
-
elif api_name.lower() == "kobold":
|
1165 |
-
return summarize_with_kobold(text, api_key, custom_prompt_input, temp, system_message)
|
1166 |
-
elif api_name.lower() == "ooba":
|
1167 |
-
return summarize_with_oobabooga(text, api_key, custom_prompt_input, temp, system_message)
|
1168 |
-
elif api_name.lower() == "tabbyapi":
|
1169 |
-
return summarize_with_tabbyapi(text, custom_prompt_input, temp, system_message)
|
1170 |
-
elif api_name.lower() == "vllm":
|
1171 |
-
return summarize_with_vllm(text, custom_prompt_input, temp, system_message)
|
1172 |
-
elif api_name.lower() == "local-llm":
|
1173 |
-
return summarize_with_local_llm(text, custom_prompt_input, temp, system_message)
|
1174 |
-
elif api_name.lower() == "huggingface":
|
1175 |
-
return summarize_with_huggingface(api_key, text, custom_prompt_input, temp, )#system_message)
|
1176 |
-
else:
|
1177 |
-
logging.warning(f"Unsupported API: {api_name}")
|
1178 |
return None
|
|
|
|
|
1179 |
except Exception as e:
|
1180 |
-
logging.error(f"Error in summarize_chunk with {api_name}: {str(e)}")
|
1181 |
return None
|
1182 |
|
1183 |
|
|
|
20 |
import logging
|
21 |
import os
|
22 |
import time
|
23 |
+
from typing import Optional
|
24 |
|
25 |
import requests
|
26 |
from requests import RequestException
|
|
|
31 |
from App_Function_Libraries.Diarization_Lib import combine_transcription_and_diarization
|
32 |
from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
|
33 |
summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm
|
34 |
+
from App_Function_Libraries.DB.DB_Manager import add_media_to_database
|
35 |
# Import Local
|
36 |
+
from App_Function_Libraries.Utils.Utils import load_and_log_configs, load_comprehensive_config, sanitize_filename, \
|
37 |
clean_youtube_url, create_download_directory, is_valid_url
|
38 |
from App_Function_Libraries.Video_DL_Ingestion_Lib import download_video, extract_video_info
|
39 |
|
|
|
44 |
config = load_comprehensive_config()
|
45 |
openai_api_key = config.get('API', 'openai_api_key', fallback=None)
|
46 |
|
47 |
+
|
48 |
+
def summarize(
    input_data: str,
    custom_prompt_arg: Optional[str],
    api_name: str,
    api_key: Optional[str],
    temp: Optional[float],
    system_message: Optional[str]
) -> str:
    """Route a summarization request to the backend selected by ``api_name``.

    The backend name is matched case-insensitively against the supported
    providers and the request is forwarded to the matching
    ``summarize_with_*`` function.

    Args:
        input_data: Content to summarize; forwarded unchanged to the backend.
        custom_prompt_arg: Optional prompt forwarded to the backend.
        api_name: Name of the backend, e.g. ``"openai"`` or ``"llama.cpp"``.
        api_key: Optional API key, forwarded to the backends that accept one.
        temp: Optional temperature, forwarded to the backends that accept one.
        system_message: Optional system message, forwarded to the backends
            that accept one (the HuggingFace call does not receive it).

    Returns:
        Whatever the selected backend returns. If ``api_name`` matches no
        backend, the string ``"Error: Invalid API Name <api_name>"``. If any
        exception is raised (including ``api_name`` not being a string), the
        exception is logged and ``"Error: <message>"`` is returned instead of
        propagating.
    """
    try:
        logging.debug(f"api_name type: {type(api_name)}, value: {api_name}")
        # Normalise once instead of on every comparison. Kept inside the try
        # so a non-string api_name is still reported as an "Error: ..." string.
        provider = api_name.lower()

        if provider == "openai":
            return summarize_with_openai(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "anthropic":
            return summarize_with_anthropic(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "cohere":
            return summarize_with_cohere(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "groq":
            return summarize_with_groq(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "huggingface":
            # system_message is intentionally not forwarded here, matching the
            # existing call; the previous second "huggingface" branch further
            # down the chain was unreachable and has been removed.
            return summarize_with_huggingface(api_key, input_data, custom_prompt_arg, temp)
        elif provider == "openrouter":
            return summarize_with_openrouter(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "deepseek":
            return summarize_with_deepseek(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "mistral":
            return summarize_with_mistral(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "llama.cpp":
            return summarize_with_llama(input_data, custom_prompt_arg, temp, system_message)
        elif provider == "kobold":
            return summarize_with_kobold(input_data, api_key, custom_prompt_arg, temp, system_message)
        elif provider == "ooba":
            return summarize_with_oobabooga(input_data, api_key, custom_prompt_arg, temp, system_message)
        elif provider == "tabbyapi":
            return summarize_with_tabbyapi(input_data, custom_prompt_arg, temp, system_message)
        elif provider == "vllm":
            # NOTE(review): None is passed as the third positional argument
            # rather than temp -- confirm against summarize_with_vllm's signature.
            return summarize_with_vllm(input_data, custom_prompt_arg, None, system_message)
        elif provider == "local-llm":
            return summarize_with_local_llm(input_data, custom_prompt_arg, temp, system_message)
        else:
            return f"Error: Invalid API Name {api_name}"

    except Exception as e:
        # Callers get an "Error: ..." string rather than an exception.
        logging.error(f"Error in summarize function: {str(e)}", exc_info=True)
        return f"Error: {str(e)}"
|
94 |
+
|
95 |
+
|
96 |
def extract_text_from_segments(segments):
|
97 |
logging.debug(f"Segments received: {segments}")
|
98 |
logging.debug(f"Type of segments: {type(segments)}")
|
|
|
115 |
|
116 |
def summarize_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_message=None):
|
117 |
loaded_config_data = load_and_log_configs()
|
|
|
118 |
try:
|
119 |
# API key validation
|
120 |
+
if not api_key or api_key.strip() == "":
|
121 |
logging.info("OpenAI: #1 API key not provided as parameter")
|
122 |
logging.info("OpenAI: Attempting to use API key from config file")
|
123 |
api_key = loaded_config_data['api_keys']['openai']
|
124 |
|
125 |
+
if not api_key or api_key.strip() == "":
|
126 |
logging.error("OpenAI: #2 API key not found or is empty")
|
127 |
return "OpenAI: API Key Not Provided/Found in Config file or is empty"
|
128 |
|
129 |
+
openai_api_key = api_key
|
130 |
logging.debug(f"OpenAI: Using API Key: {api_key[:5]}...{api_key[-5:]}")
|
131 |
|
132 |
# Input data handling
|
|
|
171 |
else:
|
172 |
raise ValueError(f"OpenAI: Invalid input data format: {type(data)}")
|
173 |
|
|
|
174 |
logging.debug(f"OpenAI: Extracted text (first 500 chars): {text[:500]}...")
|
175 |
logging.debug(f"OpenAI: Custom prompt: {custom_prompt_arg}")
|
176 |
|
|
|
254 |
if not anthropic_api_key or not anthropic_api_key.strip():
|
255 |
logging.error("Anthropic: No valid API key available")
|
256 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
257 |
+
#FIXME
|
258 |
# For example: raise ValueError("No valid Anthropic API key available")
|
259 |
|
260 |
|
|
|
394 |
if not cohere_api_key or not cohere_api_key.strip():
|
395 |
logging.error("Cohere: No valid API key available")
|
396 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
397 |
+
# FIXME
|
398 |
# For example: raise ValueError("No valid Anthropic API key available")
|
399 |
|
400 |
if custom_prompt_arg is None:
|
|
|
506 |
if not groq_api_key or not groq_api_key.strip():
|
507 |
logging.error("Anthropic: No valid API key available")
|
508 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
509 |
+
# FIXME
|
510 |
# For example: raise ValueError("No valid Anthropic API key available")
|
511 |
|
512 |
logging.debug(f"Groq: Using API Key: {groq_api_key[:5]}...{groq_api_key[-5:]}")
|
|
|
735 |
if not huggingface_api_key or not huggingface_api_key.strip():
|
736 |
logging.error("HuggingFace: No valid API key available")
|
737 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
738 |
+
# FIXME
|
739 |
# For example: raise ValueError("No valid Anthropic API key available")
|
740 |
|
741 |
|
|
|
786 |
response = requests.post(API_URL, headers=headers, json=data)
|
787 |
|
788 |
if response.status_code == 200:
|
789 |
+
summary = response.json()[0]['generated_text'].strip()
|
790 |
logging.debug("huggingface: Summarization successful")
|
791 |
print("Summarization successful.")
|
792 |
return summary
|
|
|
825 |
if not deepseek_api_key or not deepseek_api_key.strip():
|
826 |
logging.error("DeepSeek: No valid API key available")
|
827 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
828 |
+
# FIXME
|
829 |
# For example: raise ValueError("No valid deepseek API key available")
|
830 |
|
831 |
|
|
|
931 |
if not mistral_api_key or not mistral_api_key.strip():
|
932 |
logging.error("Mistral: No valid API key available")
|
933 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
934 |
+
# FIXME
|
935 |
# For example: raise ValueError("No valid deepseek API key available")
|
936 |
|
937 |
|
|
|
1200 |
def summarize_chunk(api_name, text, custom_prompt_input, api_key, temp=None, system_message=None):
    """Summarize a single chunk of text through ``summarize``.

    Delegates to ``summarize`` and converts its error signalling into
    ``None``: a ``None`` result, a result starting with ``"Error:"``, or any
    raised exception all yield ``None`` (after being logged). Otherwise the
    result from ``summarize`` is returned unchanged.
    """
    logging.debug("Entered 'summarize_chunk' function")
    try:
        summary = summarize(text, custom_prompt_input, api_name, api_key, temp, system_message)
        succeeded = summary is not None and not summary.startswith("Error:")
        if succeeded:
            logging.info(f"Summarization with {api_name} successful")
            return summary
        # summarize reports failures as None or an "Error: ..." string.
        logging.warning(f"Summarization with {api_name} failed: {summary}")
        return None
    except Exception as exc:
        logging.error(f"Error in summarize_chunk with {api_name}: {str(exc)}", exc_info=True)
        return None
|
1212 |
|
1213 |
|