Spaces:
Running
Running
Allen Park
committed on
Commit
•
e34f0a0
1
Parent(s):
6283f19
feat(check token size of context)
Browse files
* fn that returns a boolean indicating whether the approximate token size is under 8000
* raise gr.Error if file exceeds token size
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
app.py
CHANGED
@@ -189,6 +189,12 @@ def model_call(question, document, answer, client_base_url):
|
|
189 |
combined_reasoning = " ".join(reasoning)[1:-1]
|
190 |
return combined_reasoning, score
|
191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
def get_filetype(filename):
    """Return the extension of ``filename`` without the leading dot.

    Only the final path component is inspected, so a dot in a parent
    directory name is never mistaken for an extension separator.

    Args:
        filename: File name or path as a string.

    Returns:
        The text after the last dot of the base name (e.g. ``"pdf"`` for
        ``"report.pdf"``), or an empty string when the base name has no dot.
    """
    import os  # local import: the file's top-level imports are not visible here

    base_name = os.path.basename(filename)
    # rpartition yields an empty separator when no dot is present, which lets
    # us tell "no extension" apart from "name ends with a dot".
    _, dot, extension = base_name.rpartition(".")
    return extension if dot else ""
|
194 |
|
@@ -218,6 +224,11 @@ def upload_file(filepath):
|
|
218 |
extracted_file_text = extract_text_pymupdf(filepath)
|
219 |
elif filetype == "docx":
|
220 |
extracted_file_text = extract_text_python_docx(filepath)
|
|
|
|
|
|
|
|
|
|
|
221 |
return [gr.UploadButton(visible=False), gr.Group(visible=True), gr.Markdown(f"**Uploaded file:** {name}"), extracted_file_text]
|
222 |
else:
|
223 |
return [gr.UploadButton(visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES), gr.Group(visible=False), gr.Markdown(""), extracted_file_text]
|
|
|
189 |
combined_reasoning = " ".join(reasoning)[1:-1]
|
190 |
return combined_reasoning, score
|
191 |
|
192 |
+
def return_approximate_token_size(text, max_token_length=8000, chars_per_token=4):
    """Report whether ``text`` fits under an approximate token budget.

    Despite its name, this function returns a boolean rather than a token
    count. The count is estimated as ``len(text) / chars_per_token``; this is
    a character-based heuristic, not the output of a real tokenizer.

    Args:
        text: The text to measure. ``None`` or an empty string is treated as
            zero tokens.
        max_token_length: Exclusive upper bound on the estimated token count.
            Defaults to 8000.
        chars_per_token: Assumed average number of characters per token.
            Defaults to 4.

    Returns:
        ``True`` if the estimated token count is strictly below
        ``max_token_length``, otherwise ``False``.
    """
    # Treat missing text as empty instead of failing on len(None).
    character_count = len(text) if text else 0
    estimated_tokens = character_count / chars_per_token
    return estimated_tokens < max_token_length
|
197 |
+
|
198 |
def get_filetype(filename):
    """Return the extension of ``filename`` without the leading dot.

    Only the final path component is inspected, so a dot in a parent
    directory name is never mistaken for an extension separator.

    Args:
        filename: File name or path as a string.

    Returns:
        The text after the last dot of the base name (e.g. ``"pdf"`` for
        ``"report.pdf"``), or an empty string when the base name has no dot.
    """
    import os  # local import: the file's top-level imports are not visible here

    base_name = os.path.basename(filename)
    # rpartition yields an empty separator when no dot is present, which lets
    # us tell "no extension" apart from "name ends with a dot".
    _, dot, extension = base_name.rpartition(".")
    return extension if dot else ""
|
200 |
|
|
|
224 |
extracted_file_text = extract_text_pymupdf(filepath)
|
225 |
elif filetype == "docx":
|
226 |
extracted_file_text = extract_text_python_docx(filepath)
|
227 |
+
|
228 |
+
# return warning if file is too large
|
229 |
+
if not return_approximate_token_size(extracted_file_text):
|
230 |
+
raise gr.Error("File is too large to process. Please upload a smaller file.")
|
231 |
+
|
232 |
return [gr.UploadButton(visible=False), gr.Group(visible=True), gr.Markdown(f"**Uploaded file:** {name}"), extracted_file_text]
|
233 |
else:
|
234 |
return [gr.UploadButton(visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES), gr.Group(visible=False), gr.Markdown(""), extracted_file_text]
|