Spaces:
Runtime error
Runtime error
File size: 1,237 Bytes
66340f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import os
from fastapi import UploadFile
import mimetypes
from app.parser.parsers import *
from app.schemas.document import Document
async def get_document_from_file(file: UploadFile, temp_file_path="/tmp/temp_file"):
    """Parse an uploaded file into a ``Document``.

    The upload is written to ``temp_file_path``, handed to
    ``extract_text_with_mimetype`` and the temporary file is removed again,
    whether parsing succeeded or not.

    Args:
        file: The uploaded file; its ``content_type`` selects the parser.
        temp_file_path: Where the upload is stored while it is parsed.
            NOTE(review): the default path is shared by every caller, so
            concurrent calls that rely on the default can overwrite each
            other's data -- pass a unique path if that matters.

    Returns:
        A ``Document`` whose ``text`` is the result of the parser.

    Raises:
        Exception: "Couldn't get document from file" if text extraction
            fails; the original error is attached as ``__cause__``.
        OSError: If the temporary file cannot be written.
    """
    mimetype = file.content_type
    if mimetype is None and file.filename:
        # The temp path usually has no extension, so guessing from it later
        # cannot work; the client-supplied filename is the only useful hint.
        mimetype, _ = mimetypes.guess_type(file.filename)

    stream = await file.read()
    try:
        # Distinct handle name: do not shadow the ``file`` parameter.
        with open(temp_file_path, "wb") as temp_file:
            temp_file.write(stream)
        try:
            parsed_text = await extract_text_with_mimetype(temp_file_path, mimetype)
        except Exception as e:
            # Same exception type and message as before, but keep the cause.
            raise Exception("Couldn't get document from file") from e
    finally:
        # Single cleanup point; also covers a write that failed part-way.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)

    return Document(
        text=parsed_text,
    )
async def extract_text_with_mimetype(file_path, mimetype):
    """Extract text from ``file_path`` using the parser matching ``mimetype``.

    Args:
        file_path: Path of the file to parse.
        mimetype: MIME type of the file, or ``None`` to guess it from
            ``file_path`` (which only works when the path has a known
            extension). Parameters such as ``; charset=utf-8`` and letter
            case are ignored when matching.

    Returns:
        Whatever the selected parser's ``parse`` returns for ``file_path``
        (presumably the extracted text -- confirm against the parsers).

    Raises:
        Exception: "Unsupported file type" if the type cannot be determined
            or no parser handles it.
    """
    if mimetype is None:
        mimetype, _ = mimetypes.guess_type(file_path)
        if mimetype is None:
            raise Exception("Unsupported file type")

    # Media types are case-insensitive and may carry parameters
    # ("text/plain; charset=utf-8"); compare on the bare type only.
    mimetype = mimetype.split(";", 1)[0].strip().lower()

    if mimetype == "application/pdf":
        parsed_text = PdfParser.parse(file_path)
    elif mimetype == "text/plain":
        parsed_text = TxtParser.parse(file_path)
    elif (
        mimetype
        == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    ):
        parsed_text = DocxParser.parse(file_path)
    else:
        # Previously fell through to ``return parsed_text`` and crashed with
        # UnboundLocalError; report the unsupported type explicitly instead.
        raise Exception("Unsupported file type")
    return parsed_text
|