Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -110,29 +110,23 @@ def seperate_image_text_from_pdf(pdf_url):
|
|
110 |
|
111 |
def pdf_image_text_embedding_and_text_embedding(pages_info):
|
112 |
try:
|
113 |
-
# List to store page embeddings
|
114 |
page_embeddings = []
|
115 |
|
116 |
# Iterate through each page
|
117 |
for page in pages_info:
|
118 |
# Extract text from the page
|
119 |
-
text = page
|
|
|
120 |
|
121 |
-
# Extract images from the page
|
122 |
-
images = page["images"]
|
123 |
-
|
124 |
-
# List to store image embeddings
|
125 |
image_embeddings = []
|
126 |
-
|
127 |
-
# Iterate through each image
|
128 |
for image in images:
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
|
137 |
# Store the page embeddings in a dictionary
|
138 |
page_embedding = {
|
@@ -140,12 +134,11 @@ def pdf_image_text_embedding_and_text_embedding(pages_info):
|
|
140 |
"text": text,
|
141 |
}
|
142 |
|
143 |
-
# Append the page embedding to the list
|
144 |
page_embeddings.append(page_embedding)
|
145 |
|
146 |
return page_embeddings
|
147 |
except Exception as e:
|
148 |
-
print("An error occurred:
|
149 |
return "Error"
|
150 |
|
151 |
|
|
|
110 |
|
111 |
def pdf_image_text_embedding_and_text_embedding(pages_info):
|
112 |
try:
|
|
|
113 |
page_embeddings = []
|
114 |
|
115 |
# Iterate through each page
|
116 |
for page in pages_info:
|
117 |
# Extract text from the page
|
118 |
+
text = page.get("text", "")
|
119 |
+
images = page.get("images", [])
|
120 |
|
|
|
|
|
|
|
|
|
121 |
image_embeddings = []
|
|
|
|
|
122 |
for image in images:
|
123 |
+
try:
|
124 |
+
image_embedding = get_image_embedding(image)
|
125 |
+
extracted_text = extract_text(image)
|
126 |
+
image_embeddings.append({"image_embedding": image_embedding.tolist(), "extracted_text": extracted_text})
|
127 |
+
except Exception as image_error:
|
128 |
+
print(f"Error processing image: {image_error}")
|
129 |
+
# Log the error or handle it as needed
|
130 |
|
131 |
# Store the page embeddings in a dictionary
|
132 |
page_embedding = {
|
|
|
134 |
"text": text,
|
135 |
}
|
136 |
|
|
|
137 |
page_embeddings.append(page_embedding)
|
138 |
|
139 |
return page_embeddings
|
140 |
except Exception as e:
|
141 |
+
print(f"An error occurred: {e}")
|
142 |
return "Error"
|
143 |
|
144 |
|