hprasath committed on
Commit
568c815
1 Parent(s): ce0106e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -17
app.py CHANGED
@@ -110,29 +110,23 @@ def seperate_image_text_from_pdf(pdf_url):
110
 
111
  def pdf_image_text_embedding_and_text_embedding(pages_info):
112
  try:
113
- # List to store page embeddings
114
  page_embeddings = []
115
 
116
  # Iterate through each page
117
  for page in pages_info:
118
  # Extract text from the page
119
- text = page["text"]
 
120
 
121
- # Extract images from the page
122
- images = page["images"]
123
-
124
- # List to store image embeddings
125
  image_embeddings = []
126
-
127
- # Iterate through each image
128
  for image in images:
129
- # Get the image embedding
130
- image_embedding = get_image_embedding(image)
131
- extracted_text = extract_text(image)
132
- # Append the image embedding to the list
133
- image_embeddings.append({"image_embedding": image_embedding.tolist() ,"extracted_text":extracted_text})
134
-
135
- # Get the text embedding
136
 
137
  # Store the page embeddings in a dictionary
138
  page_embedding = {
@@ -140,12 +134,11 @@ def pdf_image_text_embedding_and_text_embedding(pages_info):
140
  "text": text,
141
  }
142
 
143
- # Append the page embedding to the list
144
  page_embeddings.append(page_embedding)
145
 
146
  return page_embeddings
147
  except Exception as e:
148
- print("An error occurred:", e)
149
  return "Error"
150
 
151
 
 
110
 
111
  def pdf_image_text_embedding_and_text_embedding(pages_info):
112
  try:
 
113
  page_embeddings = []
114
 
115
  # Iterate through each page
116
  for page in pages_info:
117
  # Extract text from the page
118
+ text = page.get("text", "")
119
+ images = page.get("images", [])
120
 
 
 
 
 
121
  image_embeddings = []
 
 
122
  for image in images:
123
+ try:
124
+ image_embedding = get_image_embedding(image)
125
+ extracted_text = extract_text(image)
126
+ image_embeddings.append({"image_embedding": image_embedding.tolist(), "extracted_text": extracted_text})
127
+ except Exception as image_error:
128
+ print(f"Error processing image: {image_error}")
129
+ # Log the error or handle it as needed
130
 
131
  # Store the page embeddings in a dictionary
132
  page_embedding = {
 
134
  "text": text,
135
  }
136
 
 
137
  page_embeddings.append(page_embedding)
138
 
139
  return page_embeddings
140
  except Exception as e:
141
+ print(f"An error occurred: {e}")
142
  return "Error"
143
 
144