hprasath committed on
Commit
95760fd
1 Parent(s): 3e697f7

Upload 2 files

Browse files
Files changed (2) hide show
  1. Dockerfile +11 -0
  2. app.py +329 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Official Python 3.9 base image.
FROM python:3.9

# Every following relative path resolves against /code inside the image.
WORKDIR /code

# Copy only the dependency manifest first so the pip layer below stays cached
# until requirements.txt itself changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the application source into the working directory.
COPY . .

# Serve the Flask object `app` defined in app.py on port 7860.
# --timeout replaces gunicorn's default 30 s worker timeout: the handlers
# download media and run speech recognition / embedding models, and a worker
# that exceeds the timeout is killed in the middle of the request.
CMD ["gunicorn", "-b", "0.0.0.0:7860", "--timeout", "300", "app:app"]
app.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import moviepy.editor as mp
2
+ from flask import Flask, request, jsonify
3
+ from flask_cors import CORS
4
+ import requests
5
+ from io import BytesIO
6
+ import speech_recognition as sr
7
+ import io
8
+ import fitz # PyMuPDF for working with PDFs
9
+ import numpy as np
10
+ import cv2
11
+ from flask_caching import Cache
12
+
13
+ from utils.audioEmbedding.index import extract_audio_embeddings
14
+ from utils.videoEmbedding.index import get_video_embedding
15
+ from utils.imageToText.index import extract_text
16
+ from utils.sentanceEmbedding.index import get_text_vector , get_text_discription_vector
17
+ from utils.imageEmbedding.index import get_image_embedding
18
+ from utils.similarityScore import get_all_similarities
19
+ from utils.objectDetection.index import detect_objects
20
+
21
+
22
+
23
# WSGI application object; the Dockerfile starts it through "app:app".
app = Flask(__name__)

# Cache backend settings used by the @cache.cached decorators on the routes.
_CACHE_CONFIG = {'CACHE_TYPE': 'simple'}  # You can choose a caching type based on your requirements
cache = Cache(app, config=_CACHE_CONFIG)

# Enable CORS on the application with flask_cors defaults.
CORS(app)
26
+ import moviepy.editor as mp
27
+ import tempfile
28
+
29
def get_face_locations(binary_data):
    """Detect frontal faces in an encoded image.

    Args:
        binary_data: Raw bytes of an encoded image, decoded with
            ``cv2.imdecode``.

    Returns:
        A list with one dict per detected face. Each dict holds the pixel
        coordinates ``top``, ``right``, ``bottom`` and ``left`` as plain
        Python ints, so the result is JSON-serialisable and its ``str()``
        form does not depend on the installed NumPy version. The list is
        empty when no face is found.

    Raises:
        ValueError: If ``binary_data`` is empty or cannot be decoded.
    """
    if not binary_data:
        raise ValueError("No image data supplied")

    # Decode the raw bytes into a BGR pixel array.
    pixel_buffer = np.frombuffer(binary_data, np.uint8)
    image = cv2.imdecode(pixel_buffer, cv2.IMREAD_COLOR)
    if image is None:
        # imdecode reports an undecodable buffer by returning None.
        raise ValueError("Could not decode image data")

    # Haar cascade for frontal faces shipped with OpenCV.
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )

    # The cascade is run on a single-channel image.
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(
        gray_image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
    )

    # Each detection is (x, y, width, height); convert it to box edges.
    return [
        {
            "top": int(y),
            "right": int(x + w),
            "bottom": int(y + h),
            "left": int(x),
        }
        for (x, y, w, h) in faces
    ]
51
+
52
def seperate_image_text_from_pdf(pdf_url):
    """Download a PDF and split every page into its text and embedded images.

    The document is opened straight from the downloaded bytes, so no
    temporary file is created and nothing is left behind if parsing fails.

    Args:
        pdf_url: URL of the PDF document to fetch.

    Returns:
        A list with one dict per page:
        ``pgno`` (1-based page number), ``images`` (list of the raw bytes of
        every image referenced by the page) and ``text`` (the page text).
        The list is empty when the download does not answer with HTTP 200.
    """
    # A timeout keeps an unresponsive host from blocking the worker forever.
    response = requests.get(pdf_url, timeout=30)
    if response.status_code != 200:
        print("Failed to fetch the PDF from the URL.")
        return []

    pages_info = []
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(stream=response.content, filetype="pdf") as pdf:
        for page_num, page in enumerate(pdf, start=1):
            # The first item of each get_images() entry is the image xref.
            images_bytes = [
                pdf.extract_image(img_info[0])["image"]
                for img_info in page.get_images(full=True)
            ]
            pages_info.append(
                {
                    "pgno": page_num,
                    "images": images_bytes,
                    "text": page.get_text(),
                }
            )

    return pages_info
106
+
107
def pdf_image_text_embedding_and_text_embedding(pages_info):
    """Attach an embedding and extracted text to every image of every page.

    Args:
        pages_info: Iterable of page dicts, each with an ``images`` entry
            (iterable of raw image bytes) and a ``text`` entry, as produced
            by ``seperate_image_text_from_pdf``.

    Returns:
        A list with one dict per input page, in the same order:
        ``images`` is a list of
        ``{"image_embedding": <list>, "extracted_text": <extract_text result>}``
        and ``text`` is the page text passed through unchanged.
    """
    page_embeddings = []
    for page in pages_info:
        image_embeddings = [
            {
                # .tolist() makes the embedding JSON-serialisable.
                "image_embedding": get_image_embedding(image).tolist(),
                "extracted_text": extract_text(image),
            }
            for image in page["images"]
        ]
        page_embeddings.append(
            {
                "images": image_embeddings,
                "text": page["text"],
            }
        )
    return page_embeddings
142
+
143
def separate_audio_from_video(video_url):
    """Extract the audio track of a video as the bytes of a ``.wav`` file.

    Args:
        video_url: Location of the video, passed unchanged to
            ``moviepy.editor.VideoFileClip``.

    Returns:
        The content of the written ``.wav`` file as ``bytes``, or ``None``
        when the video cannot be opened, has no audio track, or extraction
        fails for any other reason (the error is printed).
    """
    import os  # local import: only needed for the temp-file cleanup below

    video = None
    temp_audio_filename = None
    try:
        video = mp.VideoFileClip(video_url)

        audio = video.audio
        if audio is None:
            raise ValueError("Video has no audio track")

        # Reserve a unique .wav path. The handle is closed before
        # write_audiofile opens the same path on its own.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
            temp_audio_filename = temp_audio_file.name

        audio.write_audiofile(temp_audio_filename)

        with open(temp_audio_filename, "rb") as f:
            return f.read()

    except Exception as e:
        # Best effort by design: callers receive None instead of an exception.
        print("An error occurred:", e)
        return None

    finally:
        # Release the clip's reader and remove the temp file on every path.
        if video is not None:
            video.close()
        if temp_audio_filename is not None and os.path.exists(temp_audio_filename):
            os.remove(temp_audio_filename)
166
+
167
+
168
+
169
+
170
# NOTE(review): decorators apply bottom-up, so app.route registers the plain
# function and the cache wrapper created above it is never invoked by Flask.
# Do not simply swap the two lines: the default cache key is the request
# path, which would serve one cached answer to every POST body.
@cache.cached(timeout=300)
@app.route('/get_text_embedding', methods=['POST'])
def get_text_embedding_route():
    """Return the embedding of the posted text.

    Expects a JSON object with a ``text`` field.

    Returns:
        ``({"text_embedding": ...}, 200)`` on success,
        ``({"error": ...}, 400)`` when ``text`` is missing or the body is
        not a JSON object, ``({"error": ...}, 500)`` when embedding fails.
    """
    payload = request.get_json(silent=True)
    text = payload.get("text") if isinstance(payload, dict) else None
    if text is None:
        return jsonify({"error": "Missing required field 'text'"}), 400

    try:
        text_embedding = get_text_vector(text)
        return jsonify({"text_embedding": text_embedding}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
180
+
181
+
182
# NOTE(review): decorators apply bottom-up, so app.route registers the plain
# function and the cache wrapper created above it is never invoked by Flask.
# Do not simply swap the two lines: the default cache key is the request
# path, which would serve one cached answer to every POST body.
@cache.cached(timeout=300)
@app.route('/extract_audio_text_and_embedding', methods=['POST'])
def get_audio_embedding_route():
    """Download an audio file, embed it and transcribe it.

    Expects a JSON object with an ``audio_url`` field.

    Returns:
        ``({"extracted_text": ..., "audio_embedding": ...}, 200)`` on
        success; ``extracted_text`` is ``""`` when recognition fails.
        ``({"error": ...}, 400)`` when ``audio_url`` is missing and
        ``({"error": ...}, 500)`` for download or processing failures.
    """
    payload = request.get_json(silent=True)
    audio_url = payload.get('audio_url') if isinstance(payload, dict) else None
    if not audio_url:
        return jsonify({"error": "Missing required field 'audio_url'"}), 400

    try:
        # A timeout keeps an unresponsive host from blocking the worker.
        response = requests.get(audio_url, timeout=30)
        if response.status_code != 200:
            return jsonify({"error": "Failed to download audio"}), 500
        audio_bytes = response.content

        audio_embedding = extract_audio_embeddings(audio_bytes)

        recognizer = sr.Recognizer()
        with sr.AudioFile(BytesIO(audio_bytes)) as source:
            recording = recognizer.record(source)

        extracted_text = ""
        try:
            extracted_text = recognizer.recognize_google(recording)
        except Exception as e:
            # Transcription is best effort: keep the embedding, return "".
            print(e)

        return jsonify({"extracted_text": extracted_text, "audio_embedding": audio_embedding}), 200

    except Exception as e:
        print(e)
        return jsonify({"error": str(e)}), 500
202
+
203
+ # Route to get image embeddings
204
# NOTE(review): decorators apply bottom-up, so app.route registers the plain
# function and the cache wrapper created above it is never invoked by Flask.
# Do not simply swap the two lines: the default cache key is the request
# path, which would serve one cached answer to every POST body.
@cache.cached(timeout=300)
@app.route('/extract_image_text_and_embedding', methods=['POST'])
def get_image_embedding_route():
    """Download an image and return its embedding and extracted text.

    Expects a JSON object with an ``imageUrl`` field.

    Returns:
        ``({"image_embedding": [...], "extracted_text": ...}, 200)`` on
        success, ``({"error": ...}, 400)`` when ``imageUrl`` is missing and
        ``({"error": ...}, 500)`` for download or processing failures.
    """
    payload = request.get_json(silent=True)
    image_url = payload.get("imageUrl") if isinstance(payload, dict) else None
    if not image_url:
        return jsonify({"error": "Missing required field 'imageUrl'"}), 400

    try:
        # A timeout keeps an unresponsive host from blocking the worker.
        response = requests.get(image_url, timeout=30)
        if response.status_code != 200:
            return jsonify({"error": "Failed to download image"}), 500

        binary_data = response.content
        extracted_text = extract_text(binary_data)
        image_embedding = get_image_embedding(binary_data)
        return jsonify({"image_embedding": image_embedding.tolist(), "extracted_text": extracted_text}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
221
+
222
+ # Route to get video embeddings
223
# NOTE(review): decorators apply bottom-up, so app.route registers the plain
# function and the cache wrapper created above it is never invoked by Flask.
# Do not simply swap the two lines: the default cache key is the request
# path, which would serve one cached answer to every POST body.
@cache.cached(timeout=300)
@app.route('/extract_video_text_and_embedding', methods=['POST'])
def get_video_embedding_route():
    """Embed a video and its audio track and transcribe the audio.

    Expects a JSON object with a ``videoUrl`` field.

    Returns:
        ``({"video_embedding": ..., "extracted_audio_text": ...,
        "audio_embedding": ...}, 200)`` on success; the transcript is ``""``
        when recognition fails. ``({"error": ...}, 400)`` when ``videoUrl``
        is missing and ``({"error": ...}, 500)`` for processing failures.
    """
    payload = request.get_json(silent=True)
    video_url = payload.get("videoUrl") if isinstance(payload, dict) else None
    if not video_url:
        return jsonify({"error": "Missing required field 'videoUrl'"}), 400

    try:
        audio_bytes = separate_audio_from_video(video_url)
        if audio_bytes is None:
            # The helper reports failure by returning None; fail here with a
            # clear message instead of a confusing error further down.
            return jsonify({"error": "Failed to extract audio from video"}), 500

        audio_embedding = extract_audio_embeddings(audio_bytes)

        recognizer = sr.Recognizer()
        with sr.AudioFile(io.BytesIO(audio_bytes)) as source:
            recording = recognizer.record(source)

        extracted_text = ""
        try:
            extracted_text = recognizer.recognize_google(recording)
        except Exception as e:
            # Transcription is best effort: keep the embeddings, return "".
            print(e)

        video_embedding = get_video_embedding(video_url)
        return jsonify({"video_embedding": video_embedding, "extracted_audio_text": extracted_text, "audio_embedding": audio_embedding}), 200

    except Exception as e:
        print(e)
        return jsonify({"error": str(e)}), 500
247
+
248
# NOTE(review): decorators apply bottom-up, so app.route registers the plain
# function and the cache wrapper created above it is never invoked by Flask.
# Do not simply swap the two lines: the default cache key is the request
# path, which would serve one cached answer to every POST body.
@cache.cached(timeout=300)
@app.route('/extract_pdf_text_and_embedding', methods=['POST'])
def extract_pdf_text_and_embedding():
    """Return per-page text and image embeddings of a PDF.

    Expects a JSON object with a ``pdfUrl`` field.

    Returns:
        ``({"content": [...]}, 200)`` on success, where ``content`` is the
        result of ``pdf_image_text_embedding_and_text_embedding``;
        ``({"error": ...}, 400)`` when ``pdfUrl`` is missing and
        ``({"error": ...}, 500)`` for download or processing failures.
    """
    payload = request.get_json(silent=True)
    pdf_url = payload.get("pdfUrl") if isinstance(payload, dict) else None
    if not pdf_url:
        return jsonify({"error": "Missing required field 'pdfUrl'"}), 400

    try:
        pages_info = seperate_image_text_from_pdf(pdf_url)
        content = pdf_image_text_embedding_and_text_embedding(pages_info)
        return jsonify({"content": content}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
261
+
262
+ # Route to get text description embeddings
263
# NOTE(review): decorators apply bottom-up, so app.route registers the plain
# function and the cache wrapper created above it is never invoked by Flask.
# Do not simply swap the two lines: the default cache key is the request
# path, which would serve one cached answer to every POST body.
@cache.cached(timeout=300)
@app.route('/getTextDescriptionEmbedding', methods=['POST'])
def get_text_description_embedding_route():
    """Return the description embedding of the posted text.

    Expects a JSON object with a ``text`` field.

    Returns:
        ``({"text_description_embedding": [...]}, 200)`` on success,
        ``({"error": ...}, 400)`` when ``text`` is missing or the body is
        not a JSON object, ``({"error": ...}, 500)`` when embedding fails.
    """
    payload = request.get_json(silent=True)
    text = payload.get("text") if isinstance(payload, dict) else None
    if text is None:
        return jsonify({"error": "Missing required field 'text'"}), 400

    try:
        text_description_embedding = get_text_discription_vector(text)
        return jsonify({"text_description_embedding": text_description_embedding.tolist()}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
273
+
274
+
275
+
276
+ # Route to get object detection results
277
# NOTE(review): decorators apply bottom-up, so app.route registers the plain
# function and the cache wrapper created above it is never invoked by Flask.
# Do not simply swap the two lines: the default cache key is the request
# path, which would serve one cached answer to every POST body.
@cache.cached(timeout=300)
@app.route('/detectObjects', methods=['POST'])
def detect_objects_route():
    """Download an image and return the result of ``detect_objects`` for it.

    Expects a JSON object with an ``imageUrl`` field.

    Returns:
        ``({"object_detection_results": ...}, 200)`` on success,
        ``({"error": ...}, 400)`` when ``imageUrl`` is missing and
        ``({"error": ...}, 500)`` for download or processing failures.
    """
    payload = request.get_json(silent=True)
    image_url = payload.get("imageUrl") if isinstance(payload, dict) else None
    if not image_url:
        return jsonify({"error": "Missing required field 'imageUrl'"}), 400

    try:
        # A timeout keeps an unresponsive host from blocking the worker.
        response = requests.get(image_url, timeout=30)
        if response.status_code != 200:
            return jsonify({"error": "Failed to download image"}), 500

        object_detection_results = detect_objects(response.content)
        return jsonify({"object_detection_results": object_detection_results}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
291
+
292
+ # Route to get face locations
293
# NOTE(review): decorators apply bottom-up, so app.route registers the plain
# function and the cache wrapper created above it is never invoked by Flask.
# Do not simply swap the two lines: the default cache key is the request
# path, which would serve one cached answer to every POST body.
@cache.cached(timeout=300)
@app.route('/getFaceLocations', methods=['POST'])
def get_face_locations_route():
    """Download an image and return the bounding boxes of detected faces.

    Expects a JSON object with an ``imageUrl`` field.

    Returns:
        ``({"face_locations": "<str of a list of dicts>"}, 200)`` on
        success. The value is the ``str()`` of the list, not a JSON array;
        that format is kept so existing clients keep working.
        ``({"error": ...}, 400)`` when ``imageUrl`` is missing and
        ``({"error": ...}, 500)`` for download or processing failures.
    """
    payload = request.get_json(silent=True)
    image_url = payload.get("imageUrl") if isinstance(payload, dict) else None
    if not image_url:
        return jsonify({"error": "Missing required field 'imageUrl'"}), 400

    try:
        # A timeout keeps an unresponsive host from blocking the worker.
        response = requests.get(image_url, timeout=30)
        if response.status_code != 200:
            return jsonify({"error": "Failed to download image"}), 500

        face_locations = get_face_locations(response.content)

        # Coerce coordinates to plain ints so the string form below does not
        # depend on how the installed NumPy version prints its scalars.
        plain_locations = [
            {side: int(value) for side, value in location.items()}
            for location in face_locations
        ]
        return jsonify({"face_locations": str(plain_locations)}), 200

    except Exception as e:
        print(e)
        return jsonify({"error": str(e)}), 500
312
+
313
+ # Route to get similarity score
314
# NOTE(review): decorators apply bottom-up, so app.route registers the plain
# function and the cache wrapper created above it is never invoked by Flask.
# Do not simply swap the two lines: the default cache key is the request
# path, which would serve one cached answer to every POST body.
@cache.cached(timeout=300)
@app.route('/getSimilarityScore', methods=['POST'])
def get_similarity_score_route():
    """Return the similarity of two posted embeddings.

    Expects a JSON object with ``embedding1`` and ``embedding2`` fields
    (the original code comment assumes both are lists).

    Returns:
        ``({"similarity_score": ...}, 200)`` with the result of
        ``get_all_similarities``, ``({"error": ...}, 400)`` when either
        embedding is missing, ``({"error": ...}, 500)`` when scoring fails.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    embedding1 = payload.get("embedding1")
    embedding2 = payload.get("embedding2")
    if embedding1 is None or embedding2 is None:
        return jsonify({"error": "Missing required fields 'embedding1' and 'embedding2'"}), 400

    try:
        similarity_score = get_all_similarities(embedding1, embedding2)
        return jsonify({"similarity_score": similarity_score}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
326
+
327
@app.route('/')
def hello():
    """Answer requests to the root path with a fixed greeting."""
    greeting = 'Hello, World!'
    return greeting