Spaces:

hprasath
/

image-processing

Sleeping

App Files Files Community

image-processing / app.py

hprasath

Upload 2 files

95760fd verified 8 months ago

raw

history blame

10.9 kB

	import moviepy.editor as mp
	from flask import Flask, request, jsonify
	from flask_cors import CORS
	import requests
	from io import BytesIO
	import speech_recognition as sr
	import io
	import fitz # PyMuPDF for working with PDFs
	import numpy as np
	import cv2
	from flask_caching import Cache

	from utils.audioEmbedding.index import extract_audio_embeddings
	from utils.videoEmbedding.index import get_video_embedding
	from utils.imageToText.index import extract_text
	from utils.sentanceEmbedding.index import get_text_vector , get_text_discription_vector
	from utils.imageEmbedding.index import get_image_embedding
	from utils.similarityScore import get_all_similarities
	from utils.objectDetection.index import detect_objects



	# Flask application object; the route decorators further down register their views on it.
	app = Flask(__name__)
	# Flask-Caching instance bound to the app, configured with the 'simple' cache type.
	cache = Cache(app, config={'CACHE_TYPE': 'simple'}) # You can choose a caching type based on your requirements
	# Apply flask_cors to the whole application using its default settings.
	CORS(app)
	# NOTE(review): moviepy.editor is already imported at the top of the file; this repeat is redundant.
	import moviepy.editor as mp
	# tempfile is used by the PDF and video helpers below.
	import tempfile

	def get_face_locations(binary_data):
	    """Detect frontal faces in an encoded image and return their bounding boxes.

	    Args:
	        binary_data: Raw bytes of an encoded image, in any format that
	            ``cv2.imdecode`` can read.

	    Returns:
	        A list with one dict per detected face. Each dict holds the plain
	        Python ``int`` pixel coordinates ``top``, ``right``, ``bottom`` and
	        ``left``. The list is empty when no face is found.

	    Raises:
	        ValueError: If ``binary_data`` is empty or cannot be decoded as an image.
	    """
	    if not binary_data:
	        raise ValueError("Image data is empty")

	    # Convert binary image data to a numpy array and decode it
	    encoded = np.frombuffer(binary_data, np.uint8)
	    image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
	    if image is None:
	        # imdecode reports an undecodable buffer by returning None; fail here with a
	        # clear message instead of an opaque cv2 error from cvtColor below.
	        raise ValueError("Could not decode image data")

	    # Load the pre-trained face detection model shipped with OpenCV
	    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

	    # The detector is run on a grayscale copy of the image
	    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    faces = face_cascade.detectMultiScale(gray_image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

	    # Cast the numpy integers to Python ints so the result is JSON-serializable
	    # and its string form does not depend on the installed numpy version.
	    return [
	        {"top": int(y), "right": int(x + w), "bottom": int(y + h), "left": int(x)}
	        for (x, y, w, h) in faces
	    ]

	def seperate_image_text_from_pdf(pdf_url):
	    """Download a PDF and collect the text and embedded images of every page.

	    Args:
	        pdf_url: URL the PDF is fetched from with an HTTP GET.

	    Returns:
	        A list with one dict per page, in page order. Each dict holds
	        ``"pgno"`` (1-based page number), ``"images"`` (list of the raw image
	        bytes extracted from the page) and ``"text"`` (the page text).
	        An empty list is returned, and a message printed, when the download
	        does not answer with HTTP 200.

	    Raises:
	        requests.exceptions.RequestException: If the download fails or times out.
	    """
	    # Without a timeout a stalled server would block this worker indefinitely.
	    response = requests.get(pdf_url, timeout=30)

	    if response.status_code != 200:
	        print("Failed to fetch the PDF from the URL.")
	        return []

	    # List to store page information
	    pages_info = []

	    # Open the PDF straight from memory: no temporary file is created, so nothing
	    # is left on disk if parsing fails, and the context manager closes the
	    # document on every exit path.
	    with fitz.open(stream=response.content, filetype="pdf") as pdf:
	        for page_num, page in enumerate(pdf, start=1):
	            # The first element of each image entry is the xref used for extraction
	            images_bytes = [
	                pdf.extract_image(img_info[0])["image"]
	                for img_info in page.get_images(full=True)
	            ]

	            pages_info.append({
	                "pgno": page_num,
	                "images": images_bytes,
	                "text": page.get_text(),
	            })

	    return pages_info

	def pdf_image_text_embedding_and_text_embedding(pages_info):
	    """Build the per-page embedding payload for a parsed PDF.

	    Args:
	        pages_info: Iterable of page dicts, each providing a ``"text"`` value
	            and an ``"images"`` iterable of image byte strings.

	    Returns:
	        A list with one dict per page: ``"images"`` is a list of
	        ``{"image_embedding": ..., "extracted_text": ...}`` entries (one per
	        image, in order) and ``"text"`` is the page text passed through as-is.
	    """
	    def _describe(picture):
	        # Embed first, then run text extraction, then serialise the embedding
	        vector = get_image_embedding(picture)
	        ocr_text = extract_text(picture)
	        return {"image_embedding": vector.tolist() ,"extracted_text": ocr_text}

	    results = []
	    for entry in pages_info:
	        page_text = entry["text"]
	        described = [_describe(picture) for picture in entry["images"]]
	        results.append({"images": described, "text": page_text})

	    return results

	def separate_audio_from_video(video_url):
	    """Extract the audio track of a video and return it as WAV file bytes.

	    Args:
	        video_url: Location of the video, passed unchanged to
	            ``moviepy``'s ``VideoFileClip``.

	    Returns:
	        The bytes of the WAV file written from the video's audio track, or
	        ``None`` when anything goes wrong (the error is printed). Callers
	        must check for ``None``.
	    """
	    import os  # local import: only needed for temp-file cleanup in this helper

	    video = None
	    temp_audio_filename = None
	    try:
	        # Load the video file
	        video = mp.VideoFileClip(video_url)

	        audio = video.audio
	        if audio is None:
	            # A clip without an audio track would otherwise fail with an
	            # unhelpful AttributeError on write_audiofile.
	            raise ValueError("Video has no audio track")

	        # Only reserve a file name here; the audio is written by name after the
	        # handle is closed.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
	            temp_audio_filename = temp_audio_file.name

	        # Write the audio data to the temporary file
	        audio.write_audiofile(temp_audio_filename)

	        # Read the audio data back as bytes
	        with open(temp_audio_filename, "rb") as f:
	            return f.read()

	    except Exception as e:
	        # Best-effort helper: report the problem and signal failure with None
	        print("An error occurred:", e)
	        return None

	    finally:
	        # Release the clip's reader resources and remove the temporary WAV so
	        # repeated requests do not accumulate open readers or files on disk.
	        if video is not None:
	            video.close()
	        if temp_audio_filename is not None and os.path.exists(temp_audio_filename):
	            os.remove(temp_audio_filename)




	# NOTE: the former ``@cache.cached(timeout=300)`` above ``@app.route`` was dropped.
	# In that position Flask registers the undecorated view, so the cache was never
	# consulted; placed correctly it would key on the URL path only and hand one
	# caller's result to another caller's different POST body.
	@app.route('/get_text_embedding', methods=['POST'])
	def get_text_embedding_route():
	    """Return the embedding of the ``text`` field of the JSON request body.

	    Responses:
	        200 ``{"text_embedding": ...}`` with the value from ``get_text_vector``.
	        400 ``{"error": ...}`` when the body is not a JSON object or has no ``text``.
	        500 ``{"error": ...}`` on any other failure.
	    """
	    try:
	        payload = request.get_json(silent=True)
	        text = payload.get("text") if isinstance(payload, dict) else None
	        if text is None:
	            return jsonify({"error": "Missing required field: text"}), 400

	        text_embedding = get_text_vector(text)
	        return jsonify({"text_embedding": text_embedding}), 200

	    except Exception as e:
	        return jsonify({"error": str(e)}), 500


	# NOTE: the former ``@cache.cached(timeout=300)`` above ``@app.route`` was dropped.
	# In that position Flask registers the undecorated view, so the cache was never
	# consulted; placed correctly it would key on the URL path only and hand one
	# caller's result to another caller's different POST body.
	@app.route('/extract_audio_text_and_embedding', methods=['POST'])
	def get_audio_embedding_route():
	    """Download an audio file, embed it and transcribe its speech.

	    Reads ``audio_url`` from the JSON request body.

	    Responses:
	        200 ``{"extracted_text": ..., "audio_embedding": ...}``; the text is an
	            empty string when speech recognition fails.
	        400 ``{"error": ...}`` when ``audio_url`` is missing.
	        500 ``{"error": ...}`` when the download or processing fails.
	    """
	    try:
	        payload = request.get_json(silent=True)
	        audio_url = payload.get('audio_url') if isinstance(payload, dict) else None
	        if not audio_url:
	            return jsonify({"error": "Missing required field: audio_url"}), 400

	        # A timeout keeps a stalled remote server from blocking the worker forever
	        response = requests.get(audio_url, timeout=30)
	        if response.status_code != 200:
	            return jsonify({"error": "Failed to download audio"}), 500
	        audio_bytes = response.content

	        audio_embedding = extract_audio_embeddings(audio_bytes)

	        recognizer = sr.Recognizer()
	        with sr.AudioFile(BytesIO(audio_bytes)) as source:
	            recorded_audio = recognizer.record(source)

	        extracted_text = ""
	        try:
	            extracted_text = recognizer.recognize_google(recorded_audio)
	        except Exception as e:
	            # Transcription is best effort: the embedding is still returned
	            print(e)

	        return jsonify({"extracted_text": extracted_text, "audio_embedding": audio_embedding}), 200

	    except Exception as e:
	        # Same JSON error shape as the other routes instead of Flask's default error page
	        print(e)
	        return jsonify({"error": str(e)}), 500

	# Route to get image embeddings
	# NOTE: the former ``@cache.cached(timeout=300)`` above ``@app.route`` was dropped.
	# In that position Flask registers the undecorated view, so the cache was never
	# consulted; placed correctly it would key on the URL path only and hand one
	# caller's result to another caller's different POST body.
	@app.route('/extract_image_text_and_embedding', methods=['POST'])
	def get_image_embedding_route():
	    """Download an image, extract its text and compute its embedding.

	    Reads ``imageUrl`` from the JSON request body.

	    Responses:
	        200 ``{"image_embedding": [...], "extracted_text": ...}``.
	        400 ``{"error": ...}`` when ``imageUrl`` is missing.
	        500 ``{"error": ...}`` when the download or processing fails.
	    """
	    try:
	        payload = request.get_json(silent=True)
	        image_url = payload.get("imageUrl") if isinstance(payload, dict) else None
	        if not image_url:
	            return jsonify({"error": "Missing required field: imageUrl"}), 400

	        # A timeout keeps a stalled remote server from blocking the worker forever
	        response = requests.get(image_url, timeout=30)
	        if response.status_code != 200:
	            return jsonify({"error": "Failed to download image"}), 500

	        binary_data = response.content
	        extracted_text = extract_text(binary_data)
	        image_embedding = get_image_embedding(binary_data)
	        return jsonify({"image_embedding": image_embedding.tolist(),"extracted_text":extracted_text}), 200

	    except Exception as e:
	        return jsonify({"error": str(e)}), 500

	# Route to get video embeddings
	# NOTE: the former ``@cache.cached(timeout=300)`` above ``@app.route`` was dropped.
	# In that position Flask registers the undecorated view, so the cache was never
	# consulted; placed correctly it would key on the URL path only and hand one
	# caller's result to another caller's different POST body.
	@app.route('/extract_video_text_and_embedding', methods=['POST'])
	def get_video_embedding_route():
	    """Embed a video and its audio track and transcribe the audio.

	    Reads ``videoUrl`` from the JSON request body.

	    Responses:
	        200 ``{"video_embedding": ..., "extracted_audio_text": ...,
	            "audio_embedding": ...}``; the text is an empty string when speech
	            recognition fails.
	        400 ``{"error": ...}`` when ``videoUrl`` is missing.
	        500 ``{"error": ...}`` when audio extraction or processing fails.
	    """
	    try:
	        payload = request.get_json(silent=True)
	        video_url = payload.get("videoUrl") if isinstance(payload, dict) else None
	        if not video_url:
	            return jsonify({"error": "Missing required field: videoUrl"}), 400

	        audio_bytes = separate_audio_from_video(video_url)
	        if audio_bytes is None:
	            # The helper yields None when extraction failed; stop here with a
	            # clear message rather than passing None to the embedding code.
	            return jsonify({"error": "Failed to extract audio from video"}), 500

	        audio_embedding = extract_audio_embeddings(audio_bytes)

	        recognizer = sr.Recognizer()
	        with sr.AudioFile(io.BytesIO(audio_bytes)) as source:
	            recorded_audio = recognizer.record(source)

	        extracted_text = ""
	        try:
	            extracted_text = recognizer.recognize_google(recorded_audio)
	        except Exception as e:
	            # Transcription is best effort: the embeddings are still returned
	            print(e)

	        video_embedding = get_video_embedding(video_url)
	        return jsonify({"video_embedding": video_embedding,"extracted_audio_text": extracted_text, "audio_embedding": audio_embedding}), 200

	    except Exception as e:
	        print(e)
	        return jsonify({"error": str(e)}), 500

	# NOTE: the former ``@cache.cached(timeout=300)`` above ``@app.route`` was dropped.
	# In that position Flask registers the undecorated view, so the cache was never
	# consulted; placed correctly it would key on the URL path only and hand one
	# caller's result to another caller's different POST body.
	@app.route('/extract_pdf_text_and_embedding', methods=['POST'])
	def extract_pdf_text_and_embedding():
	    """Download a PDF and return per-page text plus image embeddings.

	    Reads ``pdfUrl`` from the JSON request body.

	    Responses:
	        200 ``{"content": [...]}`` with one entry per page, as built by
	            ``pdf_image_text_embedding_and_text_embedding``.
	        400 ``{"error": ...}`` when ``pdfUrl`` is missing.
	        500 ``{"error": ...}`` on any other failure.
	    """
	    try:
	        payload = request.get_json(silent=True)
	        pdf_url = payload.get("pdfUrl") if isinstance(payload, dict) else None
	        if not pdf_url:
	            return jsonify({"error": "Missing required field: pdfUrl"}), 400

	        pages_info = seperate_image_text_from_pdf(pdf_url)
	        # The payload is no longer printed: it contains full embedding vectors
	        # for every image and floods the log on each request.
	        content = pdf_image_text_embedding_and_text_embedding(pages_info)
	        return jsonify({"content": content}), 200

	    except Exception as e:
	        return jsonify({"error": str(e)}), 500

	# Route to get text description embeddings
	# NOTE: the former ``@cache.cached(timeout=300)`` above ``@app.route`` was dropped.
	# In that position Flask registers the undecorated view, so the cache was never
	# consulted; placed correctly it would key on the URL path only and hand one
	# caller's result to another caller's different POST body.
	@app.route('/getTextDescriptionEmbedding', methods=['POST'])
	def get_text_description_embedding_route():
	    """Return the description embedding of the ``text`` field of the JSON body.

	    Responses:
	        200 ``{"text_description_embedding": [...]}`` with the list form of the
	            value from ``get_text_discription_vector``.
	        400 ``{"error": ...}`` when the body is not a JSON object or has no ``text``.
	        500 ``{"error": ...}`` on any other failure.
	    """
	    try:
	        payload = request.get_json(silent=True)
	        text = payload.get("text") if isinstance(payload, dict) else None
	        if text is None:
	            return jsonify({"error": "Missing required field: text"}), 400

	        text_description_embedding = get_text_discription_vector(text)
	        return jsonify({"text_description_embedding": text_description_embedding.tolist()}), 200

	    except Exception as e:
	        return jsonify({"error": str(e)}), 500



	# Route to get object detection results
	# NOTE: the former ``@cache.cached(timeout=300)`` above ``@app.route`` was dropped.
	# In that position Flask registers the undecorated view, so the cache was never
	# consulted; placed correctly it would key on the URL path only and hand one
	# caller's result to another caller's different POST body.
	@app.route('/detectObjects', methods=['POST'])
	def detect_objects_route():
	    """Download an image and run object detection on it.

	    Reads ``imageUrl`` from the JSON request body.

	    Responses:
	        200 ``{"object_detection_results": ...}`` with the value from ``detect_objects``.
	        400 ``{"error": ...}`` when ``imageUrl`` is missing.
	        500 ``{"error": ...}`` when the download or detection fails.
	    """
	    try:
	        payload = request.get_json(silent=True)
	        image_url = payload.get("imageUrl") if isinstance(payload, dict) else None
	        if not image_url:
	            return jsonify({"error": "Missing required field: imageUrl"}), 400

	        # A timeout keeps a stalled remote server from blocking the worker forever
	        response = requests.get(image_url, timeout=30)
	        if response.status_code != 200:
	            return jsonify({"error": "Failed to download image"}), 500

	        object_detection_results = detect_objects(response.content)
	        return jsonify({"object_detection_results": object_detection_results}), 200

	    except Exception as e:
	        return jsonify({"error": str(e)}), 500

	# Route to get face locations
	# NOTE: the former ``@cache.cached(timeout=300)`` above ``@app.route`` was dropped.
	# In that position Flask registers the undecorated view, so the cache was never
	# consulted; placed correctly it would key on the URL path only and hand one
	# caller's result to another caller's different POST body.
	@app.route('/getFaceLocations', methods=['POST'])
	def get_face_locations_route():
	    """Download an image and report the bounding boxes of detected faces.

	    Reads ``imageUrl`` from the JSON request body.

	    Responses:
	        200 ``{"face_locations": "<str>"}``: the list of face boxes rendered
	            with ``str()``, i.e. a string, not a JSON array.
	        400 ``{"error": ...}`` when ``imageUrl`` is missing.
	        500 ``{"error": ...}`` when the download or detection fails.
	    """
	    try:
	        payload = request.get_json(silent=True)
	        image_url = payload.get("imageUrl") if isinstance(payload, dict) else None
	        if not image_url:
	            return jsonify({"error": "Missing required field: imageUrl"}), 400

	        # A timeout keeps a stalled remote server from blocking the worker forever
	        response = requests.get(image_url, timeout=30)
	        if response.status_code != 200:
	            return jsonify({"error": "Failed to download image"}), 500

	        face_locations = get_face_locations(response.content)
	        # The string rendering is kept on purpose: existing clients receive this
	        # field as a string, so switching to a JSON array would break them.
	        return jsonify({"face_locations": str(face_locations)}), 200

	    except Exception as e:
	        print(e)
	        return jsonify({"error": str(e)}), 500

	# Route to get similarity score
	# NOTE: the former ``@cache.cached(timeout=300)`` above ``@app.route`` was dropped.
	# In that position Flask registers the undecorated view, so the cache was never
	# consulted; placed correctly it would key on the URL path only and hand one
	# caller's result to another caller's different POST body.
	@app.route('/getSimilarityScore', methods=['POST'])
	def get_similarity_score_route():
	    """Compare two embeddings supplied in the JSON request body.

	    Reads ``embedding1`` and ``embedding2`` and passes them to
	    ``get_all_similarities``.

	    Responses:
	        200 ``{"similarity_score": ...}``.
	        400 ``{"error": ...}`` when either embedding is missing.
	        500 ``{"error": ...}`` on any other failure.
	    """
	    try:
	        payload = request.get_json(silent=True)
	        if not isinstance(payload, dict):
	            payload = {}
	        embedding1 = payload.get("embedding1")
	        embedding2 = payload.get("embedding2")
	        if embedding1 is None or embedding2 is None:
	            return jsonify({"error": "Missing required fields: embedding1 and embedding2"}), 400

	        # Assuming embeddings are provided as lists
	        similarity_score = get_all_similarities(embedding1, embedding2)
	        return jsonify({"similarity_score": similarity_score}), 200

	    except Exception as e:
	        return jsonify({"error": str(e)}), 500

	@app.route('/')
	def hello():
	    """Root endpoint: answer with a fixed greeting string."""
	    greeting = 'Hello, World!'
	    return greeting