import os
import gradio as gr
from pyzerox import zerox
import asyncio
import openai
import markdown2
from gradio_pdf import PDF
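# Dependencies (assumed PyPI package names): gradio, gradio_pdf, py-zerox,
# openai, markdown2. Per the pyzerox README, PDF handling also expects the
# poppler utilities to be installed on the system.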
# Header
header = """
# πŸ“„ OCR Reader, πŸ” Analyzer, and πŸ’¬ Chat Assistant using πŸ”Ž Zerox, 🧠 GPT-4o, powered by πŸš€ AI/ML API
Author: Jad Tounsi El Azzoiani
GitHub: [https://github.com/jadouse5](https://github.com/jadouse5)
LinkedIn: [Jad Tounsi El Azzoiani](https://www.linkedin.com/in/jad-tounsi-el-azzoiani-87499a21a/)
This project uses:
- [AI/ML API](https://api.aimlapi.com)
- [Gradio](https://www.gradio.app)
- [pyzerox](https://github.com/getomni-ai/zerox?tab=readme-ov-file#python-zerox)
"""
# Set up the model and provider
model = "gpt-4o" # GPT-4o model from AI/ML API
# Set the environment variables for the AI/ML API
os.environ["OPENAI_API_KEY"] = "your_api_key"
os.environ["OPENAI_API_BASE"] = "https://api.aimlapi.com/v1"
# Initialize the OpenAI client
client = openai.OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ["OPENAI_API_BASE"]
)
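# Rough sketch of the Zerox flow (based on the pyzerox README): zerox() renders
# each page of the document, sends it to the vision model, and returns a result
# whose .pages entries expose the recognized text as .content, which is what
# process_file() joins together below.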
# Async function to process the file using Zerox OCR and GPT-4o
async def process_file(file):
    file_path = file.name

    # Run Zerox OCR on the uploaded document
    result = await zerox(
        file_path=file_path,
        model=model,
        cleanup=True,
        concurrency=5,
        maintain_format=True,
    )

    # Join the per-page output into a single string
    content = "\n\n".join([page.content for page in result.pages])

    # Ask GPT-4o for a summary and analysis of the extracted text
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are an AI assistant that analyzes OCR output. Provide your analysis in markdown format, using bold text, tables, and other formatting as appropriate to make the information clear and easy to read."},
            {"role": "user", "content": f"Analyze the following OCR output and provide a summary:\n\n{content}"}
        ],
        temperature=0.7,
        max_tokens=500
    )

    ai_analysis = response.choices[0].message.content
    # Convert the markdown analysis to HTML for the gr.HTML component
    ai_analysis_html = markdown2.markdown(ai_analysis)

    return content, ai_analysis_html, file_path
# Function to handle chat with AI
def chat_with_ai(message, chat_history, document_content):
    # Answer questions using the extracted document text as context
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are an AI assistant that can answer questions about a document. Use the document content to provide accurate answers."},
            {"role": "user", "content": f"Document content: {document_content}"},
            {"role": "user", "content": message}
        ],
        temperature=0.7,
        max_tokens=150
    )

    ai_response = response.choices[0].message.content
    chat_history.append((message, ai_response))
    return "", chat_history
# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(header)

    api_key_input = gr.Textbox(
        label="Enter your AI/ML API Key",
        type="password",
        placeholder="Enter your API key here"
    )

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload Document", file_types=[".pdf", ".docx", ".jpg", ".png", ".jpeg"])
            run_button = gr.Button("Run OCR and Analysis")
        with gr.Column(scale=1):
            pdf_viewer = PDF(label="Original Document", interactive=False)

    with gr.Row():
        with gr.Column(scale=1):
            ocr_output = gr.Textbox(label="Extracted Text", lines=20)
        with gr.Column(scale=1):
            ai_analysis_output = gr.HTML(label="AI Analysis")

    gr.Markdown("## Chat with AI about the document")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Ask a question about the document")
    clear = gr.Button("Clear")

    # Holds the extracted document text so the chat handler can reuse it
    document_content = gr.State()

    def process_and_display(file, api_key):
        # Use the key entered in the UI for both Zerox (via the env var) and the OpenAI client
        os.environ["OPENAI_API_KEY"] = api_key
        client.api_key = api_key
        content, analysis, file_path = asyncio.run(process_file(file))
        return file_path, file_path, content, analysis, content

    run_button.click(
        process_and_display,
        inputs=[file_input, api_key_input],
        outputs=[pdf_viewer, file_input, ocr_output, ai_analysis_output, document_content]
    )

    msg.submit(chat_with_ai, [msg, chatbot, document_content], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

    footer = gr.Markdown("""
---
Created by Jad Tounsi El Azzoiani | [GitHub](https://github.com/jadouse5) | [LinkedIn](https://www.linkedin.com/in/jad-tounsi-el-azzoiani-87499a21a/)

Powered by [AI/ML API](https://aimlapi.com/?via=jad), [Gradio](https://www.gradio.app), and [pyzerox](https://github.com/getomni-ai/zerox?tab=readme-ov-file#python-zerox)
""")

demo.launch()
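# To run locally: `python app.py`, then open the URL Gradio prints (by default
# http://127.0.0.1:7860). Passing share=True to demo.launch() would additionally
# create a temporary public link.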