"""Gradio app that evaluates IELTS writing answers with an LLM.

The answer is supplied either as an image (transcribed with an OCR pipeline)
or as plain text, and is sent together with the task metadata to a Gemini
model through a LangChain prompt template read from the environment.
"""

import json
import os

import gradio as gr
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain_google_genai import ChatGoogleGenerativeAI
from PIL import Image
from transformers import pipeline


def _require_env(name, label):
    """Return the value of environment variable *name*.

    Raises:
        ValueError: if the variable is not set. *label* is the human-readable
            name used in the error message.
    """
    value = os.environ.get(name)
    if value is None:
        raise ValueError(
            f"No {label} found. Please set the '{name}' environment variable."
        )
    return value


# Retrieve the API keys and other secrets from the environment
api_key = _require_env('GOOGLE_API_KEY', 'API key')
tracking_id = _require_env('TRACKING_ID', 'tracking ID')
initial_prompt = _require_env('initial_prompt', 'initial prompt')
description_json = _require_env('description', 'description')

# Convert the description JSON string to a dictionary
try:
    description = json.loads(description_json)
except json.JSONDecodeError as err:
    # JSONDecodeError is a ValueError subclass, so callers that caught the
    # original error still catch this one; the message now names the culprit.
    raise ValueError(
        "The 'description' environment variable is not valid JSON."
    ) from err

# Set the API key for Google
os.environ['GOOGLE_API_KEY'] = api_key

# Initialize the OCR pipeline
ocr_pipe = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")

# Initialize the LLM
llm_model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.4, top_p=0.85)

# Define the prompt template
prompt = PromptTemplate(
    input_variables=['task_type', 'task_number', 'question', 'content', 'description'],
    template=initial_prompt,
)

# Define the LLM chain
chain = LLMChain(llm=llm_model, prompt=prompt)

MISSING_INPUT_MESSAGE = "Please provide the required input based on your selection."
DEFAULT_TASK_DESCRIPTION = "No description available for this task."


def _lookup_description(task_type, task_number):
    """Return the description for a task, or the default text when absent.

    JSON object keys are always strings, so a ``(task_type, task_number)``
    tuple can never be a key of the decoded ``description`` value. The lookup
    therefore tries a nested mapping first and then a few flat string keys.
    """
    if not isinstance(description, dict):
        return DEFAULT_TASK_DESCRIPTION

    # Nested layout: {"Academic": {"Task 1": "..."}}
    per_type = description.get(task_type)
    if isinstance(per_type, dict) and task_number in per_type:
        return per_type[task_number]

    # Flat layouts. NOTE(review): the real schema of the 'description' secret
    # is not visible from this file -- confirm which key format is used and
    # trim this list accordingly.
    candidate_keys = (
        f"{task_type} {task_number}",
        f"{task_type}_{task_number}",
        f"{task_type}, {task_number}",
        str((task_type, task_number)),
    )
    for key in candidate_keys:
        if key in description:
            return description[key]
    return DEFAULT_TASK_DESCRIPTION


def _extract_text_from_image(image):
    """Run OCR on *image* and return the recognised text ('' when none).

    *image* may be a PIL image or a string path that ``Image.open`` accepts.
    """
    if isinstance(image, str):
        # Copy the pixel data so the underlying file handle can be closed.
        with Image.open(image) as opened:
            image = opened.copy()
    ocr_result = ocr_pipe(image)
    if not ocr_result:
        return ""
    return ocr_result[0].get('generated_text') or ""


def evaluate(task_type, task_number, question, input_type, image=None, text=None):
    """Evaluate a writing answer and return the LLM's response.

    Args:
        task_type: Test type selected in the UI ("Academic" or "General").
        task_number: Task selected in the UI ("Task 1" or "Task 2").
        question: The task question.
        input_type: "Image" to OCR *image*, "Text" to use *text* directly.
        image: PIL image (or image path) holding the answer.
        text: The answer as plain text.

    Returns:
        The chain's output, or a short message asking for input when the
        selected input is missing or contains no usable text.
    """
    if input_type == "Image" and image is not None:
        content = _extract_text_from_image(image)
        if not content.strip():
            return "No text could be extracted from the image. Please try another image or enter the text directly."
    elif input_type == "Text" and text is not None and str(text).strip():
        content = text
    else:
        return MISSING_INPUT_MESSAGE

    # Retrieve the description for the given task type and number, or use a default value
    task_description = _lookup_description(task_type, task_number)

    # Run the chain
    return chain.run({
        'task_type': task_type,
        'task_number': task_number,
        'question': question,
        'content': content,
        'description': task_description,
    })


def toggle_input(input_type):
    """Return visibility updates for the (image, text) input components."""
    show_image = input_type == "Image"
    return gr.update(visible=show_image), gr.update(visible=not show_image)


# NOTE(review): the footer markup is empty in this view; presumably the
# analytics snippet using `tracking_id` belongs here -- confirm.
footer_html_with_analytics = f"""
"""

# Define the Gradio Blocks layout. A single set of components is both toggled
# by the radio and passed to `evaluate`, so the text box becomes reachable.
with gr.Blocks() as demo:
    gr.Markdown("# IELTS Writing Evaluation")
    with gr.Row():
        with gr.Column():
            task_type_input = gr.Dropdown(choices=["Academic", "General"], label="Test Type", value="Academic")
            task_number_input = gr.Dropdown(choices=["Task 1", "Task 2"], label="Task Number", value="Task 1")
            question_input = gr.Textbox(label="Question", value="")
            input_type_radio = gr.Radio(choices=["Image", "Text"], label="Input Type", value="Image")
            image_input = gr.Image(type="pil", label="Upload Image", visible=True)
            text_input = gr.Textbox(label="Enter Text", visible=False)
            submit_button = gr.Button("Submit")
        with gr.Column():
            outputs = gr.Markdown(label="Result")

    # Same order as the parameters of `evaluate`.
    inputs = [
        task_type_input,
        task_number_input,
        question_input,
        input_type_radio,
        image_input,
        text_input,
    ]

    input_type_radio.change(toggle_input, input_type_radio, [image_input, text_input])
    submit_button.click(evaluate, inputs, outputs)
    gr.HTML(footer_html_with_analytics)

# Launch the interface
demo.launch(share=True, debug=True)