zoosign_reader / app.py
panda1835's picture
Update app.py
2b179b8 verified
raw
history blame
6.44 kB
import base64
import io
import json
import os
import random

import cohere
import gradio as gr
import requests
from openai import OpenAI
from PIL import Image
# Secrets are injected via environment variables (e.g. HF Spaces secrets);
# any of these may be None when the corresponding variable is unset.
iucn_api_token = os.environ.get('IUCN_API')
cohere_api_token = os.environ.get('COHERE_API')
openai_api_token = os.environ.get('OPENAI_API')
# OpenAI SDK client.  NOTE(review): `client` is never used in this file —
# the vision call in get_taxonomy goes through raw `requests` instead.
client = OpenAI(api_key=openai_api_token)
# Cohere client used by summarize_with_llm to condense IUCN narratives.
co = cohere.Client(cohere_api_token)
def encode_image(image_path):
    """Read the file at *image_path* and return its bytes as a base64 string."""
    with open(image_path, "rb") as handle:
        raw = handle.read()
    return base64.b64encode(raw).decode('utf-8')
def summarize_with_llm(text, prompt, max_token=210):
    """Condense a piece of species information via the Cohere `command` model.

    *text* is the raw narrative, *prompt* the instruction appended to it,
    *max_token* the generation budget.  Returns the generated text.
    """
    full_prompt = f'This is a piece of information about an animal: "{text}". {prompt}'
    generation = co.generate(
        model='command',
        prompt=full_prompt,
        max_tokens=max_token,
        temperature=0.5,
        k=0,
        stop_sequences=[],
        return_likelihoods='NONE',
    )
    return generation.generations[0].text
def get_iucn_data(genus, species):
    """Query the IUCN Red List v3 API for one species and summarize it.

    Parameters
    ----------
    genus, species : str
        Parts of the binomial name, e.g. ("Panthera", "leo").

    Returns
    -------
    dict
        Keys: status_category, status_code, common_name, web_link,
        threats, population, habitat.  Empty dict when the species is
        unknown to the API or a response is missing expected fields.
    """
    base = "https://apiv3.iucnredlist.org/api/v3"
    name = f"{genus}%20{species}"

    iucn_narrative = requests.get(f"{base}/species/narrative/{name}?token={iucn_api_token}")
    if iucn_narrative.status_code != 200:
        return dict()

    iucn_status = requests.get(f"{base}/species/history/name/{name}?token={iucn_api_token}")
    iucn_common_name = requests.get(f"{base}/species/common_names/{name}?token={iucn_api_token}")
    # NOTE(review): the weblink endpoint is called without a token, matching
    # the original code — confirm it really is a public endpoint.
    iucn_web_link = requests.get(f"{base}/weblink/{name}")

    try:
        conservation_status = iucn_status.json()
        if not conservation_status['result']:
            return dict()
        status_entry = conservation_status['result'][0]
        narrative = iucn_narrative.json()['result'][0]
        common_name = iucn_common_name.json()['result'][0]['taxonname']
        web_link = iucn_web_link.json()['rlurl']
    except (KeyError, IndexError, ValueError):
        # The original indexed these responses unguarded and crashed on an
        # empty `result` list or a non-JSON body; degrade gracefully instead.
        return dict()

    threats = summarize_with_llm(narrative['threats'], 'In one sentence, the threats posing this species are', max_token=210)
    population = summarize_with_llm(narrative['population'], 'In one sentence, estimation of the population of this species is', max_token=210)
    habitat = summarize_with_llm(narrative['habitat'], 'Description of the habitat of this species is')

    return {
        "status_category": status_entry['category'],
        "status_code": status_entry['code'],
        "common_name": common_name,
        "web_link": web_link,
        # Keep only the first sentence of each LLM summary.
        "threats": threats.strip().split('.')[0],
        "population": population.strip().split('.')[0],
        "habitat": habitat.strip().split('.')[0],
    }
def get_taxonomy(image):
    """Ask GPT-4 vision to read the scientific name off a zoo sign.

    Parameters
    ----------
    image : PIL.Image.Image
        Photo of a zoo informational sign.

    Returns
    -------
    str
        The scientific name, or one of the diagnostic messages
        'low quality image' / 'no sign found' / 'more than one sign'.
    """
    # Encode the image in memory.  The original wrote `upload_{randint}.png`
    # to disk, which can collide between concurrent requests and leaks the
    # file if `image.save` raises before `os.remove` runs.
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    base64_image = base64.b64encode(buffer.getvalue()).decode('utf-8')

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_token}"
    }
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": """
Your role is to identify scientific names of species from zoo signs in images, focusing strictly on extracting the scientific name.
If the image is low quality or unreadable, the response in the JSON will be 'low quality image'.
If no informational sign is detected, it will respond with 'no sign found'.
When multiple signs are present, the response will be 'more than one sign'.
The GPT interacts minimally, responding in a dictionary format with the key "result" and the value being the scientific name or the specific response based on the image analysis.
"""
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            # The bytes really are PNG (see image.save above);
                            # the original mislabelled them as JPEG.
                            "url": f"data:image/png;base64,{base64_image}",
                            "detail": "low"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }

    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    result = response.json()['choices'][0]['message']['content']

    # The model may wrap its JSON in a Markdown code fence.  The original
    # unconditionally dropped the first and last lines, which corrupts a
    # bare JSON reply; strip the fence only when it is present.
    text = result.strip()
    if text.startswith("```"):
        text = "\n".join(text.split("\n")[1:-1])
    result_dict = json.loads(text)
    return result_dict['result']
def get_information(image):
    """Build a Markdown summary for the species on the sign in *image*.

    Returns a Markdown string: the scientific name as a heading, followed by
    IUCN status/threats/population/habitat when the species is found on the
    Red List, or just the heading (possibly a diagnostic message) otherwise.
    """
    taxonomy = get_taxonomy(image)

    # get_taxonomy can return a diagnostic message instead of a binomial
    # name; the original split such messages into a fake genus/species pair
    # (or crashed on a single-word reply).  Surface the message as-is.
    diagnostics = {'low quality image', 'no sign found', 'more than one sign'}
    parts = taxonomy.split()
    if taxonomy.strip().lower() in diagnostics or len(parts) < 2:
        return f"## {taxonomy}"

    genus, species = parts[0], parts[1]
    iucn_data = get_iucn_data(genus, species)

    information = f"## {taxonomy}"
    if iucn_data:
        information += f"""
## {iucn_data['common_name']}
**Conservation status**: {iucn_data['status_category']} ({iucn_data['status_code']}).
**Threats**: {iucn_data['threats']}.
**Population**: {iucn_data['population']}.
**Habitat**: {iucn_data['habitat']}.
*For more information, please visit this species page*: {iucn_data['web_link']}
"""
    return information
# --- Gradio UI wiring ---------------------------------------------------
image = gr.Image(label="Image", type='pil')
output = gr.Markdown()

demo = gr.Interface(
    fn=get_information,
    inputs=[image],
    outputs=output,
    title="ZooSign Reader",
    examples=['example_2.jpeg'],
    # `cache_examples` is a gr.Interface argument; the original passed it to
    # demo.launch(), which does not accept it on current Gradio releases.
    cache_examples=False,
    description="""
Introducing **ZooSign Reader**, an innovative application designed to enhance your zoo experience! **ZooSign Reader** allows users to effortlessly upload images of zoo informational signs and receive detailed information about the species mentioned on those signs.
With **ZooSign Reader**, you no longer need to spend time searching for information about a particular animal or bird species while visiting the zoo. Simply capture an image of the sign using your smartphone camera, or choose an existing image from your gallery, and let **ZooSign Reader** do the rest.
Using cutting-edge image recognition and natural language processing technologies, **ZooSign Reader** quickly analyzes the uploaded image and extracts the text containing the scientific name. The app then searches the IUCN Redlist's extensive database, which includes a wide range of animals, birds, and reptiles found in zoos worldwide.
"""
)

demo.launch()