# Hugging Face Spaces status shown for this app when the file was captured: "Runtime error".
import embed_anything | |
from embed_anything import EmbedData | |
from tqdm.autonotebook import tqdm | |
from pinecone import Pinecone, ServerlessSpec | |
import numpy as np | |
import os | |
from pinecone import PineconeApiException | |
import uuid | |
import re | |
import gradio as gr | |
# Sample recordings that are embedded once, when the script starts.
audio_files = ["samples_hp0.wav", "samples_gb0.wav"]
# One inner list of EmbedData per entry of `audio_files`, in the same order.
embeddings: list[list[EmbedData]] = []
# NOTE: `file` and `embedding` remain bound at module scope after this loop
# finishes, so they are visible to code further down the file.
for file in audio_files:
    # "Whisper-Jina" is the embed_anything model identifier used for audio.
    embedding = embed_anything.embed_file(file, "Whisper-Jina")
    embeddings.append(embedding)
# Name of the Pinecone index that stores the audio segment embeddings.
INDEX_NAME = "search-in-audio"

# The API key is read from the environment; os.environ.get() yields None when
# PINECONE_API_KEY is not set.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

# Start from a fresh index on every launch. On the very first run the index
# does not exist yet and delete_index() fails with HTTP 404; that case is
# expected and must not abort start-up. Any other failure is re-raised.
try:
    pc.delete_index(INDEX_NAME)
except PineconeApiException as e:
    if e.status != 404:
        raise

try:
    pc.create_index(
        name=INDEX_NAME,
        dimension=768,  # Replace with your model dimensions
        metric="cosine",  # Replace with your model metric
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
except PineconeApiException as e:
    # 409 means the index is already there, which is fine. Every other error
    # leaves no usable index, so surface it here instead of failing later on
    # the first upsert with a less helpful message.
    if e.status != 409:
        raise
    print("Index already exists")

# Handle used by the rest of the script for upserts and queries.
index = pc.Index(INDEX_NAME)
def convert_to_pinecone_format(embeddings: "list[list[EmbedData]]") -> list[dict]:
    """Flatten per-file EmbedData lists into records for a Pinecone upsert.

    EmbedData items expose ``text``, ``embedding`` and ``metadata``; Pinecone
    expects ``id``, ``values`` and ``metadata``.

    Args:
        embeddings: One list of EmbedData per audio file. Each item's
            ``metadata`` must contain the keys ``"start"``, ``"end"`` and
            ``"file_name"``.

    Returns:
        A flat list with one dict per EmbedData item, in input order. Each
        dict has a freshly generated UUID4 string as ``"id"``, the embedding
        vector as ``"values"`` and a ``"metadata"`` dict holding ``"text"``,
        ``"start"``, ``"end"`` and ``"file"`` (base name of ``file_name``).

    Raises:
        KeyError: If an item's metadata lacks one of the required keys.
    """
    return [
        {
            "id": str(uuid.uuid4()),
            "values": emb.embedding,
            "metadata": {
                "text": emb.text,
                "start": emb.metadata["start"],
                "end": emb.metadata["end"],
                # Keep only the base name. Splitting on both "/" and "\"
                # handles POSIX and Windows style paths alike.
                "file": re.split(r"/|\\", emb.metadata["file_name"])[-1],
            },
        }
        for file_embeddings in embeddings
        for emb in file_embeddings
    ]
# Push the start-up sample embeddings into the index.
data = convert_to_pinecone_format(embeddings)
index.upsert(data)

# Base names of the audio files whose segments are already indexed. search()
# consults this list and appends to it when a new file is uploaded. It is
# derived from `audio_files` (instead of repeating the literals) so the two
# cannot drift apart, and it applies the same base-name rule search() uses.
files = [re.split(r"/|\\", name)[-1] for name in audio_files]
def search(query, audio):
    """Search the indexed audio segments for a text query.

    If ``audio`` names a file that has not been indexed yet, the file is
    embedded with "Whisper-Jina", upserted into the Pinecone index and its
    base name is recorded in the module-level ``files`` list. The query text
    is embedded with "Jina" and the index is queried across all stored
    segments (the query is not restricted to ``audio``).

    Args:
        query: Text to search for.
        audio: Path of an audio file, or None when no audio was supplied.

    Returns:
        A 6-tuple ``(text_1, file_1, text_2, file_2, text_3, file_3)`` for the
        three best matches: each ``text_n`` is a Markdown snippet with file,
        start, end and text, and each ``file_n`` is the stored file name.
        Slots without a match are filled with None.
    """
    query_embedding = embed_anything.embed_query([query], "Jina")[0]

    # The audio argument can be None (nothing uploaded); in that case only
    # the already indexed files are searched.
    if audio is not None:
        audio_name = re.split(r"/|\\", audio)[-1]
        if audio_name not in files:
            new_records = convert_to_pinecone_format(
                [embed_anything.embed_file(audio, "Whisper-Jina")]
            )
            index.upsert(new_records)
            files.append(audio_name)

    response = index.query(
        vector=query_embedding.embedding,
        top_k=5,
        include_metadata=True,
    )

    # The interface has three (text, audio) output pairs, so only the three
    # best matches are shown.
    shown_slots = 3
    outputs = []
    for meta in [match.metadata for match in response.matches[:shown_slots]]:
        display_text = (
            f"\n`File: {meta['file']}`"
            f"\n`Start: {meta['start']}`"
            f"\n`End: {meta['end']}`"
            f"\nText: {meta['text']}"
        )
        outputs.append(display_text)
        # NOTE(review): only the base name is stored in the index metadata, so
        # this resolves relative to the working directory -- confirm that it
        # also works for uploaded files living elsewhere.
        outputs.append(meta["file"])

    # Fewer than three matches: pad so the tuple always has one value per
    # output component instead of raising IndexError.
    outputs.extend([None] * (2 * shown_slots - len(outputs)))
    return tuple(outputs)
# Gradio UI: a text query plus an optional audio file in, three
# (Markdown text, audio) result pairs out -- matching the 6-tuple that
# search() returns.
# NOTE(review): the emoji in `title` and `description` were garbled
# (mis-decoded bytes) in the captured file; the ones below are a best-effort
# reconstruction -- confirm against the intended text.
demo = gr.Interface(
    title="Search 🔎 in Audio 🎙️",
    description="""
<img width=250 src = "https://res.cloudinary.com/dltwftrgc/image/upload/v1712504276/Projects/EmbedAnything_500_x_200_px_a4l8xu.png">
# Search within audio files using text queries.
## Models used:
- **Audio Decoder**: [openai/whisper-tiny.en](https://huggingface.co/openai/whisper-tiny.en)
- **Embedding Model**: [Jina Embeddings v2 base-en](https://huggingface.co/jinaai/jina-embeddings-v2-base-en)
## Vector Database used: **Pinecone**
## Powered by [EmbedAnything by Starlight](https://github.com/StarlightSearch/EmbedAnything) 🚀
""",
    article="Created by [Akshay Ballal](https://www.akshaymakes.com)",
    fn=search,
    inputs=["text", gr.Audio(label="Audio", type="filepath")],
    outputs=[
        gr.Markdown(label="Text"),
        gr.Audio(label="Audio", type="filepath"),
        gr.Markdown(label="Text"),
        gr.Audio(label="Audio", type="filepath"),
        gr.Markdown(label="Text"),
        gr.Audio(label="Audio", type="filepath"),
    ],
    # Each example is [query text, audio file] in the order of `inputs`.
    examples=[
        ["screwdriver", "samples_hp0.wav"],
        ["united states", "samples_gb0.wav"],
        ["united states", "samples_hp0.wav"],
    ],
)
demo.launch()