# Spaces: Sleeping
import pickle | |
import re | |
from PIL import Image | |
from transformers import pipeline | |
import io | |
def clean_text(text):
    """Return *text* with angle-bracket tags removed and whitespace normalised.

    Args:
        text: The string to clean.

    Returns:
        The input with every ``<...>`` span deleted, leading and trailing
        whitespace stripped, and each remaining run of whitespace replaced
        by a single space.
    """
    # Tags are deleted outright (not replaced by a space), so text on
    # either side of a tag becomes adjacent.
    without_tags = re.sub(r'<[^>]+>', '', text)
    return re.sub(r'\s+', ' ', without_tags.strip())
# Built once at module import and shared by every extract_text() call.
pipe = pipeline(
    task="image-to-text",
    model="jinhybr/OCR-Donut-CORD",
)
def extract_text(binary_image):
    """Run the module-level OCR pipeline on an in-memory image.

    Args:
        binary_image: The encoded image file contents as a bytes-like
            object; it is wrapped in an ``io.BytesIO`` and opened with PIL.

    Returns:
        The ``'generated_text'`` field of the first pipeline result,
        passed through ``clean_text``.
    """
    buffer = io.BytesIO(binary_image)
    predictions = pipe(Image.open(buffer))
    # Only the first prediction is used.
    return clean_text(predictions[0]['generated_text'])
# Manual smoke test, left disabled:
#     print(extract_text(open("pictures/users/2.jpg", "rb").read()))

# Reached only after the pipeline above was constructed without raising.
print("OCR pipeline loaded successfully!")