# Hosting-page residue captured together with this file (not part of the program):
#   hprasath's picture
#   Upload 9 files
#   bbcc5b2 verified
#   raw
#   history blame
#   647 Bytes
import pickle
import re
from PIL import Image
from transformers import pipeline
import io
# Matches one angle-bracket tag: '<', one or more non-'>' characters, then '>'.
_TAG_PATTERN = re.compile(r'<[^>]+>')
# Matches a run of one or more whitespace characters.
_WHITESPACE_PATTERN = re.compile(r'\s+')


def clean_text(text: str) -> str:
    """Remove angle-bracket tags from *text* and normalise its whitespace.

    Processing order:
      1. Every ``<...>`` tag is deleted (replaced by the empty string, so
         text on either side of a tag is joined without an added space).
      2. Leading and trailing whitespace is stripped.
      3. Each remaining run of whitespace is collapsed to a single space.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string; empty if *text* contained only tags and/or
        whitespace.
    """
    # A distinct local name is used on purpose: reusing ``clean_text`` for the
    # intermediate value would shadow this function's own name inside its body.
    without_tags = _TAG_PATTERN.sub('', text)
    return _WHITESPACE_PATTERN.sub(' ', without_tags.strip())
# Module-level image-to-text pipeline using the "jinhybr/OCR-Donut-CORD" model.
# It is constructed once, when this module is imported, and the same instance
# is reused by every extract_text() call.
# NOTE(review): constructing it presumably loads (and may download) the model
# weights, so importing this module can be slow — confirm before importing it
# on a latency-sensitive path.
pipe = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")
def extract_text(binary_image: bytes) -> str:
    """Run the OCR pipeline on an encoded image and return the cleaned text.

    Args:
        binary_image: The raw bytes of an encoded image file, e.g. the result
            of reading a file opened in ``"rb"`` mode.

    Returns:
        The ``generated_text`` field of the first pipeline result, after it
        has been passed through ``clean_text``.

    Raises:
        Nothing is caught here: any exception raised by ``Image.open`` (for
        example on bytes that are not a readable image) or by the pipeline
        propagates to the caller unchanged.
    """
    # Open the image inside a context manager so the image object and its
    # underlying in-memory buffer are released as soon as inference finishes.
    with Image.open(io.BytesIO(binary_image)) as image:
        result = pipe(image)

    # The pipeline output is indexed as a list of dicts; only the first
    # entry's text is used.
    text = result[0]['generated_text']
    return clean_text(text)
# Manual smoke test, intentionally left disabled: uncomment to OCR a sample
# image from disk and print the extracted text.
# print(extract_text(open("pictures/users/2.jpg", "rb").read()))
# Emitted at import time, once the module-level statements above (including
# the pipeline construction) have finished running.
print("OCR pipeline loaded successfully!")