RishuD7 committed on
Commit
76aeaea
1 Parent(s): ffcdcf0

first commit

Browse files
Files changed (2) hide show
  1. app.py +29 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from http import client
3
+ import os,json
4
+ import pandas as pd
5
+ import requests
6
+ from PIL import Image
7
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
8
# Page heading rendered at the top of the app.
st.header("Xelpmoc - Optical Character Recognition - Document AI")

# The processor and the encoder-decoder model are both loaded from the same
# pretrained checkpoint, so the identifier is spelled out only once.
# NOTE(review): these loads run at module import; if the hosting framework
# re-executes the script on each interaction, consider caching them.
_TROCR_CHECKPOINT = 'microsoft/trocr-base-printed'
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
12
+
13
def TrOCR_predict(pixel_values, processor, model, max_length=64):
    """Generate token ids for ``pixel_values`` and decode them to text.

    Args:
        pixel_values: Model input forwarded unchanged to ``model.generate``
            (presumably the ``pixel_values`` tensor produced by the
            processor -- confirm against the caller).
        processor: Object exposing ``batch_decode(ids, skip_special_tokens=...)``.
        model: Object exposing ``generate(...)`` whose dict-style result has
            a ``sequences`` attribute.
        max_length: Upper bound passed to ``generate``; defaults to the
            previously hard-coded value of 64.

    Returns:
        Whatever ``processor.batch_decode`` returns for the generated
        sequences (one decoded entry per generated sequence).
    """
    # Per-step scores were requested before but never read, so they are no
    # longer asked for; only the generated sequences are needed for decoding.
    generation = model.generate(
        pixel_values,
        return_dict_in_generate=True,
        max_length=max_length,
    )
    # Read the sequences by name instead of relying on them being field 0.
    return processor.batch_decode(generation.sequences, skip_special_tokens=True)
17
+
18
+
19
# NOTE(review): `df` is not read anywhere in the visible code; kept so the
# module-level name still exists.
df = pd.DataFrame()

uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    # Show the upload as-is. The stream is no longer drained with an unused
    # `.read()` beforehand, so it is handed over at its start position.
    st.image(uploaded_file)

    # Rewind explicitly so decoding does not depend on where the previous
    # consumer left the stream position.
    uploaded_file.seek(0)
    # Normalise to 3-channel RGB: uploads may be RGBA, palette or greyscale,
    # and the TrOCR usage examples convert to RGB before preprocessing.
    image = Image.open(uploaded_file).convert("RGB")

    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    # A single image was submitted, so take the first decoded string.
    texts = TrOCR_predict(pixel_values, processor, model)[0]

    # Written inside the branch so `texts` is never referenced before a file
    # has been uploaded.
    st.write(texts)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ pandas
2
+ streamlit
3
+ numpy
4
+ transformers
5
+ pillow
6
+ torch