Update app.py
app.py CHANGED
@@ -13,14 +13,33 @@ gpt_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
 gpt_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
 generate_answer = pipeline("text2text-generation", model=gpt_model, tokenizer=gpt_tokenizer)
 
+# Split the text into chunks with a maximum length
+def chunk_text(text, max_length=512):
+    words = text.split()
+    chunks = []
+    while len(words) > max_length:
+        chunk = " ".join(words[:max_length])
+        chunks.append(chunk)
+        words = words[max_length:]
+    chunks.append(" ".join(words))
+    return chunks
+
 # Translation functions
 def translate_to_english(text):
-
-
+    chunks = chunk_text(text, max_length=512)
+    translated_text = ""
+    for chunk in chunks:
+        translation = translation_pipeline(chunk, src_lang="hu", tgt_lang="en", max_length=512)[0]['translation_text']
+        translated_text += " " + translation
+    return translated_text.strip()
 
 def translate_to_hungarian(text):
-
-
+    chunks = chunk_text(text, max_length=512)
+    translated_text = ""
+    for chunk in chunks:
+        translation = translation_pipeline(chunk, src_lang="en", tgt_lang="hu", max_length=512)[0]['translation_text']
+        translated_text += " " + translation
+    return translated_text.strip()
 
 # PDF text extraction
 def extract_text_from_pdf(pdf_file):
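For context, a minimal usage sketch of the chunked-translation pattern this commit introduces. It is not part of the diff: translation_pipeline is created elsewhere in app.py (not shown in this hunk), so the checkpoint below (facebook/m2m100_418M, a multilingual model whose translation pipeline accepts two-letter src_lang/tgt_lang codes such as "hu" and "en") is only an assumption chosen to make the example self-contained, and the sample text is invented. Note also that chunk_text counts whitespace-separated words, not model tokens, so a 512-word chunk can still exceed a 512-token model limit.

from transformers import pipeline

# Hypothetical checkpoint; the model actually used by app.py is not visible in this diff.
translation_pipeline = pipeline("translation", model="facebook/m2m100_418M")

def chunk_text(text, max_length=512):
    # Mirrors the helper added in the commit: word-based chunking, last chunk keeps the remainder.
    words = text.split()
    chunks = []
    while len(words) > max_length:
        chunks.append(" ".join(words[:max_length]))
        words = words[max_length:]
    chunks.append(" ".join(words))
    return chunks

sample = "Ez egy hosszú magyar mondat a teszthez. " * 200   # ~1400 words of dummy Hungarian text
chunks = chunk_text(sample, max_length=512)
print([len(c.split()) for c in chunks])                     # word counts per chunk, e.g. [512, 512, 376]

# Hungarian -> English, one chunk at a time, then re-joined.
english = " ".join(
    translation_pipeline(c, src_lang="hu", tgt_lang="en", max_length=512)[0]["translation_text"]
    for c in chunks
).strip()
print(english[:200])

Joining the per-chunk outputs with a space and stripping the result matches what the new translate_to_english and translate_to_hungarian functions do in the diff.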