albhu committed on
Commit
243487d
1 Parent(s): 22ff05f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -4
app.py CHANGED
@@ -13,14 +13,33 @@ gpt_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
13
  gpt_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
14
  generate_answer = pipeline("text2text-generation", model=gpt_model, tokenizer=gpt_tokenizer)
15
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Fordítás funkciók
17
  def translate_to_english(text):
18
- translated_text = translation_pipeline(text, src_lang="hu", tgt_lang="en", max_length=512)[0]['translation_text']
19
- return translated_text
 
 
 
 
20
 
21
  def translate_to_hungarian(text):
22
- translated_text = translation_pipeline(text, src_lang="en", tgt_lang="hu", max_length=512)[0]['translation_text']
23
- return translated_text
 
 
 
 
24
 
25
  # PDF szöveg kinyerése
26
  def extract_text_from_pdf(pdf_file):
 
13
  gpt_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
14
  generate_answer = pipeline("text2text-generation", model=gpt_model, tokenizer=gpt_tokenizer)
15
 
16
+ # Szöveg feldarabolása max hosszal
17
+ def chunk_text(text, max_length=512):
18
+ words = text.split()
19
+ chunks = []
20
+ while len(words) > max_length:
21
+ chunk = " ".join(words[:max_length])
22
+ chunks.append(chunk)
23
+ words = words[max_length:]
24
+ chunks.append(" ".join(words))
25
+ return chunks
26
+
27
  # Fordítás funkciók
28
  def translate_to_english(text):
29
+ chunks = chunk_text(text, max_length=512)
30
+ translated_text = ""
31
+ for chunk in chunks:
32
+ translation = translation_pipeline(chunk, src_lang="hu", tgt_lang="en", max_length=512)[0]['translation_text']
33
+ translated_text += " " + translation
34
+ return translated_text.strip()
35
 
36
  def translate_to_hungarian(text):
37
+ chunks = chunk_text(text, max_length=512)
38
+ translated_text = ""
39
+ for chunk in chunks:
40
+ translation = translation_pipeline(chunk, src_lang="en", tgt_lang="hu", max_length=512)[0]['translation_text']
41
+ translated_text += " " + translation
42
+ return translated_text.strip()
43
 
44
  # PDF szöveg kinyerése
45
  def extract_text_from_pdf(pdf_file):