Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline, AutoTokenizer
|
3 |
from sentence_transformers import SentenceTransformer, util
|
4 |
-
import math
|
5 |
|
6 |
# Translation models
|
7 |
translation_models = {
|
@@ -45,10 +44,13 @@ def split_text(text, max_tokens=1024):
|
|
45 |
total_tokens = len(input_ids)
|
46 |
|
47 |
chunks = []
|
48 |
-
|
49 |
-
|
|
|
|
|
50 |
chunk_text = tokenizer.decode(chunk_ids, skip_special_tokens=True)
|
51 |
chunks.append(chunk_text)
|
|
|
52 |
|
53 |
return chunks
|
54 |
|
@@ -92,3 +94,4 @@ iface.launch()
|
|
92 |
|
93 |
|
94 |
|
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline, AutoTokenizer
|
3 |
from sentence_transformers import SentenceTransformer, util
|
|
|
4 |
|
5 |
# Translation models
|
6 |
translation_models = {
|
|
|
44 |
total_tokens = len(input_ids)
|
45 |
|
46 |
chunks = []
|
47 |
+
start = 0
|
48 |
+
while start < total_tokens:
|
49 |
+
end = min(start + max_tokens, total_tokens)
|
50 |
+
chunk_ids = input_ids[start:end]
|
51 |
chunk_text = tokenizer.decode(chunk_ids, skip_special_tokens=True)
|
52 |
chunks.append(chunk_text)
|
53 |
+
start = end
|
54 |
|
55 |
return chunks
|
56 |
|
|
|
94 |
|
95 |
|
96 |
|
97 |
+
|