codeteach committed on
Commit
7cd8f48
1 Parent(s): c81d8ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
3
  from sentence_transformers import SentenceTransformer, util
4
- import math
5
 
6
  # Translation models
7
  translation_models = {
@@ -45,10 +44,13 @@ def split_text(text, max_tokens=1024):
45
  total_tokens = len(input_ids)
46
 
47
  chunks = []
48
- for i in range(0, total_tokens, max_tokens):
49
- chunk_ids = input_ids[i:i+max_tokens]
 
 
50
  chunk_text = tokenizer.decode(chunk_ids, skip_special_tokens=True)
51
  chunks.append(chunk_text)
 
52
 
53
  return chunks
54
 
@@ -92,3 +94,4 @@ iface.launch()
92
 
93
 
94
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
3
  from sentence_transformers import SentenceTransformer, util
 
4
 
5
  # Translation models
6
  translation_models = {
 
44
  total_tokens = len(input_ids)
45
 
46
  chunks = []
47
+ start = 0
48
+ while start < total_tokens:
49
+ end = min(start + max_tokens, total_tokens)
50
+ chunk_ids = input_ids[start:end]
51
  chunk_text = tokenizer.decode(chunk_ids, skip_special_tokens=True)
52
  chunks.append(chunk_text)
53
+ start = end
54
 
55
  return chunks
56
 
 
94
 
95
 
96
 
97
+