Spaces:
Runtime error
Runtime error
File size: 472 Bytes
66340f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
from textwrap3 import dedent
from unidecode import unidecode
import re
def chunk_text(text, max_size=4000):
    """Split *text* into chunks of whole sentences under *max_size* characters.

    The text is dedented, transliterated to ASCII with ``unidecode`` and cut
    into sentences with a regular expression. Sentences are then packed
    greedily, in order, into chunks; a new chunk is started whenever adding
    the next sentence would make the current one reach ``max_size``.

    Args:
        text: The text to split.
        max_size: Exclusive upper bound on the length of a chunk, measured
            before surrounding whitespace is stripped. A single sentence that
            is itself ``max_size`` characters or longer is emitted as its own
            oversized chunk rather than being dropped.

    Returns:
        A list of whitespace-stripped chunk strings in their original order.
        The list is empty when the pattern finds no sentence in the text.
    """
    # A "sentence" is a run of characters other than . ? ! followed by one
    # terminating character. Inside the character class the parentheses are
    # literal, so "(" and ")" can also end a match.
    # NOTE(review): trailing text that contains none of these terminating
    # characters is not matched and therefore never reaches any chunk.
    sentences = re.findall(r"[^.?!]+[(\.)?!]", unidecode(dedent(text)))

    chunks = []
    pending = []  # sentences of the chunk currently being built
    pending_len = 0  # total length of the sentences in `pending`
    for sentence in sentences:
        if pending and pending_len + len(sentence) >= max_size:
            # The current chunk is full: emit it, then start the next chunk
            # with this sentence so that it is not lost.
            chunks.append("".join(pending).strip())
            pending = []
            pending_len = 0
        pending.append(sentence)
        pending_len += len(sentence)

    # Emit the last, partially filled chunk.
    if pending:
        chunks.append("".join(pending).strip())
    return chunks
|