Spaces:
Runtime error
Runtime error
from textwrap3 import dedent | |
from unidecode import unidecode | |
import re | |
def chunk_text(text, max_size=4000):
    """Split *text* into sentence-aligned chunks of roughly *max_size* chars.

    The text is dedented, transliterated to ASCII with ``unidecode``, and
    split into sentence-like pieces at ``.``, ``?`` and ``!``. Pieces are
    then packed greedily into chunks whose length stays below *max_size*.

    Args:
        text: The input string to split.
        max_size: Upper bound (exclusive) on the length of a chunk before
            whitespace stripping. A single sentence that is itself at least
            this long is emitted as its own oversized chunk rather than
            being dropped.

    Returns:
        A list of non-empty, whitespace-stripped chunk strings in input
        order. Empty or whitespace-only input yields an empty list.
    """
    paragraphs = dedent(text)
    # Each match is a run of non-terminator characters followed by any
    # trailing run of terminators. Making the terminators optional keeps
    # trailing text that has no final punctuation, and the class no longer
    # contains "(" / ")" so parentheses are not treated as sentence ends.
    sentences = re.findall(r"[^.?!]+[.?!]*", unidecode(paragraphs))

    chunks = []
    chunk = ""
    for sentence in sentences:
        if len(chunk) + len(sentence) < max_size:
            chunk += sentence
            continue
        # The sentence does not fit: flush what we have (if anything) and
        # start the next chunk WITH this sentence so it is not lost.
        stripped = chunk.strip()
        if stripped:
            chunks.append(stripped)
        chunk = sentence

    # Flush the final, partially filled chunk.
    stripped = chunk.strip()
    if stripped:
        chunks.append(stripped)
    return chunks