Spaces:

Remsky
/

Kokoro-TTS-Zero

Running on Zero

File size: 2,749 Bytes

9a2b6d1

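# NOTE: the whole script below is commented out (disabled); as written,
# nothing here is defined or executed when this file is imported or run.
# import re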
# import os
# from xml.etree import ElementTree as ET
# from xml.dom import minidom

# def process_dorian_grey():
#     # Create processed directory if it doesn't exist
#     os.makedirs('texts/processed', exist_ok=True)
    
#     # Read the file
#     with open('texts/dorian_grey.txt', 'r', encoding='utf-8') as f:
#         text = f.read()
    
#     # Create root XML element
#     root = ET.Element("book")
#     root.set("title", "The Picture of Dorian Gray")
    
#     # Split into chapters using regex
#     # Look for chapter markers and keep them with the content
#     chapter_pattern = r'(CHAPTER [IVXLC\d]+\..*?)(?=CHAPTER [IVXLC\d]+\.|$)'
#     chapters = re.findall(chapter_pattern, text, re.DOTALL)
    
#     # Process chapters
#     for i, content in enumerate(chapters):
#         # Create chapter element
#         chapter = ET.SubElement(root, "chapter")
#         chapter.set("id", f"chapter_{i}")
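#         # NOTE(review): enumerate() starts at 0, so the first title here is
#         # "Chapter 0"; process_time_machine keeps 0-based ids but uses
#         # 1-based titles -- confirm which numbering is intended.
#         chapter.set("title", f"Chapter {i}")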
#         chapter.text = content.strip()
    
#     # Pretty print XML
#     xml_str = minidom.parseString(ET.tostring(root)).toprettyxml(indent="  ")
    
#     # Save as XML
#     output_path = 'texts/processed/dorian_grey.xml'
#     with open(output_path, 'w', encoding='utf-8') as f:
#         f.write(xml_str)
    
#     print(f"Processed and saved to {output_path}")

# def process_time_machine():
#     # Create processed directory if it doesn't exist
#     os.makedirs('texts/processed', exist_ok=True)
    
#     # Read the file
#     with open('texts/time_machine.txt', 'r', encoding='utf-8') as f:
#         text = f.read()
    
#     # Create root XML element
#     root = ET.Element("book")
#     root.set("title", "The Time Machine")
    
#     # Split into chapters using 4 or more newlines as separator
#     chapters = re.split(r'\n{4,}', text)
    
#     # Track actual chapter number (no skipping)
#     chapter_num = 1
    
#     # Process chapters
#     for content in chapters:
#         if content.strip():  # Only process non-empty chapters
#             # Create chapter element
#             chapter = ET.SubElement(root, "chapter")
#             chapter.set("id", f"chapter_{chapter_num-1}")  # Keep 0-based ids
#             chapter.set("title", f"Chapter {chapter_num}")
#             chapter.text = content.strip()
#             chapter_num += 1
    
#     # Pretty print XML
#     xml_str = minidom.parseString(ET.tostring(root)).toprettyxml(indent="  ")
    
#     # Save as XML
#     output_path = 'texts/processed/time_machine.xml'
#     with open(output_path, 'w', encoding='utf-8') as f:
#         f.write(xml_str)
    
#     print(f"Processed and saved to {output_path}")

# if __name__ == "__main__":
#     process_time_machine()