Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,011 Bytes
9a2b6d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
import xml.etree.ElementTree as ET
def _chapter_preview(chapter, limit):
    """Return the preview string for one ``chapter`` element.

    Only ``chapter.text`` is read (in ElementTree that is the text before the
    element's first child, or ``None`` when there is none).
    """
    raw_text = chapter.text
    body = raw_text.strip() if raw_text else ""
    # Drop the top line of the stripped text; text without any newline is
    # kept whole, which is what the slice-based original did as well.
    _, newline, remainder = body.partition("\n")
    if newline:
        body = remainder.strip()
    if len(body) > limit:
        return body[:limit] + "..."
    return body


def parse_chapters(xml_path, *, preview_length=100):
    """Print the book title and a short text preview of every chapter.

    The root element's ``title`` attribute is printed first. Then, for each
    ``chapter`` element that is a direct child of the root, the chapter's
    ``title`` and ``id`` attributes are printed, followed by a preview of its
    text with the top line removed.

    Args:
        xml_path: Source handed straight to ``ET.parse``.
        preview_length: Maximum number of characters of chapter text to show.
            Longer text is cut to this length and suffixed with ``"..."``.
            Defaults to 100.

    Raises:
        ValueError: If ``preview_length`` is negative.

    Errors raised by ``ET.parse`` (missing file, malformed XML) propagate
    unchanged.
    """
    if preview_length < 0:
        raise ValueError("preview_length must be non-negative")

    root = ET.parse(xml_path).getroot()

    # A missing attribute comes back as None from .get() and is printed as such.
    print(f"\nBook: {root.get('title')}\n")

    # findall('chapter') matches direct children of the root only.
    for chapter in root.findall('chapter'):
        preview = _chapter_preview(chapter, preview_length)
        print(f"=== {chapter.get('title')} ({chapter.get('id')}) ===")
        print(f"{preview}\n")
if __name__ == "__main__":
    # Script entry point: preview the chapters of the bundled sample book.
    parse_chapters("texts/processed/dorian_grey.xml")
|