File size: 1,011 Bytes
21dee20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import xml.etree.ElementTree as ET

def _chapter_preview(raw_text, preview_length=100):
    """Return a short preview of a chapter's body text.

    The text is stripped, its first line is dropped, and the remainder is
    stripped again. A result longer than *preview_length* characters is cut
    to that length and suffixed with "...".

    Args:
        raw_text: The chapter element's text, or None when it has none.
        preview_length: Maximum number of body characters to keep.

    Returns:
        The (possibly truncated) preview string; "" for missing text.
    """
    body = raw_text.strip() if raw_text else ""
    # str.find returns -1 when there is no newline, so the slice starts at
    # index 0 and single-line text is kept whole rather than discarded.
    body = body[body.find("\n") + 1:].strip()
    if len(body) > preview_length:
        return body[:preview_length] + "..."
    return body


def parse_chapters(xml_path, preview_length=100):
    """Print the book title and a short preview of every chapter.

    The root element's ``title`` attribute is printed first. Then, for each
    ``chapter`` element that is a direct child of the root, a header built
    from its ``title`` and ``id`` attributes is printed, followed by a
    preview of its text with the first line removed.

    Args:
        xml_path: Source passed straight to ``ET.parse``.
        preview_length: Maximum number of characters shown per chapter
            before the preview is cut and "..." is appended. Defaults to
            100.

    Raises:
        ValueError: If *preview_length* is negative.
    """
    if preview_length < 0:
        raise ValueError("preview_length must be non-negative")

    root = ET.parse(xml_path).getroot()

    book_title = root.get('title')
    print(f"\nBook: {book_title}\n")

    # findall with a bare tag name matches direct children of the root only.
    for chapter in root.findall('chapter'):
        chapter_id = chapter.get('id')
        chapter_title = chapter.get('title')
        preview = _chapter_preview(chapter.text, preview_length)

        print(f"=== {chapter_title} ({chapter_id}) ===")
        print(f"{preview}\n")

if __name__ == "__main__":
    # Script entry point: print chapter previews for the processed XML file.
    parse_chapters("texts/processed/dorian_grey.xml")