Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,158 Bytes
21dee20 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import xml.etree.ElementTree as ET
import os
from typing import Dict, List, Tuple
from .text_utils import count_tokens
import logging
logger = logging.getLogger(__name__)
def get_available_books(processed_dir: str = "texts/processed") -> List[Dict[str, str]]:
    """Get list of available book XML files.

    Args:
        processed_dir: Directory to scan for ``.xml`` files. Defaults to
            ``"texts/processed"``; a relative path is resolved against the
            current working directory.

    Returns:
        List of dicts, sorted by filename, with keys:
        - value: filename with extension (for internal use)
        - label: display name without extension

    Raises:
        OSError: If ``processed_dir`` cannot be listed (for example
            ``FileNotFoundError`` when it does not exist).
    """
    xml_suffix = ".xml"
    books = []
    logger.info("Checking directory: %s", processed_dir)
    # os.listdir() yields entries in arbitrary order; sort so the returned
    # list is stable from run to run and across filesystems.
    for file in sorted(os.listdir(processed_dir)):
        logger.info("Found file: %s", file)
        if file.endswith(xml_suffix):
            books.append({
                'value': file,
                # Remove the .xml extension for display
                'label': file[:-len(xml_suffix)],
            })
    return books
def get_book_info(xml_path: str) -> Tuple[str, List[Dict]]:
    """Parse a book XML file into its title and chapter records.

    The title is the root element's ``title`` attribute. One record is built
    for every ``chapter`` element that is a direct child of the root.

    Returns:
        Tuple containing:
        - Book title (str)
        - List of chapter dicts with keys: id, title, text
    """
    root = ET.parse(xml_path).getroot()

    def _body_text(raw):
        # Normalise missing text to "" and trim surrounding whitespace.
        cleaned = raw.strip() if raw else ""
        _, newline, remainder = cleaned.partition("\n")
        if not newline:
            # Single-line text has no first line to drop; keep it whole.
            return cleaned
        # Discard the first line, then trim what is left.
        return remainder.strip()

    chapters = [
        {
            'id': node.get('id'),
            'title': node.get('title'),
            'text': _body_text(node.text),
        }
        for node in root.findall('chapter')
    ]
    return root.get('title'), chapters
def get_chapter_text(xml_path: str, chapter_id: str) -> str:
    """Return the text of the chapter whose id equals ``chapter_id``.

    Chapters are loaded with ``get_book_info``. The first chapter with a
    matching id is used; an empty string is returned when none matches.
    """
    _, chapters = get_book_info(xml_path)
    matching_texts = (
        entry['text'] for entry in chapters if entry['id'] == chapter_id
    )
    return next(matching_texts, "")
def get_book_chapters(xml_path: str) -> List[Dict]:
    """List each chapter's id and title (no text), for use in a dropdown.

    Chapters are loaded with ``get_book_info`` and returned in the same order.
    """
    chapters = get_book_info(xml_path)[1]
    wanted = ('id', 'title')
    return [{key: entry[key] for key in wanted} for entry in chapters]
|