Spaces:
Running
Running
File size: 2,325 Bytes
183168e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import sqlite3
import json
import os
from config import DB_PATH, METADATA_FILE, PDF_DIR
def initialize_database():
    """Create the SQLite schema at ``DB_PATH`` if it does not exist yet.

    Two tables are created:

    * ``items`` -- one row per item, keyed by ``key``, with ``title``,
      ``abstract``, ``authors``, ``year`` and ``doi`` columns.
    * ``attachments`` -- one row per attachment, keyed by ``key``, holding
      the binary ``content`` and a ``parent_key`` declared as a foreign key
      to ``items.key``.

    Both statements use ``CREATE TABLE IF NOT EXISTS``, so calling this
    function again on an existing database leaves the tables untouched.

    Raises:
        sqlite3.Error: if the database cannot be opened or a statement fails.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS items (
                key TEXT PRIMARY KEY,
                title TEXT,
                abstract TEXT,
                authors TEXT,
                year INTEGER,
                doi TEXT
            )
            """
        )

        # NOTE: SQLite only enforces the foreign key below when the
        # ``foreign_keys`` pragma is enabled on the connection, which this
        # function does not do; the constraint is declarative here.
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS attachments (
                key TEXT PRIMARY KEY,
                parent_key TEXT,
                content BLOB,
                FOREIGN KEY (parent_key) REFERENCES items (key)
            )
            """
        )

        conn.commit()
    finally:
        # Release the database handle even when a statement raises.
        conn.close()
def populate_database():
    """Load item metadata and PDF attachments into the SQLite database.

    Reads the JSON document at ``METADATA_FILE`` and, for every top-level
    ``item_key -> item_data`` entry:

    * upserts a row into ``items`` from ``item_data["metadata"]`` (fields
      ``title``, ``abstract``, ``authors``, ``year``, ``doi``);
    * if ``item_data`` has a truthy ``"pdf_path"``, looks for a file with the
      same base name inside ``PDF_DIR`` and upserts its bytes into
      ``attachments``, keyed by that base name and linked to ``item_key``.
      A missing PDF is reported on stdout and skipped.

    If the metadata file does not exist, a message is printed and the
    function returns without opening the database. All rows are committed
    together after the last item has been processed.

    Raises:
        KeyError: if an item has no ``"metadata"`` entry or one of the
            required metadata fields is missing.
        sqlite3.Error: if the database cannot be opened or a statement fails.
    """
    if not os.path.exists(METADATA_FILE):
        print(f"Metadata file not found: {METADATA_FILE}")
        return

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(METADATA_FILE, "r", encoding="utf-8") as f:
        all_items = json.load(f)

    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        for item_key, item_data in all_items.items():
            # Use a distinct name so the dict being iterated is not rebound.
            item_meta = item_data["metadata"]

            # NOTE(review): sqlite3 binds only str/int/float/bytes/None. If
            # "authors" is stored as a JSON list this insert will raise --
            # confirm the metadata schema.
            cursor.execute(
                """
                INSERT OR REPLACE INTO items (key, title, abstract, authors, year, doi)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (
                    item_key,
                    item_meta["title"],
                    item_meta["abstract"],
                    item_meta["authors"],
                    item_meta["year"],
                    item_meta["doi"],
                ),
            )

            pdf_path = item_data.get("pdf_path")
            if not pdf_path:
                continue

            # Only the base name is kept: PDFs are resolved relative to
            # PDF_DIR regardless of the directory recorded in the metadata.
            pdf_name = os.path.basename(pdf_path)
            full_pdf_path = os.path.join(PDF_DIR, pdf_name)
            if not os.path.exists(full_pdf_path):
                print(f"PDF file not found: {full_pdf_path}")
                continue

            with open(full_pdf_path, "rb") as pdf_file:
                pdf_content = pdf_file.read()

            cursor.execute(
                """
                INSERT OR REPLACE INTO attachments (key, parent_key, content)
                VALUES (?, ?, ?)
                """,
                (pdf_name, item_key, pdf_content),
            )

        conn.commit()
    finally:
        # Release the database handle even when an item fails to load.
        conn.close()
if __name__ == "__main__":
    # Script entry point: the schema must exist before rows are loaded,
    # so the two steps run in this fixed order.
    for step in (initialize_database, populate_database):
        step()
    print("Database initialized and populated.")
|