# acres / initialize_db.py
# ak3ra: "added db file" (commit 183168e, 2.33 kB)
import sqlite3
import json
import os
from config import DB_PATH, METADATA_FILE, PDF_DIR
def initialize_database():
    """Create the ``items`` and ``attachments`` tables if they are missing.

    Opens the SQLite database at ``DB_PATH``, issues one
    ``CREATE TABLE IF NOT EXISTS`` statement per table, commits, and closes
    the connection. Because of ``IF NOT EXISTS`` the function can be called
    repeatedly; tables that already exist are left as they are.

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement fails.
            The connection is closed before the error propagates.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        # One row per bibliographic item, identified by its key.
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS items (
                key TEXT PRIMARY KEY,
                title TEXT,
                abstract TEXT,
                authors TEXT,
                year INTEGER,
                doi TEXT
            )
            """
        )

        # One row per stored file; parent_key points at the owning item.
        # NOTE: SQLite does not enforce FOREIGN KEY constraints by default;
        # a connection has to run `PRAGMA foreign_keys = ON` for that, and
        # this function does not do so.
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS attachments (
                key TEXT PRIMARY KEY,
                parent_key TEXT,
                content BLOB,
                FOREIGN KEY (parent_key) REFERENCES items (key)
            )
            """
        )

        conn.commit()
    finally:
        # Release the database handle even when a statement above raised.
        conn.close()
def populate_database():
    """Load item metadata and PDF attachments into the SQLite database.

    Reads the JSON document at ``METADATA_FILE``: an object that maps each
    item key to a record holding a ``"metadata"`` object (``title``,
    ``abstract``, ``authors``, ``year``, ``doi``) and, optionally, a
    ``"pdf_path"``. Every record is written to ``items`` with
    ``INSERT OR REPLACE``. When a record names a PDF, the file with the same
    base name is looked up in ``PDF_DIR`` and stored as a BLOB in
    ``attachments``, keyed by that base name.

    If ``METADATA_FILE`` does not exist, a message is printed and nothing is
    written. A PDF that is not found in ``PDF_DIR`` is reported and skipped;
    the item row itself is still stored.

    All rows are committed together after the last record. If an error
    interrupts the load, nothing from this run is committed and the
    connection is still closed.

    Raises:
        json.JSONDecodeError: If the metadata file is not valid JSON.
        KeyError: If a record lacks ``"metadata"`` or one of its five fields.
        sqlite3.Error: If the database cannot be opened or an insert fails.
    """
    if not os.path.exists(METADATA_FILE):
        print(f"Metadata file not found: {METADATA_FILE}")
        return

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(METADATA_FILE, "r", encoding="utf-8") as f:
        records = json.load(f)

    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        for item_key, item_data in records.items():
            fields = item_data["metadata"]

            # NOTE(review): values are bound exactly as they appear in the
            # JSON. sqlite3 cannot bind lists or dicts, so this presumes
            # "authors" is already a string -- confirm against the producer
            # of the metadata file.
            cursor.execute(
                """
                INSERT OR REPLACE INTO items (key, title, abstract, authors, year, doi)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (
                    item_key,
                    fields["title"],
                    fields["abstract"],
                    fields["authors"],
                    fields["year"],
                    fields["doi"],
                ),
            )

            pdf_path = item_data.get("pdf_path")
            if not pdf_path:
                continue

            # Only the file name is trusted; the directory part of the
            # recorded path is replaced by PDF_DIR.
            pdf_name = os.path.basename(pdf_path)
            full_pdf_path = os.path.join(PDF_DIR, pdf_name)
            if not os.path.exists(full_pdf_path):
                print(f"PDF file not found: {full_pdf_path}")
                continue

            with open(full_pdf_path, "rb") as pdf_file:
                pdf_content = pdf_file.read()

            cursor.execute(
                """
                INSERT OR REPLACE INTO attachments (key, parent_key, content)
                VALUES (?, ?, ?)
                """,
                (pdf_name, item_key, pdf_content),
            )

        conn.commit()
    finally:
        # Release the database handle even when a record above raised.
        conn.close()
def _main():
    """Run schema creation, then population, then print the completion line.

    The message is printed unconditionally once both calls have returned.
    """
    initialize_database()
    populate_database()
    print("Database initialized and populated.")


if __name__ == "__main__":
    _main()