Spaces:
Running
Running
testing
Browse files- .gitattributes +1 -2
- initialize_db.py +3 -91
.gitattributes
CHANGED
@@ -34,5 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
*db* filter=lfs diff=lfs merge=lfs -text
|
37 |
-
vaccine_coverage_study.db filter=lfs diff=lfs merge=lfs -text
|
38 |
-
*.db filter=lfs diff=lfs merge=lfs -text
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
*db* filter=lfs diff=lfs merge=lfs -text
|
37 |
+
vaccine_coverage_study.db filter=lfs diff=lfs merge=lfs -text
|
|
initialize_db.py
CHANGED
@@ -1,91 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
from config import DB_PATH, METADATA_FILE, PDF_DIR
|
5 |
-
|
6 |
-
|
7 |
-
def initialize_database():
|
8 |
-
conn = sqlite3.connect(DB_PATH)
|
9 |
-
cursor = conn.cursor()
|
10 |
-
|
11 |
-
# Create tables
|
12 |
-
cursor.execute(
|
13 |
-
"""
|
14 |
-
CREATE TABLE IF NOT EXISTS items (
|
15 |
-
key TEXT PRIMARY KEY,
|
16 |
-
title TEXT,
|
17 |
-
abstract TEXT,
|
18 |
-
authors TEXT,
|
19 |
-
year INTEGER,
|
20 |
-
doi TEXT
|
21 |
-
)
|
22 |
-
"""
|
23 |
-
)
|
24 |
-
|
25 |
-
cursor.execute(
|
26 |
-
"""
|
27 |
-
CREATE TABLE IF NOT EXISTS attachments (
|
28 |
-
key TEXT PRIMARY KEY,
|
29 |
-
parent_key TEXT,
|
30 |
-
content BLOB,
|
31 |
-
FOREIGN KEY (parent_key) REFERENCES items (key)
|
32 |
-
)
|
33 |
-
"""
|
34 |
-
)
|
35 |
-
|
36 |
-
conn.commit()
|
37 |
-
conn.close()
|
38 |
-
|
39 |
-
|
40 |
-
def populate_database():
|
41 |
-
if not os.path.exists(METADATA_FILE):
|
42 |
-
print(f"Metadata file not found: {METADATA_FILE}")
|
43 |
-
return
|
44 |
-
|
45 |
-
with open(METADATA_FILE, "r") as f:
|
46 |
-
metadata = json.load(f)
|
47 |
-
|
48 |
-
conn = sqlite3.connect(DB_PATH)
|
49 |
-
cursor = conn.cursor()
|
50 |
-
|
51 |
-
for item_key, item_data in metadata.items():
|
52 |
-
metadata = item_data["metadata"]
|
53 |
-
cursor.execute(
|
54 |
-
"""
|
55 |
-
INSERT OR REPLACE INTO items (key, title, abstract, authors, year, doi)
|
56 |
-
VALUES (?, ?, ?, ?, ?, ?)
|
57 |
-
""",
|
58 |
-
(
|
59 |
-
item_key,
|
60 |
-
metadata["title"],
|
61 |
-
metadata["abstract"],
|
62 |
-
metadata["authors"],
|
63 |
-
metadata["year"],
|
64 |
-
metadata["doi"],
|
65 |
-
),
|
66 |
-
)
|
67 |
-
|
68 |
-
pdf_path = item_data.get("pdf_path")
|
69 |
-
if pdf_path:
|
70 |
-
full_pdf_path = os.path.join(PDF_DIR, os.path.basename(pdf_path))
|
71 |
-
if os.path.exists(full_pdf_path):
|
72 |
-
with open(full_pdf_path, "rb") as pdf_file:
|
73 |
-
pdf_content = pdf_file.read()
|
74 |
-
cursor.execute(
|
75 |
-
"""
|
76 |
-
INSERT OR REPLACE INTO attachments (key, parent_key, content)
|
77 |
-
VALUES (?, ?, ?)
|
78 |
-
""",
|
79 |
-
(os.path.basename(pdf_path), item_key, pdf_content),
|
80 |
-
)
|
81 |
-
else:
|
82 |
-
print(f"PDF file not found: {full_pdf_path}")
|
83 |
-
|
84 |
-
conn.commit()
|
85 |
-
conn.close()
|
86 |
-
|
87 |
-
|
88 |
-
if __name__ == "__main__":
|
89 |
-
initialize_database()
|
90 |
-
populate_database()
|
91 |
-
print("Database initialized and populated.")
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0cb2cf50f14d131b1e999cee44652575fd1029141514dfc2e028af1419b0d46
|
3 |
+
size 2344
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|