cmcmaster committed on
Commit
99ab61a
1 Parent(s): 55b4b51

deploy at 2024-07-31 15:38:10.387689

Browse files
Files changed (6) hide show
  1. Dockerfile +10 -0
  2. app.py +214 -0
  3. config.ini +5 -0
  4. pbs_data.py +487 -0
  5. requirements.txt +1 -0
  6. rheumatology_biologics_data.db +0 -0
Dockerfile ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10
WORKDIR /code
COPY --link --chown=1000 . .

# World-writable scratch directory for caches.
RUN mkdir -p /tmp/cache/
RUN chmod a+rwx -R /tmp/cache/
# NOTE(review): this sets the cache location to the literal string "HF_HOME",
# not to the value of a variable of that name - confirm this is intended.
ENV HF_HUB_CACHE=HF_HOME
RUN pip install --no-cache-dir -r requirements.txt

ENV PYTHONUNBUFFERED=1 PORT=7860
# The application entry point added alongside this Dockerfile is app.py
# (it ends with serve()); no main.py is part of the change.
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fasthtml.common import *
2
+ import sqlite3
3
+ import os
4
+ import datetime
5
+ from apscheduler.schedulers.background import BackgroundScheduler
6
+ from pbs_data import PBSPublicDataAPIClient
7
+ import os
8
+ from fasthtml_hf import setup_hf_backup # Add this line
9
+
10
# SQLite database holding the biologics data; the path is relative to the
# process working directory.
DB_FILE = 'rheumatology_biologics_data.db'

# Only report a missing database here; start-up continues either way.
if not os.path.exists(DB_FILE):
    print(f"Database file {DB_FILE} does not exist!")
15
+
16
def load_data(db_file=None):
    """Load every combination row and the dropdown value lists from SQLite.

    Args:
        db_file: Path of the database to read. Defaults to the module-level
            ``DB_FILE`` when omitted.

    Returns:
        A dict with the keys ``combinations`` (list of row tuples: pbs_code,
        drug, brand, formulation, indication, treatment_phase,
        streamlined_code, online_application, authority_method) and
        ``drugs``, ``brands``, ``formulations``, ``indications``,
        ``treatment_phases`` (lists of names). If any SQLite error occurs -
        while connecting or while querying - the error is printed and every
        list is empty.
    """
    path = DB_FILE if db_file is None else db_file
    lookup_tables = ('drugs', 'brands', 'formulations', 'indications', 'treatment_phases')

    conn = None
    try:
        conn = sqlite3.connect(path)
        cursor = conn.cursor()

        # Resolve every foreign key so callers get display names, not ids.
        cursor.execute('''SELECT c.pbs_code, d.name as drug, b.name as brand, f.name as formulation,
                          i.name as indication, tp.name as treatment_phase, c.streamlined_code,
                          c.online_application, c.authority_method
                          FROM combinations c
                          JOIN drugs d ON c.drug_id = d.id
                          JOIN brands b ON c.brand_id = b.id
                          JOIN formulations f ON c.formulation_id = f.id
                          JOIN indications i ON c.indication_id = i.id
                          JOIN treatment_phases tp ON c.treatment_phase_id = tp.id''')
        loaded = {'combinations': cursor.fetchall()}

        # Distinct values for the dropdowns; the table names are the fixed
        # literals above, so interpolating them into the SQL is safe.
        for table in lookup_tables:
            cursor.execute(f'SELECT name FROM {table}')
            loaded[table] = [row[0] for row in cursor.fetchall()]
        return loaded
    except sqlite3.Error as e:
        # Covers query failures too (e.g. a missing table), not just connect().
        print(f"An error occurred: {e}")
        empty = {'combinations': []}
        empty.update({table: [] for table in lookup_tables})
        return empty
    finally:
        # Release the connection on both the success and the error path.
        if conn is not None:
            conn.close()
71
+
72
# Snapshot of the database contents, read once when the module is loaded.
biologics_data = load_data()

# FastHTML application object and its route decorator.
app, rt = fast_app()
75
+
76
def search_biologics(drug, brand, formulation, indication, treatment_phase, combinations=None):
    """Return an HTML fragment describing every combination that matches.

    Args:
        drug, brand, formulation, indication, treatment_phase: Exact values to
            match; an empty/falsy value means "do not filter on this field".
        combinations: Optional iterable of row tuples to search (pbs_code,
            drug, brand, formulation, indication, treatment_phase,
            streamlined_code, online_application, authority_method).
            Defaults to ``biologics_data['combinations']``.

    Returns:
        The string ``"No results found."`` when nothing matches, otherwise
        one HTML card per matching row. All database values are HTML-escaped
        before being placed in the markup.
    """
    from html import escape  # local import keeps the module import list untouched

    rows = biologics_data['combinations'] if combinations is None else combinations

    # (position of the field inside a row, requested value); unset filters
    # are dropped up front so each row is only compared against active ones.
    criteria = ((1, drug), (2, brand), (3, formulation), (4, indication), (5, treatment_phase))
    active = [(position, value) for position, value in criteria if value]

    results = [row for row in rows if all(row[position] == value for position, value in active)]
    if not results:
        return "No results found."

    def text(value):
        # Escape quotes as well: the PBS code is also used inside an href.
        return escape(str(value), quote=True)

    cards = []
    for item in results:
        code = text(item[0])
        cards.append(f"""
        <h2>{text(item[1])} ({text(item[2])})</h2>
        <p>PBS Code: <a href="https://www.pbs.gov.au/medicine/item/{code}" target="_blank">{code}</a></p>
        <p>Formulation: {text(item[3])}</p>
        <p>Indication: {text(item[4])}</p>
        <p>Treatment Phase: {text(item[5])}</p>
        <p>Streamlined Code: {text(item[6] or 'N/A')}</p>
        <p>Authority Method: {text(item[8])}</p>
        <p>Online Application: {'Yes' if item[7] else 'No'}</p>
        <hr>
        """)

    # join() instead of repeated += keeps the build linear in the output size.
    return "".join(cards)
108
+
109
def update_options(drug, brand, formulation, indication, treatment_phase):
    """Compute the dropdown choices still available under the current filters.

    Each argument is the selected value for that field; a falsy value means
    the field is unfiltered. Rows of ``biologics_data['combinations']`` are
    kept only when they equal every selected value.

    Returns:
        A dict with the keys ``drugs``, ``brands``, ``formulations``,
        ``indications`` and ``treatment_phases``, each a sorted list of the
        distinct values found in the remaining rows.
    """
    # (position inside a combination row, selected value, key in the result)
    columns = (
        (1, drug, 'drugs'),
        (2, brand, 'brands'),
        (3, formulation, 'formulations'),
        (4, indication, 'indications'),
        (5, treatment_phase, 'treatment_phases'),
    )

    remaining = []
    for combo in biologics_data['combinations']:
        for position, chosen, _ in columns:
            if chosen and combo[position] != chosen:
                break  # this row contradicts one of the active filters
        else:
            remaining.append(combo)

    return {
        key: sorted({combo[position] for combo in remaining})
        for position, _, key in columns
    }
140
+
141
# Selector that makes each dropdown submit the current value of all five filters.
_FILTER_INCLUDE = "[name='drug'],[name='brand'],[name='formulation'],[name='indication'],[name='treatment_phase']"

# (form field name, value of the "label" attribute, key holding the choices)
_FILTER_FIELDS = (
    ("drug", "Drug", "drugs"),
    ("brand", "Brand", "brands"),
    ("formulation", "Formulation", "formulations"),
    ("indication", "Indication", "indications"),
    ("treatment_phase", "Treatment Phase", "treatment_phases"),
)


def _filter_form(choices, current=None):
    """Build the filter form shared by the page, reset and update routes.

    ``choices`` maps each key in ``_FILTER_FIELDS`` to the values offered in
    that dropdown. When ``current`` (field name -> selected value) is given,
    each option carries ``selected=(value == current[field])``; when it is
    None the options carry no ``selected`` argument at all.
    """
    # NOTE(review): every response built here is itself a form with
    # id="options" and is targeted at "#options"; confirm that swapping a form
    # into the form of the same id is the intended htmx behaviour.
    dropdowns = []
    for field, label, key in _FILTER_FIELDS:
        if current is None:
            entries = [Option(value, value=value) for value in choices[key]]
        else:
            picked = current[field]
            entries = [Option(value, value=value, selected=(value == picked)) for value in choices[key]]
        dropdowns.append(
            Select(
                Option("All", value=""),
                *entries,
                label=label,
                name=field,
                hx_get="/update_options",
                hx_target="#options",
                hx_trigger="change",
                hx_include=_FILTER_INCLUDE,
            )
        )
    return Form(
        *dropdowns,
        Button("Search", type="submit"),
        Button("Reset", hx_get="/reset", hx_target="#options"),
        hx_post="/search",
        hx_target="#results",
        id="options",
    )


@rt('/')
def get():
    """Render the full page: the unfiltered form plus an empty results area."""
    return Titled("Biologics Prescriber Helper",
        _filter_form(biologics_data),
        Div(id="results")
    )


@rt('/reset')
def get():
    """Return the form with every dropdown back to its full list of choices."""
    return _filter_form(biologics_data)


@rt('/update_options')
def get(drug: str = '', brand: str = '', formulation: str = '', indication: str = '', treatment_phase: str = ''):
    """Return the form narrowed to choices compatible with the current selection."""
    current = {
        "drug": drug,
        "brand": brand,
        "formulation": formulation,
        "indication": indication,
        "treatment_phase": treatment_phase,
    }
    narrowed = update_options(drug, brand, formulation, indication, treatment_phase)
    return _filter_form(narrowed, current)


@rt('/search')
def post(drug: str = '', brand: str = '', formulation: str = '', indication: str = '', treatment_phase: str = ''):
    """Return the search results for the submitted filter values."""
    return search_biologics(drug, brand, formulation, indication, treatment_phase)
194
+
195
+
196
def update_data():
    """Fetch fresh PBS data, rewrite the database and reload the in-memory copy.

    Any exception raised while fetching, saving or reloading is printed and
    swallowed, so a failed refresh leaves ``biologics_data`` as it was.
    """
    global biologics_data

    print(f"Updating data at {datetime.datetime.now()}")
    client = PBSPublicDataAPIClient("2384af7c667342ceb5a736fe29f1dc6b", rate_limit=0.2)
    try:
        fresh = client.fetch_rheumatology_biologics_data()
        client.save_data_to_sqlite(fresh, DB_FILE)
        print("Data updated successfully")
        # Rebind the module-level snapshot that the request handlers read.
        biologics_data = load_data()
    except Exception as e:
        print(f"An error occurred while updating data: {str(e)}")
207
+
208
# Background job: run update_data() at 00:00 on day 1 of every month.
scheduler = BackgroundScheduler()
scheduler.add_job(
    func=update_data,
    trigger='cron',
    day='1',
    hour='0',
    minute='0',
)
scheduler.start()

# Register the Hugging Face backup hook on the app, then start serving.
setup_hf_backup(app)
serve()
config.ini ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [DEFAULT]
2
+ dataset_id = space-backup
3
+ db_dir = data
4
+ private_backup = True
5
+
pbs_data.py ADDED
@@ -0,0 +1,487 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import csv
3
+ from io import StringIO
4
+ import json
5
+ import time
6
+ from requests.adapters import HTTPAdapter
7
+ from requests.packages.urllib3.util.retry import Retry
8
+ import sqlite3
9
+ import datetime
10
+ import os
11
+
12
class PBSPublicDataAPIClient:
    """Rate-limited client for the PBS public data API.

    Besides thin wrappers around the individual endpoints, the class can
    assemble a dataset of biologics used for rheumatological indications
    (``fetch_rheumatology_biologics_data``) and persist it to SQLite
    (``save_data_to_sqlite``).
    """

    def __init__(self, subscription_key, base_url='https://data-api.health.gov.au/pbs/api/v3', rate_limit=0.2):
        """Store the credentials and create an HTTP session with retries.

        Args:
            subscription_key: Value sent in the ``subscription-key`` header.
            base_url: Root URL that endpoint names are appended to.
            rate_limit: Maximum requests per second; a falsy value disables
                client-side throttling.
        """
        self.subscription_key = subscription_key
        self.base_url = base_url
        self.rate_limit = rate_limit  # Requests per second
        self.last_request_time = 0

        # Transport-level retries for throttling and transient server errors.
        self.session = requests.Session()
        retries = Retry(total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
        self.session.mount('https://', HTTPAdapter(max_retries=retries))

    # ------------------------------------------------------------------
    # Low-level request helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _retry_after_seconds(response, default=60):
        """Return the numeric ``Retry-After`` header, or *default* if absent/non-numeric."""
        try:
            return max(0, int(response.headers.get('Retry-After', default)))
        except (TypeError, ValueError):
            # The header may legally be an HTTP date rather than a number.
            return default

    def make_request(self, endpoint, params=None, accept="application/json", max_attempts=5, timeout=60):
        """GET ``{base_url}/{endpoint}`` while honouring the client-side rate limit.

        Args:
            endpoint: Path appended to ``base_url``.
            params: Optional query parameters.
            accept: Value of the ``Accept`` header.
            max_attempts: Upper bound on attempts for connection errors and
                429 responses (must be at least 1).
            timeout: Timeout in seconds handed to the HTTP session.

        Returns:
            The successful response object.

        Raises:
            ValueError: If ``max_attempts`` is smaller than 1.
            requests.exceptions.RequestException: When the final attempt fails
                or the server answers with a non-429 error status. Error
                statuses other than 429 are not retried here.
        """
        if max_attempts < 1:
            raise ValueError("max_attempts must be at least 1")

        url = f"{self.base_url}/{endpoint}"
        headers = {
            "subscription-key": self.subscription_key,
            "Accept": accept
        }
        # Guard against rate_limit == 0, which would otherwise divide by zero.
        min_interval = 1 / self.rate_limit if self.rate_limit else 0

        for attempt in range(1, max_attempts + 1):
            wait = min_interval - (time.time() - self.last_request_time)
            if wait > 0:
                time.sleep(wait)

            try:
                response = self.session.get(url, headers=headers, params=params, timeout=timeout)
            except requests.exceptions.RequestException as e:
                self.last_request_time = time.time()
                if attempt == max_attempts:
                    raise  # give up instead of looping forever
                print(f"Request failed: {str(e)}. Retrying in 5 seconds...")
                time.sleep(5)
                continue

            self.last_request_time = time.time()

            if response.status_code == 429 and attempt < max_attempts:
                retry_after = self._retry_after_seconds(response)
                print(f"Rate limit exceeded. Waiting for {retry_after} seconds.")
                time.sleep(retry_after)
                continue

            # Raises for error statuses (including a 429 on the last attempt).
            response.raise_for_status()
            return response

    def get_raw_data(self, endpoint, params=None, accept="application/json"):
        """Return the unparsed response body of *endpoint* as text."""
        response = self.make_request(endpoint, params=params, accept=accept)
        return response.text

    def _get_csv(self, endpoint, params):
        """Request *endpoint* as CSV and return its rows as a list of dicts."""
        response = self.make_request(endpoint, params=params, accept="text/csv")
        return list(csv.DictReader(StringIO(response.text)))

    def _get_schedule_csv(self, endpoint, schedule_code, limit):
        """Fetch the CSV rows of *endpoint* for one schedule."""
        return self._get_csv(endpoint, {"schedule_code": schedule_code, "limit": limit})

    # ------------------------------------------------------------------
    # Endpoint wrappers
    # ------------------------------------------------------------------

    def get_sample_data(self, endpoint, limit=5):
        """Return up to *limit* CSV rows from *endpoint*."""
        return self._get_csv(endpoint, {"limit": limit})

    def fetch_sample_data(self):
        """Fetch a few rows from several endpoints and print their column names.

        Returns:
            A dict mapping endpoint name to its sample rows; endpoints that
            returned no rows are left out.
        """
        endpoints = [
            "amt-items",
            "atc-codes",
            "indications",
            "prescribing-texts",
            "item-prescribing-text-relationships",
            "restrictions",
            "item-restriction-relationships"
        ]

        sample_data = {}
        for endpoint in endpoints:
            print(f"Fetching sample data from /{endpoint}...")
            data = self.get_sample_data(endpoint)
            if data:
                sample_data[endpoint] = data
                print(f"Sample keys for {endpoint}: {data[0].keys()}")
            else:
                print(f"No data found for {endpoint}")
            time.sleep(2)  # Wait 2 seconds between requests to avoid rate limiting

        return sample_data

    def get_schedules(self, limit=100):
        """Return the ``data`` list of the ``schedules`` endpoint (JSON)."""
        response = self.make_request("schedules", params={"limit": limit})
        return response.json()['data']

    def get_amt_items(self, schedule_code, limit=100000):
        """Return the ``amt-items`` rows of a schedule."""
        return self._get_schedule_csv("amt-items", schedule_code, limit)

    def get_atc_codes(self, schedule_code, limit=100000):
        """Return the ``atc-codes`` rows of a schedule."""
        return self._get_schedule_csv("atc-codes", schedule_code, limit)

    def get_indications(self, schedule_code, limit=100000):
        """Return the ``indications`` rows of a schedule."""
        return self._get_schedule_csv("indications", schedule_code, limit)

    def get_prescribing_texts(self, schedule_code, limit=100000):
        """Return the ``prescribing-texts`` rows of a schedule."""
        return self._get_schedule_csv("prescribing-texts", schedule_code, limit)

    def get_item_prescribing_text_relationships(self, schedule_code, limit=100000):
        """Return the ``item-prescribing-text-relationships`` rows of a schedule."""
        return self._get_schedule_csv("item-prescribing-text-relationships", schedule_code, limit)

    def get_restrictions(self, schedule_code, limit=100000):
        """Return the ``restrictions`` rows of a schedule."""
        return self._get_schedule_csv("restrictions", schedule_code, limit)

    def get_item_restriction_relationships(self, schedule_code, limit=100000):
        """Return the ``item-restriction-relationships`` rows of a schedule."""
        return self._get_schedule_csv("item-restriction-relationships", schedule_code, limit)

    def get_restriction_prescribing_text_relationships(self, schedule_code, limit=100000):
        """Return the ``restriction-prescribing-text-relationships`` rows of a schedule."""
        return self._get_schedule_csv("restriction-prescribing-text-relationships", schedule_code, limit)

    def get_items(self, schedule_code, limit=100000):
        """Return the ``items`` rows of a schedule."""
        return self._get_schedule_csv("items", schedule_code, limit)

    # ------------------------------------------------------------------
    # Dataset assembly
    # ------------------------------------------------------------------

    @staticmethod
    def _group_values(rows, key_field, value_field):
        """Map each truthy ``row[key_field]`` to the list of its truthy ``row[value_field]``."""
        grouped = {}
        for row in rows:
            key = row.get(key_field)
            value = row.get(value_field)
            if key and value:
                grouped.setdefault(key, []).append(value)
        return grouped

    @staticmethod
    def _classify_formulation(description):
        """Classify a form description as tablet, subcut pen, subcut syringe, infusion or unknown.

        Matching is a case-insensitive substring test, checked in that order.
        NOTE(review): being a substring test, the "pen" keyword also matches
        words such as "suspension" - confirm this is acceptable.
        """
        categories = (
            ('tablet', ['Tablet']),
            ('subcut pen', ['pen', 'auto-injector', 'autoinjector']),
            ('subcut syringe', ['syringe']),
            ('infusion', ['I.V. infusion', 'Concentrate for injection']),
        )
        desc_lower = (description or '').lower()
        for label, keywords in categories:
            if any(keyword.lower() in desc_lower for keyword in keywords):
                return label
        return 'unknown'  # For cases that don't match any category

    def fetch_rheumatology_biologics_data(self):
        """Download the current schedule and extract rheumatology biologics.

        Returns:
            A dict keyed by PBS code. Each value describes one item (name,
            brands, form fields, quantities) and carries a non-empty
            ``restrictions`` list whose entries name the matched rheumatic
            indication, treatment phase, authority method, streamlined code
            and whether online application appears to be available. Items
            without any matching restriction are dropped.

        Raises:
            ValueError: If the API returns no schedules.
        """
        biologics = [
            "adalimumab", "etanercept", "infliximab", "certolizumab", "golimumab",
            "rituximab", "abatacept", "tocilizumab", "secukinumab", "ixekizumab",
            "ustekinumab", "guselkumab", "tofacitinib", "baricitinib",
            "upadacitinib"
        ]

        rheumatic_diseases = [
            "rheumatoid arthritis", "psoriatic arthritis", "ankylosing spondylitis",
            "non-radiographic axial spondyloarthritis", "giant cell arteritis",
            "juvenile idiopathic arthritis"
        ]

        data = {}
        schedules = self.get_schedules()
        if not schedules:
            raise ValueError("The PBS API returned no schedules")

        # Select schedule based on current month; otherwise use the first
        # schedule returned (presumably the most recent - not verified here).
        current_date = datetime.datetime.now()
        current_month = current_date.strftime('%B').upper()
        current_schedule = next(
            (s for s in schedules
             if s['effective_year'] == current_date.year and s['effective_month'] == current_month),
            schedules[0]
        )
        latest_schedule = current_schedule['schedule_code']

        print(f"Selected schedule: {latest_schedule} (Effective: {current_schedule['effective_date']})")

        print("Fetching items...")
        items = self.get_items(latest_schedule)
        time.sleep(5)

        print("Fetching indications...")
        indications = self.get_indications(latest_schedule)
        print(f"Number of indications fetched: {len(indications)}")
        print("Sample of raw indications data:")
        for indication in indications[:5]:
            print(indication)
        time.sleep(5)

        print("Fetching prescribing texts...")
        prescribing_texts = self.get_prescribing_texts(latest_schedule)
        time.sleep(5)

        print("Fetching item-prescribing-text relationships...")
        item_prescribing_text_relationships = self.get_item_prescribing_text_relationships(latest_schedule)
        time.sleep(5)

        print("Fetching restrictions...")
        restrictions = self.get_restrictions(latest_schedule)
        time.sleep(5)

        print("Fetching item-restriction relationships...")
        item_restriction_relationships = self.get_item_restriction_relationships(latest_schedule)

        print("Fetching restriction-prescribing-text relationships...")
        restriction_prescribing_text_relationships = self.get_restriction_prescribing_text_relationships(latest_schedule)
        print(f"Number of restriction-prescribing-text relationships fetched: {len(restriction_prescribing_text_relationships)}")
        time.sleep(5)

        # Create lookup dictionaries
        prescribing_text_lookup = {text['prescribing_txt_id']: text for text in prescribing_texts if 'prescribing_txt_id' in text}
        restriction_lookup = {res['res_code']: res for res in restrictions if 'res_code' in res}

        # Create indication lookup
        indication_lookup = {}
        for ind in indications:
            # Print the available fields until the first entry has been stored.
            if not indication_lookup:
                print("Keys in indication data:", ind.keys())

            # Try different possible keys for the prescribing text ID
            prescribing_text_id = ind.get('prescribing_text_id') or ind.get('indication_prescribing_txt_id') or ind.get('prescribing_txt_id')
            if prescribing_text_id:
                indication_lookup[prescribing_text_id] = ind

        print(f"Number of items in indication_lookup: {len(indication_lookup)}")
        print("Sample of indication_lookup:")
        for key, value in list(indication_lookup.items())[:5]:
            print(f"  {key}: {value}")

        # Create a lookup for item-prescribing-text relationships
        item_prescribing_text_lookup = self._group_values(
            item_prescribing_text_relationships, 'pbs_code', 'prescribing_txt_id')

        # Create a lookup for restriction-prescribing-text relationships
        print("\nDebugging restriction-prescribing-text relationships:")
        print("Full structure of first 5 relationships:")
        for relationship in restriction_prescribing_text_relationships[:5]:
            print(relationship)

        restriction_prescribing_text_lookup = self._group_values(
            restriction_prescribing_text_relationships, 'res_code', 'prescribing_text_id')

        print(f"Number of items in restriction_prescribing_text_lookup: {len(restriction_prescribing_text_lookup)}")
        print("Sample of restriction_prescribing_text_lookup:")
        for key, value in list(restriction_prescribing_text_lookup.items())[:5]:
            print(f"  {key}: {value}")

        print("Debugging: Inspecting lookups")
        print(f"Number of items in prescribing_text_lookup: {len(prescribing_text_lookup)}")
        print(f"Number of items in restriction_lookup: {len(restriction_lookup)}")
        print(f"Number of items in indication_lookup: {len(indication_lookup)}")
        print(f"Number of items in item_prescribing_text_lookup: {len(item_prescribing_text_lookup)}")
        print(f"Number of items in restriction_prescribing_text_lookup: {len(restriction_prescribing_text_lookup)}")

        # Collect one entry per PBS code whose drug name mentions a biologic.
        biologics_lower = [biologic.lower() for biologic in biologics]
        for item in items:
            drug_name_lower = item['drug_name'].lower()
            if not any(biologic in drug_name_lower for biologic in biologics_lower):
                continue
            pbs_code = item['pbs_code']
            if pbs_code not in data:
                data[pbs_code] = {
                    "name": item['drug_name'],
                    "brands": [],
                    "formulation": self._classify_formulation(item['li_form']),
                    "li_form": item['li_form'],
                    "schedule_form": item['schedule_form'],
                    "manner_of_administration": item['manner_of_administration'],
                    "maximum_quantity": item['maximum_quantity_units'],
                    "number_of_repeats": item['number_of_repeats'],
                    "restrictions": []
                }
            # Append the brand name if it's not already in the list
            if item['brand_name'] not in data[pbs_code]['brands']:
                data[pbs_code]['brands'].append(item['brand_name'])

        # Index the relationships once so each item looks up only its own
        # restriction codes instead of rescanning the whole relationship list.
        res_codes_by_item = {}
        for relationship in item_restriction_relationships:
            res_codes_by_item.setdefault(relationship.get('pbs_code'), []).append(relationship.get('res_code'))

        for pbs_code, entry in data.items():
            for res_code in res_codes_by_item.get(pbs_code, []):
                restriction = restriction_lookup.get(res_code)
                if not restriction:
                    continue
                for prescribing_text_id in restriction_prescribing_text_lookup.get(res_code, []):
                    indication = indication_lookup.get(prescribing_text_id)
                    if not indication:
                        continue
                    condition = (indication.get('condition') or '').lower()
                    found_indication = next((disease for disease in rheumatic_diseases if disease.lower() in condition), None)
                    if found_indication:
                        entry['restrictions'].append({
                            'res_code': res_code,
                            'indications': found_indication,
                            'treatment_phase': restriction.get('treatment_phase', ''),
                            'restriction_text': restriction.get('li_html_text', ''),
                            'authority_method': restriction.get('authority_method', ''),
                            'streamlined_code': restriction.get('treatment_of_code') if restriction.get('authority_method') == "STREAMLINED" else None,
                            # Treated as not online when the text contains this postal address.
                            'online_application': "HOBART TAS 7001" not in (restriction.get('schedule_html_text') or '')
                        })
                        break  # Stop after finding the first matching indication

        # Drop entries if restrictions are empty
        return {k: v for k, v in data.items() if v['restrictions']}

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def preprocess_data(self, data):
        """Flatten the fetched dataset into lookup lists and combination rows.

        Args:
            data: Mapping as returned by ``fetch_rheumatology_biologics_data``.

        Returns:
            A dict with sorted lists under ``drugs``, ``brands``,
            ``formulations`` (taken from ``li_form``), ``indications`` and
            ``treatment_phases``, plus ``combinations``: one dict per
            (item, restriction, brand) triple.
        """
        processed = {
            'drugs': set(),
            'brands': set(),
            'formulations': set(),
            'indications': set(),
            'treatment_phases': set(),
            'combinations': []
        }

        for pbs_code, item in data.items():
            processed['drugs'].add(item['name'])
            processed['brands'].update(item['brands'])
            processed['formulations'].add(item['li_form'])

            for restriction in item['restrictions']:
                processed['indications'].add(restriction['indications'])
                processed['treatment_phases'].add(restriction['treatment_phase'])

                for brand in item['brands']:
                    processed['combinations'].append({
                        'pbs_code': pbs_code,
                        'drug': item['name'],
                        'brand': brand,
                        'formulation': item['li_form'],
                        'indication': restriction['indications'],
                        'treatment_phase': restriction['treatment_phase'],
                        'streamlined_code': restriction['streamlined_code'],
                        'online_application': restriction['online_application'],
                        'authority_method': restriction['authority_method']
                    })

        return {k: sorted(v) if isinstance(v, set) else v for k, v in processed.items()}

    @staticmethod
    def _write_database(conn, processed_data):
        """Create the schema on *conn* and insert *processed_data* (no commit)."""
        cursor = conn.cursor()

        # Create tables
        for table in ['drugs', 'brands', 'formulations', 'indications', 'treatment_phases']:
            cursor.execute(f'''CREATE TABLE IF NOT EXISTS {table}
                              (id INTEGER PRIMARY KEY, name TEXT UNIQUE)''')
        cursor.execute('''CREATE TABLE IF NOT EXISTS combinations
                          (id INTEGER PRIMARY KEY, pbs_code TEXT, drug_id INTEGER, brand_id INTEGER,
                           formulation_id INTEGER, indication_id INTEGER, treatment_phase_id INTEGER,
                           streamlined_code TEXT, online_application BOOLEAN, authority_method TEXT,
                           FOREIGN KEY (drug_id) REFERENCES drugs(id),
                           FOREIGN KEY (brand_id) REFERENCES brands(id),
                           FOREIGN KEY (formulation_id) REFERENCES formulations(id),
                           FOREIGN KEY (indication_id) REFERENCES indications(id),
                           FOREIGN KEY (treatment_phase_id) REFERENCES treatment_phases(id))''')

        # Insert lookup values
        for table in ['drugs', 'brands', 'formulations', 'indications', 'treatment_phases']:
            cursor.executemany(f"INSERT OR IGNORE INTO {table} (name) VALUES (?)",
                               [(item,) for item in processed_data[table]])

        # Insert combinations, resolving each name to its lookup-table id
        cursor.executemany('''INSERT INTO combinations
                              (pbs_code, drug_id, brand_id, formulation_id, indication_id,
                               treatment_phase_id, streamlined_code, online_application, authority_method)
                              VALUES (?,
                                      (SELECT id FROM drugs WHERE name = ?),
                                      (SELECT id FROM brands WHERE name = ?),
                                      (SELECT id FROM formulations WHERE name = ?),
                                      (SELECT id FROM indications WHERE name = ?),
                                      (SELECT id FROM treatment_phases WHERE name = ?),
                                      ?, ?, ?)''',
                           [(combo['pbs_code'], combo['drug'], combo['brand'], combo['formulation'],
                             combo['indication'], combo['treatment_phase'], combo['streamlined_code'],
                             combo['online_application'], combo['authority_method'])
                            for combo in processed_data['combinations']])

        # Record when the database was written
        cursor.execute('''CREATE TABLE IF NOT EXISTS metadata
                          (key TEXT PRIMARY KEY, value TEXT)''')
        cursor.execute('''INSERT OR REPLACE INTO metadata (key, value)
                          VALUES ('last_updated', ?)''', (datetime.datetime.now().isoformat(),))

    def save_data_to_sqlite(self, data, db_path="rheumatology_biologics_data.db"):
        """Write *data* to a fresh SQLite database at *db_path*.

        The database is built in ``<db_path>.tmp`` and only moved over
        *db_path* once it has been committed and closed, so a failure part-way
        through leaves any existing database untouched.

        Args:
            data: Mapping as returned by ``fetch_rheumatology_biologics_data``.
            db_path: Destination file; an existing file is replaced.
        """
        processed_data = self.preprocess_data(data)

        tmp_path = f"{db_path}.tmp"
        # Leftover from an earlier interrupted run; start from an empty file.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

        try:
            conn = sqlite3.connect(tmp_path)
            try:
                self._write_database(conn, processed_data)
                conn.commit()
            finally:
                conn.close()
            os.replace(tmp_path, db_path)
        except BaseException:
            # Do not leave a half-written temporary file behind.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise
470
+
471
def main():
    """Fetch the rheumatology biologics dataset and store it in SQLite.

    Progress is reported on stdout; any exception is printed rather than
    propagated.
    """
    client = PBSPublicDataAPIClient("2384af7c667342ceb5a736fe29f1dc6b", rate_limit=0.2)

    try:
        print("Fetching data on biologics used for rheumatological diseases...")
        fetched = client.fetch_rheumatology_biologics_data()
        print(f"Data fetched for {len(fetched)} items.")

        # Uses the default database path of save_data_to_sqlite().
        client.save_data_to_sqlite(fetched)
        print("Data saved to rheumatology_biologics_data.db")
    except Exception as e:
        print(f"An error occurred: {str(e)}")


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
python-fasthtml
# Imported by app.py / pbs_data.py but not pulled in by python-fasthtml:
fasthtml-hf
apscheduler
requests
rheumatology_biologics_data.db ADDED
Binary file (143 kB). View file