BraydenMoore committed on
Commit
bfbcac4
1 Parent(s): c2973db

fix get_week

Browse files
Source/Predict/.DS_Store ADDED
Binary file (6.15 kB). View file
 
Source/Predict/__pycache__/predict.cpython-311.pyc CHANGED
Binary files a/Source/Predict/__pycache__/predict.cpython-311.pyc and b/Source/Predict/__pycache__/predict.cpython-311.pyc differ
 
Source/Predict/__pycache__/predict.cpython-312.pyc ADDED
Binary file (10.1 kB). View file
 
Source/Predict/predict.py CHANGED
@@ -1,169 +1,156 @@
1
- import xgboost as xgb
2
- import numpy as np
3
- import pandas as pd
4
- import pickle as pkl
5
- import os
6
- import requests
7
- from bs4 import BeautifulSoup
8
- import warnings
9
- warnings.filterwarnings("ignore")
10
- from datetime import datetime
11
-
12
- # set dirs for other files
13
- current_directory = os.path.dirname(os.path.abspath(__file__))
14
- parent_directory = os.path.dirname(current_directory)
15
- data_directory = os.path.join(parent_directory, 'Data')
16
- model_directory = os.path.join(parent_directory, 'Models')
17
- pickle_directory = os.path.join(parent_directory, 'Pickles')
18
-
19
- file_path = os.path.join(data_directory, 'gbg_this_year.csv')
20
- gbg = pd.read_csv(file_path, low_memory=False)
21
-
22
- file_path = os.path.join(data_directory, 'results.csv')
23
- results = pd.read_csv(file_path, low_memory=False)
24
-
25
- # get team abbreviations
26
- file_path = os.path.join(pickle_directory, 'team_name_to_abbreviation.pkl')
27
- with open(file_path, 'rb') as f:
28
- team_name_to_abbreviation = pkl.load(f)
29
-
30
- file_path = os.path.join(pickle_directory, 'team_abbreviation_to_name.pkl')
31
- with open(file_path, 'rb') as f:
32
- team_abbreviation_to_name = pkl.load(f)
33
-
34
- # get schedule
35
- file_path = os.path.join(pickle_directory, 'schedule.pkl')
36
- with open(file_path, 'rb') as f:
37
- schedule = pkl.load(f)
38
-
39
- # load models
40
- # moneyline
41
- model = 'xgboost_ML_no_odds_71.4%'
42
- file_path = os.path.join(model_directory, f'{model}.json')
43
- xgb_ml = xgb.Booster()
44
- xgb_ml.load_model(file_path)
45
-
46
- # over/under
47
- model = 'xgboost_OU_no_odds_59.8%'
48
- file_path = os.path.join(model_directory, f'{model}.json')
49
- xgb_ou = xgb.Booster()
50
- xgb_ou.load_model(file_path)
51
-
52
-
53
- def get_week():
54
- headers = {
55
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
56
- 'Accept-Encoding': 'gzip, deflate',
57
- 'Accept-Language': 'en-US,en;q=0.9',
58
- 'Cache-Control': 'max-age=0',
59
- 'Connection': 'keep-alive',
60
- 'Dnt': '1',
61
- 'Upgrade-Insecure-Requests': '1',
62
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
63
- }
64
- url = 'https://www.nfl.com/schedules/'
65
- resp = requests.get(url,headers=headers)
66
- soup = BeautifulSoup(resp.text, 'html.parser')
67
- h2_tags = soup.find_all('h2')
68
- year = h2_tags[0].getText().split(' ')[0]
69
- week = h2_tags[0].getText().split(' ')[-1]
70
- return int(week), int(year)
71
-
72
-
73
- def get_games(week):
74
- # pull from NBC
75
- #url = 'https://www.nbcsports.com/nfl/schedule'
76
- #df = pd.read_html(url)[week-1]
77
- df = schedule[week-1]
78
- df['Away Team'] = [' '.join(i.split('\xa0')[1:]) for i in df['Away TeamAway Team']]
79
- df['Home Team'] = [' '.join(i.split('\xa0')[1:]) for i in df['Home TeamHome Team']]
80
- df['Date'] = pd.to_datetime(df['Game TimeGame Time'])
81
- df['Date'] = df['Date'].dt.strftime('%A %d/%m %I:%M %p')
82
- df['Date'] = df['Date'].apply(lambda x: f"{x.split()[0]} {int(x.split()[1].split('/')[1])}/{int(x.split()[1].split('/')[0])} {x.split()[2]}".capitalize())
83
-
84
- return df[['Away Team','Home Team','Date']]
85
-
86
-
87
- def get_one_week(home,away,season,week):
88
- try:
89
- max_GP_home = gbg.loc[((gbg['home_team'] == home) | (gbg['away_team'] == home)) & (gbg['GP'] < week)]['GP'].max()
90
- max_GP_away = gbg.loc[((gbg['home_team'] == away) | (gbg['away_team'] == away)) & (gbg['GP'] < week)]['GP'].max()
91
-
92
- home_df = gbg.loc[((gbg['away_team']==home) | (gbg['home_team']==home)) & (gbg['Season']==season) & (gbg['GP']==max_GP_home)]
93
- gbg_home_team = home_df['home_team'].item()
94
- home_df.drop(columns=['game_id','home_team','away_team','Season','game_date'], inplace=True)
95
- home_df = home_df[[i for i in home_df.columns if '.Away' not in i] if gbg_home_team==home else [i for i in home_df.columns if '.Away' in i]]
96
- home_df.columns = [i.replace('.Away','') for i in home_df.columns]
97
-
98
- away_df = gbg.loc[((gbg['away_team']==away) | (gbg['home_team']==away)) & (gbg['Season']==season) & (gbg['GP']==max_GP_away)]
99
- gbg_home_team = away_df['home_team'].item()
100
- away_df.drop(columns=['game_id','home_team','away_team','Season','game_date'], inplace=True)
101
- away_df = away_df[[i for i in away_df.columns if '.Away' not in i] if gbg_home_team==away else [i for i in away_df.columns if '.Away' in i]]
102
- away_df.columns = [i.replace('.Away','') + '.Away' for i in away_df.columns]
103
-
104
- df = home_df.reset_index(drop=True).merge(away_df.reset_index(drop=True), left_index=True, right_index=True)
105
- return df
106
- except ValueError:
107
- return pd.DataFrame()
108
-
109
-
110
- def predict(home,away,season,week,total):
111
- global results
112
-
113
- # finish preparing data
114
- if len(home)>4:
115
- home_abbrev = team_name_to_abbreviation[home]
116
- else:
117
- home_abbrev = home
118
-
119
- if len(away)>4:
120
- away_abbrev = team_name_to_abbreviation[away]
121
- else:
122
- away_abbrev = away
123
-
124
- data = get_one_week(home_abbrev,away_abbrev,season,week)
125
- data['Total Score Close'] = total
126
- matrix = xgb.DMatrix(data.astype(float).values)
127
-
128
- # create game id
129
- if week < 10:
130
- game_id = str(season) + '_0' + str(int(week)) + '_' + away_abbrev + '_' + home_abbrev
131
- else:
132
- game_id = str(season) + '_' + str(int(week)) + '_' + away_abbrev + '_' + home_abbrev
133
-
134
- try:
135
- moneyline_result = results.loc[results['game_id']==game_id, 'winner'].item()
136
- except:
137
- moneyline_result = 'N/A'
138
-
139
- try:
140
- ml_predicted_proba = xgb_ml.predict(matrix)[0][1]
141
- winner_proba = max([ml_predicted_proba, 1-ml_predicted_proba]).item()
142
- moneyline = {'Winner': [home if ml_predicted_proba>0.5 else away if ml_predicted_proba<0.5 else 'Toss-Up'],
143
- 'Probabilities':[winner_proba],
144
- 'Result': moneyline_result}
145
- except:
146
- moneyline = {'Winner': 'NA',
147
- 'Probabilities':['N/A'],
148
- 'Result': moneyline_result}
149
-
150
- try:
151
- result = results.loc[results['game_id']==game_id, 'total'].item()
152
- over_under_result = 'Over' if float(result)>float(total) else 'Push' if float(result)==float(total) else 'Under'
153
-
154
- except:
155
- over_under_result = 'N/A'
156
-
157
- try:
158
- ou_predicted_proba = xgb_ou.predict(matrix)[0][1]
159
- ou_proba = max([ou_predicted_proba, 1-ou_predicted_proba]).item()
160
-
161
- over_under = {'Over/Under': ['Over' if ou_predicted_proba>0.5 else 'Under'],
162
- 'Probability': [ou_proba],
163
- 'Result': over_under_result}
164
- except:
165
- over_under = {'Over/Under': 'N/A',
166
- 'Probability': ['N/A'],
167
- 'Result': over_under_result}
168
-
169
- return game_id, moneyline, over_under
 
1
+ import xgboost as xgb
2
+ import numpy as np
3
+ import pandas as pd
4
+ import pickle as pkl
5
+ import os
6
+ import requests
7
+ from bs4 import BeautifulSoup
8
+ import warnings
9
+ warnings.filterwarnings("ignore")
10
+ from datetime import datetime
11
+
12
# Resolve the resource directories relative to this file so the module loads
# the same data regardless of the current working directory.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')


def _load_pickle(file_name):
    """Return the object unpickled from ``file_name`` inside ``pickle_directory``."""
    # NOTE(review): unpickling can execute arbitrary code. These files must only
    # ever be the ones shipped with the project, never user-supplied input.
    with open(os.path.join(pickle_directory, file_name), 'rb') as f:
        return pkl.load(f)


def _load_booster(model_name):
    """Return an ``xgb.Booster`` loaded from ``<model_name>.json`` in ``model_directory``."""
    booster = xgb.Booster()
    booster.load_model(os.path.join(model_directory, f'{model_name}.json'))
    return booster


# tabular inputs used by get_one_week() and predict()
gbg = pd.read_csv(os.path.join(data_directory, 'gbg_this_year.csv'), low_memory=False)
results = pd.read_csv(os.path.join(data_directory, 'results.csv'), low_memory=False)

# get team abbreviations
team_name_to_abbreviation = _load_pickle('team_name_to_abbreviation.pkl')
team_abbreviation_to_name = _load_pickle('team_abbreviation_to_name.pkl')

# get schedule (indexed by ``week - 1`` in get_games())
schedule = _load_pickle('schedule.pkl')

# get current week (get_week() reads its 'week' and 'year' entries)
the_week = _load_pickle('the_week.pkl')

# load models
# moneyline
xgb_ml = _load_booster('xgboost_ML_no_odds_71.4%')

# over/under
xgb_ou = _load_booster('xgboost_OU_no_odds_59.8%')
56
+
57
+
58
def get_week():
    """Return the current ``(week, year)`` pair as ints.

    Both values are read from the module-level ``the_week`` mapping (keys
    ``'week'`` and ``'year'``) rather than being fetched at call time.
    """
    # Look up both entries first, then convert, so a missing key surfaces
    # before any int() conversion is attempted.
    raw_week, raw_year = (the_week[key] for key in ('week', 'year'))
    return int(raw_week), int(raw_year)
62
+
63
+
64
def _format_kickoff(stamp):
    """Turn ``'<Weekday> <dd>/<mm> <hh:mm> <AM|PM>'`` into ``'<Weekday> <m>/<d> <hh:mm>'``."""
    # NOTE(review): the AM/PM token is discarded here, exactly as the previous
    # inline lambda did -- confirm that is intended before changing the output.
    weekday, day_month, clock = stamp.split()[:3]
    day, month = day_month.split('/')
    # int() strips the zero padding from day and month.
    return f"{weekday} {int(month)}/{int(day)} {clock}".capitalize()


def get_games(week):
    """Return the games scheduled for ``week`` as a three-column DataFrame.

    Args:
        week: 1-based week number; the frame is taken from ``schedule[week - 1]``.
            NOTE(review): if ``schedule`` is a list, ``week=0`` silently wraps
            to the last entry through negative indexing.

    Returns:
        A DataFrame with columns ``'Away Team'``, ``'Home Team'`` and ``'Date'``.
        Team names are everything after the first non-breaking-space separated
        token of the raw schedule cell; ``'Date'`` is a string such as
        ``'Thursday 9/7 08:20'``.
    """
    # Work on a copy: the schedule frames are module-level state shared by every
    # caller, and adding derived columns to them in place mutates that state.
    df = schedule[week - 1].copy()
    df['Away Team'] = [' '.join(cell.split('\xa0')[1:]) for cell in df['Away TeamAway Team']]
    df['Home Team'] = [' '.join(cell.split('\xa0')[1:]) for cell in df['Home TeamHome Team']]
    kickoff = pd.to_datetime(df['Game TimeGame Time'])
    df['Date'] = kickoff.dt.strftime('%A %d/%m %I:%M %p').apply(_format_kickoff)
    return df[['Away Team', 'Home Team', 'Date']]
72
+
73
+
74
def _latest_team_stats(team, season, week):
    """Return ``team``'s most recent stat row before ``week`` of ``season``.

    The row comes from ``gbg``; its stat columns are renamed to the un-suffixed
    form whether the team was the home or the away side of that game.

    Raises:
        ValueError: if there is not exactly one matching row (``Series.item``).
    """
    played = gbg.loc[
        ((gbg['home_team'] == team) | (gbg['away_team'] == team))
        & (gbg['Season'] == season)
        & (gbg['GP'] < week)
    ]
    # The season filter is applied before taking the max, so a higher GP value
    # from a different season cannot make the lookup come back empty.
    latest = played.loc[played['GP'] == played['GP'].max()]
    was_home = latest['home_team'].item() == team
    # Non-inplace drop: ``latest`` is a slice of the shared ``gbg`` frame.
    stats = latest.drop(columns=['game_id', 'home_team', 'away_team', 'Season', 'game_date'])
    if was_home:
        wanted = [col for col in stats.columns if '.Away' not in col]
    else:
        wanted = [col for col in stats.columns if '.Away' in col]
    stats = stats[wanted]
    stats.columns = [col.replace('.Away', '') for col in stats.columns]
    return stats.reset_index(drop=True)


def get_one_week(home, away, season, week):
    """Build the single-row feature frame for a ``home`` vs ``away`` game.

    Args:
        home: Home team identifier as it appears in ``gbg['home_team']`` /
            ``gbg['away_team']``.
        away: Away team identifier, same convention.
        season: Value matched against ``gbg['Season']``.
        week: Only rows with ``gbg['GP'] < week`` are considered.

    Returns:
        A one-row DataFrame holding the home team's latest stats (plain column
        names) followed by the away team's latest stats (``'.Away'`` suffix),
        or an empty DataFrame when either team has no unique prior row.
    """
    try:
        home_stats = _latest_team_stats(home, season, week)
        away_stats = _latest_team_stats(away, season, week)
        away_stats.columns = [f'{col}.Away' for col in away_stats.columns]
        return home_stats.merge(away_stats, left_index=True, right_index=True)
    except ValueError:
        # No unique prior game for one of the teams (for example, week 1).
        return pd.DataFrame()
95
+
96
+
97
def _first_row_proba(booster, matrix):
    """Return ``(proba, confidence)`` for the first row of ``matrix``, or ``None``.

    ``proba`` is element ``[0][1]`` of ``booster.predict(matrix)`` and
    ``confidence`` is ``max(proba, 1 - proba)`` as a plain Python number.
    ``None`` is returned when there is no matrix or the prediction fails.
    """
    if matrix is None:
        return None
    try:
        proba = booster.predict(matrix)[0][1]
        return proba, max([proba, 1 - proba]).item()
    except Exception:
        # Best-effort: any model/shape problem degrades to the 'N/A' placeholders.
        return None


def predict(home, away, season, week, total):
    """Predict the moneyline winner and the over/under call for one game.

    Args:
        home: Home team. Values longer than four characters are translated
            through ``team_name_to_abbreviation``; shorter values are used
            as the abbreviation directly.
        away: Away team, same convention as ``home``.
        season: Season passed to ``get_one_week`` and used as the game-id prefix.
        week: Numeric week; zero-padded to two digits inside the game id.
        total: Over/under line. It is added to the features as
            ``'Total Score Close'`` and compared with the recorded total.

    Returns:
        ``(game_id, moneyline, over_under)`` where ``game_id`` is
        ``'<season>_<WW>_<away>_<home>'`` and the two dicts carry the pick, its
        probability and the recorded result (``'N/A'`` when unavailable).

    Raises:
        KeyError: if a long team name is missing from ``team_name_to_abbreviation``.
    """
    # finish preparing data
    home_abbrev = team_name_to_abbreviation[home] if len(home) > 4 else home
    away_abbrev = team_name_to_abbreviation[away] if len(away) > 4 else away

    data = get_one_week(home_abbrev, away_abbrev, season, week)
    if data.empty:
        # No prior-game features (get_one_week's fallback): skip the model
        # input entirely instead of handing xgboost a zero-row matrix.
        matrix = None
    else:
        data['Total Score Close'] = total
        matrix = xgb.DMatrix(data.astype(float).values)

    # create game id
    game_id = f'{season}_{int(week):02d}_{away_abbrev}_{home_abbrev}'

    # Recorded outcomes are optional: a missing or ambiguous row yields 'N/A'.
    try:
        moneyline_result = results.loc[results['game_id'] == game_id, 'winner'].item()
    except Exception:
        moneyline_result = 'N/A'

    try:
        result = results.loc[results['game_id'] == game_id, 'total'].item()
        over_under_result = 'Over' if float(result) > float(total) else 'Push' if float(result) == float(total) else 'Under'
    except Exception:
        over_under_result = 'N/A'

    ml_prediction = _first_row_proba(xgb_ml, matrix)
    if ml_prediction is None:
        # NOTE(review): this placeholder is the bare string 'NA' (not 'N/A' and
        # not a list); kept byte-for-byte because callers may match on it.
        moneyline = {'Winner': 'NA',
                     'Probabilities': ['N/A'],
                     'Result': moneyline_result}
    else:
        ml_predicted_proba, winner_proba = ml_prediction
        # Above 0.5 the home side is reported as the pick, below 0.5 the away side.
        moneyline = {'Winner': [home if ml_predicted_proba > 0.5 else away if ml_predicted_proba < 0.5 else 'Toss-Up'],
                     'Probabilities': [winner_proba],
                     'Result': moneyline_result}

    ou_prediction = _first_row_proba(xgb_ou, matrix)
    if ou_prediction is None:
        over_under = {'Over/Under': 'N/A',
                      'Probability': ['N/A'],
                      'Result': over_under_result}
    else:
        ou_predicted_proba, ou_proba = ou_prediction
        over_under = {'Over/Under': ['Over' if ou_predicted_proba > 0.5 else 'Under'],
                      'Probability': [ou_proba],
                      'Result': over_under_result}

    return game_id, moneyline, over_under