smartinezbragado committed on
Commit
58c2772
1 Parent(s): 950bf93

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +13 -0
  2. requirements.txt +58 -0
  3. views.py +75 -0
app.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from flask import Flask
3
+ from views import views
4
+
5
# Create the Flask application and mount the `views` blueprint so that all of
# its routes are served under the /reddit-app prefix.
app = Flask(__name__)
app.register_blueprint(
    views,
    url_prefix='/reddit-app',
)
7
+
8
@app.route('/', methods=['POST', 'GET'])
def reddit_app_home():
    """Send visitors of the site root to the Reddit app's home page.

    A Flask view must return a response; returning ``None`` (a bare
    ``return``) makes Flask raise a ``TypeError`` and answer with a 500.
    The blueprint mounted under ``/reddit-app`` holds the actual UI, so the
    root simply redirects there.

    Returns:
        A redirect response pointing at the ``views.home`` endpoint.
    """
    # Imported locally because the module-level import only brings in `Flask`.
    from flask import redirect, url_for

    return redirect(url_for('views.home'))
11
+
12
if __name__ == '__main__':
    # The interactive Werkzeug debugger lets anyone who can reach the server
    # execute arbitrary Python, so it must not be switched on unconditionally
    # while the server listens on every interface (0.0.0.0). Debug mode is
    # therefore opt-in: set FLASK_DEBUG=1 (or true/yes) for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, port=8000, host='0.0.0.0')
requirements.txt ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -i https://pypi.org/simple
2
+ bertopic==0.13.0
3
+ certifi==2022.12.7 ; python_version >= '3.6'
4
+ charset-normalizer==3.0.1
5
+ click==8.1.3 ; python_version >= '3.7'
6
+ cython==0.29.33 ; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'
7
+ filelock==3.9.0 ; python_version >= '3.7'
8
+ flask==2.2.2
9
+ hdbscan==0.8.29
10
+ huggingface-hub==0.11.1 ; python_full_version >= '3.7.0'
11
+ idna==3.4 ; python_version >= '3.5'
12
+ importlib-metadata==6.0.0 ; python_version < '3.10'
13
+ itsdangerous==2.1.2 ; python_version >= '3.7'
14
+ jinja2==3.1.2 ; python_version >= '3.7'
15
+ joblib==1.2.0 ; python_version >= '3.7'
16
+ llvmlite==0.39.1 ; python_version >= '3.7'
17
+ markupsafe==2.1.1 ; python_version >= '3.7'
18
+ nltk==3.8.1 ; python_version >= '3.7'
19
+ numba==0.56.4 ; python_version >= '3.7'
20
+ numpy==1.23.5 ; python_version >= '3.8'
21
+ nvidia-cublas-cu11==11.10.3.66 ; platform_system == 'Linux'
22
+ nvidia-cuda-nvrtc-cu11==11.7.99 ; platform_system == 'Linux'
23
+ nvidia-cuda-runtime-cu11==11.7.99 ; platform_system == 'Linux'
24
+ nvidia-cudnn-cu11==8.5.0.96 ; platform_system == 'Linux'
25
+ packaging==23.0 ; python_version >= '3.7'
26
+ pandas==1.5.2
27
+ pillow==9.4.0 ; python_version >= '3.7'
28
+ plotly==5.12.0 ; python_version >= '3.6'
29
+ praw==7.6.1
30
+ prawcore==2.3.0 ; python_version ~= '3.6'
31
+ pynndescent==0.5.8
32
+ python-dateutil==2.8.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'
33
+ python-dotenv==0.21.0
34
+ pytz==2022.7.1
35
+ pyyaml==5.4.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
36
+ regex==2022.10.31 ; python_version >= '3.6'
37
+ requests==2.28.2 ; python_version >= '3.7' and python_version < '4'
38
+ scikit-learn==1.2.0 ; python_version >= '3.8'
39
+ scipy==1.10.0 ; python_version < '3.12' and python_version >= '3.8'
40
+ sentence-transformers==2.2.2 ; python_full_version >= '3.6.0'
41
+ sentencepiece==0.1.97
42
+ setuptools==66.0.0 ; python_version >= '3.7'
43
+ six==1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'
44
+ tenacity==8.1.0 ; python_version >= '3.6'
45
+ threadpoolctl==3.1.0 ; python_version >= '3.6'
46
+ tokenizers==0.13.2
47
+ torch==1.13.1 ; python_full_version >= '3.7.0'
48
+ torchvision==0.14.1 ; python_version >= '3.7'
49
+ tqdm==4.64.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
50
+ transformers==4.25.1 ; python_full_version >= '3.7.0'
51
+ typing-extensions==4.4.0 ; python_version >= '3.7'
52
+ umap-learn==0.5.3
53
+ update-checker==0.18.0
54
+ urllib3==1.26.14 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
55
+ websocket-client==1.4.2 ; python_version >= '3.7'
56
+ werkzeug==2.2.2 ; python_version >= '3.7'
57
+ wheel==0.38.4 ; python_version >= '3.7'
58
+ zipp==3.11.0 ; python_version >= '3.7'
views.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import tempfile
4
+ from bertopic import BERTopic
5
+ from src.reddit import RedditBot
6
+ from flask import Blueprint, render_template, request, send_file, redirect, url_for, send_from_directory
7
+
8
# Directory (relative to the process working directory) where the generated
# CSV files are written.
DOWNLOADS_PATH = os.path.join(os.getcwd(), 'downloads')

# Blueprint's signature is Blueprint(name, import_name). The name is fixed to
# 'views' because the endpoints are referenced as 'views.<function>' in
# url_for(); passing __name__ as the name only worked while this module itself
# happened to be called `views`.
views = Blueprint('views', __name__)

# Module-level singletons shared by every request handled by this blueprint.
reddit = RedditBot()
topic_model = BERTopic()
13
+
14
+
15
def retrieve_subreddits(data: dict) -> pd.DataFrame:
    """Fetch posts from a subreddit and add a combined ``Text`` column.

    Args:
        data: Form-like mapping providing the keys ``'subreddit'``, ``'type'``
            and ``'amount'``. ``'amount'`` must be convertible with ``int()``.

    Returns:
        The DataFrame produced by ``reddit.convert_posts_to_df`` with an extra
        ``'Text'`` column holding ``"<Title>: <Content>"`` for every row.

    Raises:
        TypeError: If ``'amount'`` is missing from ``data``.
        ValueError: If ``'amount'`` is not an integer literal.
    """
    # Retrieve the posts through the Reddit API wrapper.
    posts = reddit.get_subreddits_posts(
        name=data.get('subreddit'),
        type=data.get('type'),
        amount=int(data.get('amount')),
    )
    df = reddit.convert_posts_to_df(posts=posts)

    # Column-wise concatenation instead of a row-by-row apply(): it is faster
    # and, with missing values mapped to '', a post lacking a title or a body
    # no longer raises a TypeError while building the text.
    titles = df['Title'].fillna('').astype(str)
    contents = df['Content'].fillna('').astype(str)
    df['Text'] = titles + ': ' + contents
    return df
25
+
26
@views.route('/', methods=['POST', 'GET'])
def home():
    """Render the search form and, on POST, run topic modelling on a subreddit.

    On GET the form page is rendered. On POST the submitted ``amount``,
    ``type`` and ``subreddit`` fields are validated; an invalid field
    redirects to the matching error page. For valid input the posts are
    fetched, the shared BERTopic model is fitted on them, and two CSV files
    (``topics.csv`` and ``docs_with_topics_info.csv``) are written to
    ``DOWNLOADS_PATH`` before redirecting to the success page.

    Returns:
        The rendered ``index.html`` for GET requests, otherwise a redirect to
        either ``views.error`` or ``views.success``.
    """
    if request.method != 'POST':
        return render_template('index.html')

    data = request.form

    # A missing or non-numeric amount is reported as an 'amount' error rather
    # than crashing the request inside int().
    amount = _parse_amount(data.get('amount'))
    if amount is None or amount < 0 or amount > 1000:
        return redirect(url_for('views.error', type_of_error='amount'))
    if data.get('type') not in ['hot', 'new', 'rising', 'top']:
        return redirect(url_for('views.error', type_of_error='type'))
    if not reddit.subreddit_exists(data.get('subreddit')):
        return redirect(url_for('views.error', type_of_error='subreddit'))

    # Retrieve the subreddit posts.
    subreddits_df = retrieve_subreddits(data=data)

    # Topic modelling using BERTopic; the fitted state lives on topic_model,
    # so the returned (topics, probabilities) pair is not needed here.
    topic_model.fit_transform(subreddits_df.Text)
    topics_df = topic_model.get_topic_info()
    topics_df['Top words'] = [
        str([word for word, _ in topic_model.get_topic(topic)])
        for topic in topics_df.Topic
    ]

    # The target directory is not guaranteed to exist on a fresh deployment.
    os.makedirs(DOWNLOADS_PATH, exist_ok=True)

    # Write the topics overview.
    topics_df.to_csv(os.path.join(DOWNLOADS_PATH, 'topics.csv'), index=False)

    # Write the per-document topic information.
    docs_df = topic_model.get_document_info(subreddits_df.Text)
    docs_df.to_csv(os.path.join(DOWNLOADS_PATH, 'docs_with_topics_info.csv'), index=False)

    # NOTE: the previous send_from_directory() calls were dropped. Their
    # return values were discarded, so nothing was ever sent to the client
    # (a view can return only one response) and each call left an opened file
    # behind. The CSVs remain available in DOWNLOADS_PATH; serving them needs
    # a dedicated download route.
    return redirect(url_for('views.success'))


def _parse_amount(raw_amount):
    """Return ``raw_amount`` as an int, or ``None`` if it is missing or not an integer."""
    try:
        return int(raw_amount)
    except (TypeError, ValueError):
        return None
63
+
64
# '/succes' was a misspelling; it stays registered so existing links keep
# working, while the correctly spelled '/success' is added. Decorators apply
# bottom-up, so '/success' is the first rule registered for this endpoint.
@views.route('/succes', methods=['GET'])
@views.route('/success', methods=['GET'])
def success():
    """Render the confirmation page shown after a successful run."""
    return render_template('success.html')
67
+
68
@views.route('/error/<type_of_error>', methods=['GET'])
def error(type_of_error: str):
    """Render the error page for a failed form validation.

    Args:
        type_of_error: Error kind taken from the URL; one of ``'amount'``,
            ``'type'`` or ``'subreddit'``.

    Returns:
        The rendered ``error.html`` with the message for ``type_of_error``.

    Raises:
        werkzeug.exceptions.NotFound: If ``type_of_error`` is not a known
            kind. Previously this case fell through every branch and returned
            ``None``, which Flask turns into a 500 error.
    """
    # Imported locally because the module-level flask import does not
    # include `abort`.
    from flask import abort

    messages = {
        'amount': 'The amount is higher than 1000 or lower than 0',
        'type': 'The ordering is not within hot, rising, new, top',
        'subreddit': 'The subreddit does not exist',
    }
    message = messages.get(type_of_error)
    if message is None:
        abort(404)
    return render_template('error.html', type_of_error=message)