Spaces:
Runtime error
Runtime error
File size: 3,124 Bytes
58c2772 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import os
import pandas as pd
import tempfile
from bertopic import BERTopic
from src.reddit import RedditBot
from flask import Blueprint, render_template, request, send_file, redirect, url_for, send_from_directory
# Directory, relative to the process working directory, where generated CSV
# files are written.
DOWNLOADS_PATH = os.path.join(os.getcwd(), 'downloads')

# Flask's signature is Blueprint(name, import_name). The name must be 'views'
# because the handlers in this module build URLs with url_for('views.<endpoint>');
# the import name is this module's __name__.
views = Blueprint('views', __name__)

# Module-level singletons shared by every request handled by this blueprint.
reddit = RedditBot()
topic_model = BERTopic()
def retrieve_subreddits(data: dict) -> pd.DataFrame:
    """Fetch posts for the requested subreddit and return them as a DataFrame.

    Args:
        data: Mapping (e.g. the submitted form) providing ``subreddit``,
            ``type`` and ``amount``; ``amount`` must be convertible by ``int()``.

    Returns:
        The DataFrame built by ``reddit.convert_posts_to_df`` with an extra
        ``Text`` column holding ``"<Title>: <Content>"`` for every post.

    Raises:
        TypeError, ValueError: If ``amount`` is missing or not an integer.
    """
    # Retrieve the posts through the Reddit API wrapper.
    posts = reddit.get_subreddits_posts(
        name=data.get('subreddit'),
        type=data.get('type'),
        amount=int(data.get('amount')),
    )
    df = reddit.convert_posts_to_df(posts=posts)

    # Column-wise concatenation instead of a row-wise ``apply``: same text for
    # string columns, and it also works when no posts were returned.
    # NOTE(review): assumes Title and Content are string columns -- confirm
    # against RedditBot.convert_posts_to_df.
    df['Text'] = df['Title'] + ': ' + df['Content']
    return df
@views.route('/', methods=['POST', 'GET'])
def home():
    """Render the input form (GET) or run topic modelling on a subreddit (POST).

    On POST the form fields ``amount``, ``type`` and ``subreddit`` are
    validated; an invalid field redirects to the matching ``views.error``
    page. Otherwise the posts are fetched, the topic model is fitted on them,
    two CSV files (``topics.csv`` and ``docs_with_topics_info.csv``) are
    written to ``DOWNLOADS_PATH`` and the client is redirected to
    ``views.success``.

    Returns:
        The rendered ``index.html`` for GET requests, or a redirect response
        for POST requests.
    """
    if request.method != 'POST':
        return render_template('index.html')

    data = request.form

    # A missing or non-numeric amount is a user error, not a server error.
    try:
        amount = int(data.get('amount'))
    except (TypeError, ValueError):
        return redirect(url_for('views.error', type_of_error='amount'))
    if amount < 0 or amount > 1000:
        return redirect(url_for('views.error', type_of_error='amount'))

    if data.get('type') not in ('hot', 'new', 'rising', 'top'):
        return redirect(url_for('views.error', type_of_error='type'))

    if not reddit.subreddit_exists(data.get('subreddit')):
        return redirect(url_for('views.error', type_of_error='subreddit'))

    # Retrieve the subreddit posts.
    subreddits_df = retrieve_subreddits(data=data)

    # Topic modelling using BERTopic; the fitted state is read back below.
    topic_model.fit_transform(subreddits_df.Text)
    topics_df = topic_model.get_topic_info()
    topics_df['Top words'] = topics_df.Topic.map(
        lambda topic: str([word for word, _ in topic_model.get_topic(topic)])
    )

    # The target directory may not exist yet on a fresh deployment.
    os.makedirs(DOWNLOADS_PATH, exist_ok=True)

    # Export the topics overview.
    topics_df.to_csv(os.path.join(DOWNLOADS_PATH, 'topics.csv'), index=False)

    # Export the per-document topic assignments.
    docs_df = topic_model.get_document_info(subreddits_df.Text)
    docs_df.to_csv(
        os.path.join(DOWNLOADS_PATH, 'docs_with_topics_info.csv'),
        index=False,
    )

    # NOTE(review): the previous code called send_from_directory() twice and
    # discarded the results. A response only reaches the client when the view
    # returns it, so those calls sent nothing and only left open file handles
    # behind. One request can return a single response (here: the redirect);
    # serving the CSVs needs a dedicated download route.
    return redirect(url_for('views.success'))
# '/succes' is a misspelling; it is kept so existing links keep working, and
# the correctly spelled '/success' is served by the same view.
@views.route('/succes', methods=['GET'])
@views.route('/success', methods=['GET'])
def success():
    """Render the confirmation page shown after a successful run."""
    return render_template('success.html')
@views.route('/error/<type_of_error>', methods=['GET'])
def error(type_of_error: str):
    """Render the error page for a failed form validation.

    Args:
        type_of_error: Error key taken from the URL; one of ``amount``,
            ``type`` or ``subreddit``.

    Returns:
        The rendered ``error.html`` with the matching message. An unknown key
        yields a generic message with HTTP status 404 instead of falling
        through and returning ``None`` (which Flask rejects with a 500).
    """
    messages = {
        'amount': 'The amount is higher than 1000 or lower than 0',
        'type': 'The ordering is not within hot, rising, new, top',
        'subreddit': 'The subreddit does not exist',
    }
    message = messages.get(type_of_error)
    if message is None:
        return render_template('error.html', type_of_error='Unknown error'), 404
    return render_template('error.html', type_of_error=message)
|