import requests
from bs4 import BeautifulSoup
import streamlit as st
import time
import random

# Target URL
url = "https://m.news.naver.com/rankingList"

# Request headers: a browser User-Agent and Referer so the request looks like normal browser traffic
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    "Referer": "https://m.news.naver.com/"
}


def random_delay():
    """Sleep for a random 1-3 seconds so requests are not fired in rapid succession."""
    time.sleep(random.uniform(1, 3))


def safe_find(element, selector, class_name=None, attribute=None):
    """Safely find a child tag and return its text, or a given attribute, or None if missing."""
    if class_name is not None:
        found = element.find(selector, class_=class_name)
    else:
        # No class filter: match the first tag of this type
        found = element.find(selector)
    if found and attribute:
        return found.get(attribute)
    return found.text if found else None
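
# Note: the CSS class names used below (rankingnews_box, rankingnews_name,
# list_ranking_num, list_title, list_time) are tied to the current markup of the
# Naver mobile ranking page and may need updating if the page layout changes.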


def scrape_ranking_news():
    """Scrape the Naver mobile ranking page and return a list of article dicts."""
    try:
        random_delay()
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Check for HTTP errors
        soup = BeautifulSoup(response.text, "html.parser")
        ranking_news_sections = soup.find_all("div", class_="rankingnews_box")

        news_list = []
        for section in ranking_news_sections:
            office_name = safe_find(section, "strong", "rankingnews_name")
            if not office_name:
                continue

            articles = section.find_all("li")
            for article in articles:
                # Safely extract all fields
                rank = safe_find(article, "em", "list_ranking_num")
                title = safe_find(article, "strong", "list_title")
                time_posted = safe_find(article, "span", "list_time")
                link = safe_find(article, "a", None, "href")

                # Handle the image separately as it needs its own null check
                img_tag = article.find("img")
                image = img_tag.get("src") if img_tag else None

                if all([rank, title, time_posted, link]):  # Ensure all required fields exist
                    news_list.append({
                        "rank": rank,
                        "title": title,
                        "time": time_posted,
                        "link": link,
                        "image": image,
                        "office": office_name
                    })
        return news_list
    except Exception as e:
        st.error(f"Error scraping news: {str(e)}")
        return []
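
# Each item produced by scrape_ranking_news() is a dict with the keys
# rank, title, time, link, image and office; "image" may be None when an
# article has no thumbnail. All other values are strings taken from the page.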


def display_news(news_data, num_columns=5):
    """Render the articles in a grid, num_columns cards per row."""
    if not news_data:
        st.warning("No news articles found.")
        return

    col_count = 0
    cols = st.columns(num_columns)
    for news in news_data:
        with cols[col_count]:
            if news['image']:
                try:
                    st.image(news['image'])
                except Exception:
                    st.warning("Image unavailable")
            st.write(f"**{news['rank']}위 - {news['office']}**")
            st.write(f"[{news['title']}]({news['link']})")
            st.write(f"🕒 {news['time']}")
        col_count = (col_count + 1) % num_columns
        if col_count == 0:  # Row is full, start a new row of columns
            cols = st.columns(num_columns)


# Main app
st.title("Daily News Scrap in Korea")

if st.button("Start"):
    news_data = scrape_ranking_news()
    display_news(news_data)
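
# To run the app locally (assuming this file is saved as app.py and the
# requests, beautifulsoup4 and streamlit packages are installed):
#   streamlit run app.py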