# File-viewer header residue (not part of the program): "Update app.py" by gunship999, commit 8298dd3 (verified), 3.25 kB.
import requests
from bs4 import BeautifulSoup
import streamlit as st
import time
import random
# Page that is fetched and parsed for the ranking lists.
url = "https://m.news.naver.com/rankingList"

# HTTP headers sent with the request to `url`.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    "Referer": "https://m.news.naver.com/",
}
def random_delay():
    """Block the caller for a random duration between 1 and 3 seconds."""
    pause_seconds = random.uniform(1, 3)
    time.sleep(pause_seconds)
def safe_find(element, selector, class_name, attribute=None):
    """Return the text, or one attribute, of the first matching descendant.

    Args:
        element: Object exposing a BeautifulSoup-style ``find`` method.
        selector: Tag name to look for (e.g. ``"a"``).
        class_name: CSS class the tag must carry, or ``None`` to accept the
            first tag with that name regardless of its classes.
        attribute: When given (and non-empty), return this attribute of the
            match instead of its text.

    Returns:
        The attribute value or the text of the first match; ``None`` when
        nothing matches or the requested attribute is missing.
    """
    # Forwarding class_=None would still install a filter on the class
    # attribute (BeautifulSoup reads None as "attribute must be absent"),
    # silently skipping every tag that has a class. Only filter on class
    # when a class was actually requested.
    filters = {} if class_name is None else {"class_": class_name}
    found = element.find(selector, **filters)
    if found is None:
        return None
    if attribute:
        return found.get(attribute)
    return found.text
def scrape_ranking_news():
    """Download the ranking page at ``url`` and extract its articles.

    Each ``div.rankingnews_box`` section is scanned for ``li`` entries; an
    entry is kept only when rank, title, time and link are all present.

    Returns:
        A list of dicts with the keys ``rank``, ``title``, ``time``,
        ``link``, ``image`` (``None`` when the entry has no ``img``) and
        ``office``. On any error the problem is shown via ``st.error`` and
        an empty list is returned.
    """
    request_timeout = 10  # seconds

    try:
        random_delay()
        # requests applies no timeout by default, so a stalled connection
        # would otherwise block the app indefinitely.
        response = requests.get(url, headers=headers, timeout=request_timeout)
        response.raise_for_status()  # Surface HTTP error statuses as exceptions.
        soup = BeautifulSoup(response.text, "html.parser")

        news_list = []
        for section in soup.find_all("div", class_="rankingnews_box"):
            office_name = safe_find(section, "strong", "rankingnews_name")
            if not office_name:
                continue

            for article in section.find_all("li"):
                rank = safe_find(article, "em", "list_ranking_num")
                title = safe_find(article, "strong", "list_title")
                time_posted = safe_find(article, "span", "list_time")

                # Query the anchor directly, mirroring the image lookup: the
                # link has no class requirement, and a None class filter in
                # BeautifulSoup matches only tags that lack a class attribute.
                link_tag = article.find("a")
                link = link_tag.get("href") if link_tag is not None else None

                img_tag = article.find("img")
                image = img_tag.get("src") if img_tag is not None else None

                # Skip entries missing any required field; image is optional.
                if not all([rank, title, time_posted, link]):
                    continue

                news_list.append({
                    "rank": rank,
                    "title": title,
                    "time": time_posted,
                    "link": link,
                    "image": image,
                    "office": office_name,
                })
        return news_list
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        st.error(f"Error scraping news: {str(e)}")
        return []
def display_news(news_data, num_columns=5):
    """Render articles in a grid of Streamlit columns, row by row.

    Args:
        news_data: List of article dicts; each is read for the keys
            ``image``, ``rank``, ``office``, ``title``, ``link`` and
            ``time``. An empty list shows a warning instead.
        num_columns: Number of columns per row.
    """
    if not news_data:
        st.warning("No news articles found.")
        return

    cols = []
    for index, news in enumerate(news_data):
        position = index % num_columns
        if position == 0:
            # Start a new row only when there is an article to put in it,
            # so no empty trailing row is created.
            cols = st.columns(num_columns)

        with cols[position]:
            if news['image']:
                try:
                    st.image(news['image'])
                except Exception:
                    # Best effort: a broken image must not hide the article.
                    st.warning("Image unavailable")
            # The rank suffix and clock emoji were mis-decoded UTF-8
            # ("μœ„" / "πŸ•’"); restored to the intended characters.
            st.write(f"**{news['rank']}위 - {news['office']}**")
            st.write(f"[{news['title']}]({news['link']})")
            st.write(f"🕒 {news['time']}")
# Main app: show the title, then scrape and render once "Start" is clicked.
st.title("Daily News Scrap in Korea")
start_clicked = st.button("Start")
if start_clicked:
    display_news(scrape_ranking_news())