Spaces:

gunship999
/

Korea-Daily-News

Running

App Files Files Community

Korea-Daily-News / app.py

gunship999

Update app.py

8298dd3 verified 3 days ago

raw

history blame

3.25 kB

	import requests
	from bs4 import BeautifulSoup
	import streamlit as st
	import time
	import random

	# Ranking page that is fetched and parsed
	url = "https://m.news.naver.com/rankingList"

	# HTTP headers sent with the request (browser-like User-Agent plus Referer)
	headers = {}
	headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0"
	headers["Referer"] = "https://m.news.naver.com/"

	def random_delay(min_seconds: float = 1.0, max_seconds: float = 3.0) -> None:
	    """Sleep for a random duration drawn uniformly between the two bounds.

	    Args:
	        min_seconds: Lower bound of the pause, in seconds (default 1).
	        max_seconds: Upper bound of the pause, in seconds (default 3).
	    """
	    time.sleep(random.uniform(min_seconds, max_seconds))

	def safe_find(element, selector, class_name, attribute=None):
	    """Return the text, or one attribute, of the first matching descendant.

	    Args:
	        element: Object exposing a BeautifulSoup-style ``find`` method.
	        selector: Tag name to search for.
	        class_name: CSS class the tag must carry, or ``None`` to match on
	            the tag name alone.
	        attribute: When given, return this attribute's value instead of
	            the tag's text.

	    Returns:
	        The attribute value or text of the first match, or ``None`` when
	        nothing matches (or the requested attribute is absent).
	    """
	    # Only forward a class filter when one was requested: an explicit
	    # ``class_=None`` is still handed to BeautifulSoup as an attribute
	    # filter and can exclude tags that do carry a class.
	    filters = {} if class_name is None else {"class_": class_name}
	    found = element.find(selector, **filters)
	    if found is None:
	        return None
	    if attribute:
	        return found.get(attribute)
	    return found.text

	def scrape_ranking_news():
	    """Fetch the ranking page and extract every ranked article on it.

	    Returns:
	        A list of dicts with the keys ``rank``, ``title``, ``time``,
	        ``link``, ``image`` and ``office``. Articles missing any of rank,
	        title, time or link are skipped. On any failure the error is
	        reported through ``st.error`` and an empty list is returned.
	    """
	    try:
	        random_delay()
	        # Bound the request so an unresponsive server cannot hang the app;
	        # a timeout surfaces through the error handler below.
	        response = requests.get(url, headers=headers, timeout=10)
	        response.raise_for_status()  # Check for HTTP errors

	        soup = BeautifulSoup(response.text, "html.parser")

	        news_list = []
	        for section in soup.find_all("div", class_="rankingnews_box"):
	            office_name = safe_find(section, "strong", "rankingnews_name")
	            if not office_name:
	                continue

	            for article in section.find_all("li"):
	                rank = safe_find(article, "em", "list_ranking_num")
	                title = safe_find(article, "strong", "list_title")
	                time_posted = safe_find(article, "span", "list_time")

	                # Link and image are located by tag name alone, with no
	                # class filter, so anchors that carry a class still match.
	                link_tag = article.find("a")
	                link = link_tag.get("href") if link_tag else None
	                img_tag = article.find("img")
	                image = img_tag.get("src") if img_tag else None

	                # The image is optional; every other field is required.
	                if all([rank, title, time_posted, link]):
	                    news_list.append({
	                        "rank": rank,
	                        "title": title,
	                        "time": time_posted,
	                        "link": link,
	                        "image": image,
	                        "office": office_name,
	                    })

	        return news_list
	    except Exception as e:
	        # UI boundary: show the failure to the user instead of crashing.
	        st.error(f"Error scraping news: {e}")
	        return []

	def display_news(news_data, num_columns=5):
	    """Render the articles as a grid with ``num_columns`` entries per row.

	    Args:
	        news_data: Sequence of article dicts with the keys ``image``,
	            ``rank``, ``office``, ``title``, ``link`` and ``time``.
	        num_columns: Number of articles shown side by side in each row.
	    """
	    if not news_data:
	        st.warning("No news articles found.")
	        return

	    slot = 0
	    cols = None
	    for news in news_data:
	        if slot == 0:
	            # Open a row only when an article is about to fill it, so no
	            # empty trailing row is created when the article count is an
	            # exact multiple of num_columns.
	            cols = st.columns(num_columns)

	        with cols[slot]:
	            if news['image']:
	                try:
	                    st.image(news['image'])
	                except Exception:
	                    # A broken image must not stop the rest of the grid.
	                    st.warning("Image unavailable")

	            st.write(f"{news['rank']}위 - {news['office']}")
	            st.write(f"[{news['title']}]({news['link']})")
	            st.write(f"🕒 {news['time']}")

	        slot = (slot + 1) % num_columns

	# Main app: page title plus a button that triggers one scrape-and-render pass
	st.title("Daily News Scrap in Korea")

	start_clicked = st.button("Start")
	if start_clicked:
	    # Fetch the ranking list, then lay it out in the column grid.
	    news_data = scrape_ranking_news()
	    display_news(news_data)