"""Streamlit app that scrapes and displays Naver mobile ranking news."""

import random
import time

import requests
import streamlit as st
from bs4 import BeautifulSoup

# Target URL: Naver mobile ranking-news page.
url = "https://m.news.naver.com/rankingList"

# Browser-like headers so the request is less likely to be rejected as a bot.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    "Referer": "https://m.news.naver.com/"
}


def random_delay():
    """Sleep for a random 1-3 seconds to mimic human browsing pace."""
    time.sleep(random.uniform(1, 3))


def safe_find(element, selector, class_name, attribute=None):
    """Safely find elements and their attributes.

    Returns the attribute value when `attribute` is given, the element's
    text otherwise, or None when nothing matches.
    """
    found = element.find(selector, class_=class_name)
    if found and attribute:
        return found.get(attribute)
    return found.text if found else None


def scrape_ranking_news():
    """Fetch the ranking page and return a list of article dicts.

    Each dict has keys: rank, title, time, link, image, office.
    Returns an empty list on any error (the error is shown via st.error).
    """
    try:
        random_delay()
        # timeout added so a stalled connection cannot hang the app forever;
        # a requests.Timeout is caught by the broad handler below.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Check for HTTP errors
        soup = BeautifulSoup(response.text, "html.parser")
        ranking_news_sections = soup.find_all("div", class_="rankingnews_box")

        news_list = []
        for section in ranking_news_sections:
            office_name = safe_find(section, "strong", "rankingnews_name")
            if not office_name:
                continue  # skip sections with no press-office name

            articles = section.find_all("li")
            for article in articles:
                # Safely extract all attributes
                rank = safe_find(article, "em", "list_ranking_num")
                title = safe_find(article, "strong", "list_title")
                time_posted = safe_find(article, "span", "list_time")
                link = safe_find(article, "a", None, "href")

                # Handle image separately as it needs specific null checking
                img_tag = article.find("img")
                image = img_tag.get('src') if img_tag else None

                if all([rank, title, time_posted, link]):  # Ensure all required fields exist
                    news_list.append({
                        "rank": rank,
                        "title": title,
                        "time": time_posted,
                        "link": link,
                        "image": image,
                        "office": office_name
                    })
        return news_list
    except Exception as e:
        # Boundary handler: surface the error in the UI and degrade gracefully.
        st.error(f"Error scraping news: {str(e)}")
        return []


def display_news(news_data, num_columns=5):
    """Render scraped articles in a grid of `num_columns` Streamlit columns."""
    if not news_data:
        st.warning("No news articles found.")
        return

    col_count = 0
    cols = st.columns(num_columns)
    for news in news_data:
        with cols[col_count]:
            if news['image']:
                try:
                    st.image(news['image'])
                except Exception:
                    # Broken/unreachable image URLs should not abort rendering.
                    st.warning("Image unavailable")
            st.write(f"**{news['rank']}위 - {news['office']}**")
            st.write(f"[{news['title']}]({news['link']})")
            st.write(f"🕒 {news['time']}")
        col_count = (col_count + 1) % num_columns
        if col_count == 0:
            # Start a fresh row of columns once the current row is full.
            cols = st.columns(num_columns)


# Main app
st.title("Daily News Scrap in Korea")

if st.button("Start"):
    news_data = scrape_ranking_news()
    display_news(news_data)