gunship999 commited on
Commit
8298dd3
Β·
verified Β·
1 Parent(s): e666451

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -54
app.py CHANGED
@@ -4,74 +4,94 @@ import streamlit as st
4
  import time
5
  import random
6
 
7
- # νƒ€κ²Ÿ URL
8
  url = "https://m.news.naver.com/rankingList"
9
 
10
- # 헀더 μ„€μ • (User-Agent 및 Referer μ„€μ •)
11
  headers = {
12
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
13
  "Referer": "https://m.news.naver.com/"
14
  }
15
 
16
- # 랜덀 λ”œλ ˆμ΄ μ„€μ • ν•¨μˆ˜
17
  def random_delay():
18
- delay = random.uniform(1, 3) # 1μ΄ˆμ—μ„œ 3초 μ‚¬μ΄μ˜ 랜덀 λ”œλ ˆμ΄
19
- time.sleep(delay)
20
 
21
- # μŠ€ν¬λž˜ν•‘ν•  데이터가 ν¬ν•¨λœ HTML μ˜μ—­ 선택
22
- def scrape_ranking_news():
23
- random_delay() # 랜덀 λ”œλ ˆμ΄ 적용
24
- response = requests.get(url, headers=headers)
25
- soup = BeautifulSoup(response.text, "html.parser")
 
26
 
27
- # μŠ€ν¬λž˜ν•‘ν•  데이터가 ν¬ν•¨λœ HTML μ˜μ—­ 선택
28
- ranking_news_sections = soup.find_all("div", class_="rankingnews_box")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- news_list = []
31
- for section in ranking_news_sections:
32
- office_name = section.find("strong", class_="rankingnews_name").text # 언둠사λͺ… μΆ”μΆœ
33
- articles = section.find_all("li")
34
 
35
- for article in articles:
36
- rank = article.find("em", class_="list_ranking_num").text
37
- title = article.find("strong", class_="list_title").text
38
- time_posted = article.find("span", class_="list_time").text
39
- link = article.find("a")['href']
40
- image = article.find("img")['src']
 
 
 
 
41
 
42
- news_list.append({
43
- "rank": rank,
44
- "title": title,
45
- "time": time_posted,
46
- "link": link,
47
- "image": image,
48
- "office": office_name
49
- })
50
- return news_list
51
 
52
- # λŒ€μ œλͺ© μΆ”κ°€
53
  st.title("Daily News Scrap in Korea")
54
 
55
- # μ‹€ν–‰ λ²„νŠΌ
56
- if st.button("start"):
57
- # λž­ν‚Ή λ‰΄μŠ€ 데이터λ₯Ό μŠ€ν¬λž˜ν•‘
58
  news_data = scrape_ranking_news()
59
-
60
- # 5x5 ν˜•νƒœλ‘œ 같은 μ–Έλ‘ μ‚¬μ˜ 기사λ₯Ό ν•œ 쀄에 배치
61
- num_columns = 5
62
- for news in news_data:
63
- col_count = 0
64
- cols = st.columns(num_columns)
65
-
66
- for index, news in enumerate(news_data):
67
- with cols[col_count]:
68
- st.image(news['image'])
69
- st.write(f"**{news['rank']}μœ„ - {news['office']}**")
70
- st.write(f"[{news['title']}]({news['link']})")
71
- st.write(f"πŸ•’ {news['time']}")
72
- col_count += 1
73
-
74
- # 5개 좜λ ₯ ν›„ μƒˆλ‘œμš΄ ν–‰μœΌλ‘œ
75
- if col_count == num_columns:
76
- col_count = 0
77
- cols = st.columns(num_columns)
 
4
  import time
5
  import random
6
 
7
# Target: Naver mobile ranking-news page
url = "https://m.news.naver.com/rankingList"

# Browser-like headers (User-Agent + Referer) so the request is not
# rejected as an obvious bot.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    "Referer": "https://m.news.naver.com/",
}
15
 
 
def random_delay():
    """Sleep a random 1-3 seconds to avoid hammering the server."""
    pause = random.uniform(1, 3)
    time.sleep(pause)
18
 
def safe_find(element, selector, class_name, attribute=None):
    """Safely look up a child tag and return its text or an attribute.

    Args:
        element: a BeautifulSoup tag to search within.
        selector: tag name to find (e.g. "strong", "a").
        class_name: CSS class to filter by, or None for no class filter.
        attribute: if given, return this attribute's value instead of text.

    Returns:
        The tag's text (or the requested attribute value), or None when
        the tag / attribute is missing.

    BUG FIX: previously `class_=class_name` was always passed. In
    BeautifulSoup a filter value of None matches only tags WITHOUT a
    class attribute, so calls like safe_find(article, "a", None, "href")
    silently skipped any <a> that carries a class. Only apply the class
    filter when a class name is actually supplied.
    """
    if class_name is None:
        found = element.find(selector)
    else:
        found = element.find(selector, class_=class_name)
    if found and attribute:
        return found.get(attribute)
    return found.text if found else None
 
def scrape_ranking_news():
    """Scrape the Naver mobile ranking-news page.

    Returns:
        A list of dicts with keys: rank, title, time, link, image,
        office. Returns an empty list (after showing a Streamlit error)
        on any failure -- this function is the app's error boundary.
    """
    try:
        random_delay()
        # timeout prevents a dead/slow server from hanging the Streamlit
        # worker forever (requests has no default timeout)
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # surface HTTP 4xx/5xx as exceptions

        soup = BeautifulSoup(response.text, "html.parser")
        ranking_news_sections = soup.find_all("div", class_="rankingnews_box")

        news_list = []
        for section in ranking_news_sections:
            office_name = safe_find(section, "strong", "rankingnews_name")
            if not office_name:
                continue  # skip malformed sections instead of crashing

            for article in section.find_all("li"):
                # Safely extract all attributes
                rank = safe_find(article, "em", "list_ranking_num")
                title = safe_find(article, "strong", "list_title")
                time_posted = safe_find(article, "span", "list_time")
                link = safe_find(article, "a", None, "href")

                # Image is optional; a missing <img> must not drop the article
                img_tag = article.find("img")
                image = img_tag.get("src") if img_tag else None

                # Keep only articles whose required fields all parsed
                if all([rank, title, time_posted, link]):
                    news_list.append({
                        "rank": rank,
                        "title": title,
                        "time": time_posted,
                        "link": link,
                        "image": image,
                        "office": office_name,
                    })

        return news_list
    except Exception as e:  # top-level boundary: report in UI, never crash
        st.error(f"Error scraping news: {str(e)}")
        return []
 
def display_news(news_data, num_columns=5):
    """Render scraped articles as a grid, `num_columns` cards per row.

    Shows a warning and returns early when there is nothing to display.
    """
    if not news_data:
        st.warning("No news articles found.")
        return

    cols = st.columns(num_columns)
    slot = 0

    for item in news_data:
        with cols[slot]:
            # Image is optional and may 404; never let it break the page
            if item['image']:
                try:
                    st.image(item['image'])
                except Exception:
                    st.warning("Image unavailable")

            st.write(f"**{item['rank']}위 - {item['office']}**")
            st.write(f"[{item['title']}]({item['link']})")
            st.write(f"🕒 {item['time']}")

        # Advance to the next slot; start a fresh row after the last column
        slot = (slot + 1) % num_columns
        if slot == 0:
            cols = st.columns(num_columns)
 
# --- Main app ---
st.title("Daily News Scrap in Korea")

if st.button("Start"):
    # Scrape on demand and render the results
    display_news(scrape_ranking_news())