Yassmen committed
Commit a5b8861
Parent: 2a87767

Update app.py

Files changed (1):
  1. app.py +51 -66
app.py CHANGED
@@ -41,68 +41,53 @@ import subprocess
 import os
 import subprocess
 
-
-# Make sure the chromedriver is executable
-#os.chmod('chromedriver', 0o755)
-# Set up Chrome options
-#options = webdriver.ChromeOptions()
-#options.add_argument('--headless')
-#options.add_argument('--no-sandbox')
-#options.add_argument('--disable-dev-shm-usage')
-#from selenium import webdriver
-
-#options = webdriver.ChromeOptions()
-#options.add_argument('--headless')
-#driver = webdriver.Chrome(options=options)
-#print("'''''",driver.capabilities['browserVersion'],"''''")
-#driver.quit()
-# Initialize the ChromeDriver
-#service = Service('chromedriver')
-#driver = webdriver.Chrome(service=service, options=options)
-
+import streamlit as st
+import numpy as np
+import pandas as pd
+import time
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.chrome.options import Options
+from webdriver_manager.chrome import ChromeDriverManager
 from wuzzuf_scraper import Wuzzuf_scrapping
 from linkedin_scraper import LINKEDIN_Scrapping
-from data_analysis import map_bubble,linkedin_exp,wuzzuf_exp
-
-####################### stream lit app ################################
-
+from data_analysis import map_bubble, linkedin_exp, wuzzuf_exp
 
+# Set up Streamlit page configuration
 st.set_page_config(page_title="My Web_Scrap Page", page_icon=":tada:", layout="wide")
 
-
 # ---- HEADER SECTION ----
 with st.container():
     left_column, right_column = st.columns(2)
     with left_column:
         st.subheader("Hi! I am Yassmen :wave:")
-        st.title("An Electronics and Communcation Engineer")
-        st.write(
-            "In this app we will scrap jobs from LinkedIn and Wuzzuf websites, let's get it started :boom:"
-        )
+        st.title("An Electronics and Communication Engineer")
+        st.write("In this app we will scrap jobs from LinkedIn and Wuzzuf websites, let's get it started :boom:")
         st.write("[Reach me >](https://www.linkedin.com/in/yassmen-youssef-48439a166/)")
     with right_column:
-        st.image("im.gif", use_column_width=True)
-        # st_lottie(lottie_coding, height=300, key="coding")
+        st.image("im.gif", use_column_width=True)
 
-import streamlit as st
-from streamlit_option_menu import option_menu
 
-#with st.sidebar:
-#    selected = option_menu("Main Menu", ["select website", 'search job','numbers of jobs'], icons=['linkedin', 'search','123'], menu_icon="cast", default_index=1)
-
-webs =["Wuzzuf","Linkedin"]
-jobs =["Machine Learning","AI Engineer","Data Analysis","Software Testing"]
-nums = np.arange(1,1000)
+# Sidebar selections
+webs = ["Wuzzuf", "Linkedin"]
+jobs = ["Machine Learning", "AI Engineer", "Data Analysis", "Software Testing"]
+nums = np.arange(1, 1000)
 
-#with st.sidebar:
-#if selected == "select website":
-site = st.sidebar.selectbox("select one website", webs)
-#elif selected == "search job":
-job = st.sidebar.selectbox("select one job", jobs)
-#elif selected == "numbers of jobs":
-num_jobs = st.sidebar.selectbox("select num of jobs you want to scrap", nums)
+site = st.sidebar.selectbox("Select one website", webs)
+job = st.sidebar.selectbox("Select one job", jobs)
+num_jobs = st.sidebar.selectbox("Select number of jobs you want to scrap", nums)
 
+# Function to get Selenium driver
+@st.cache_resource
+def get_driver():
+    options = Options()
+    options.add_argument("--headless")  # Run in headless mode
+    options.add_argument("--no-sandbox")  # Bypass OS security model
+    options.add_argument("--disable-dev-shm-usage")  # Overcome limited resource problems
+    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
+
+import streamlit as st
+from streamlit_option_menu import option_menu
 
@@ -119,7 +104,9 @@ if st.sidebar.button('Start Scrapping'):
             with tab1 :
                 with st.spinner('✨Now loading...' ):
                     time.sleep(5)
-                    n1 = Wuzzuf_scrapping(job ,num_jobs )
+                    driver = get_driver()  # Initialize the driver
+                    n1 = Wuzzuf_scrapping(job, num_jobs, driver)  # Pass driver to the scraping function
+                    driver.quit()  # Clean up the driver
                     try:
                         tab1.dataframe(n1)
                     except:
@@ -137,23 +124,21 @@ if st.sidebar.button('Start Scrapping'):
     elif site =="Linkedin":
         with st.container():
             st.write("---")
-
-# if site =="Linkedin":
-#     with st.container():
-#         st.write("---")
-#         tab1, tab2 ,tab3= st.tabs([" Data", " Bubble Map","Data Exploration"])
-#         with tab1 :
-#             with st.spinner('✨Now loading...' ):
-#                 time.sleep(5)
-#                 n1 = LINKEDIN_Scrapping(job ,num_jobs )
-#                 try:
-#                     tab1.dataframe(n1)
-#                 except:
-#                     try:
-#                         tab1.write(n1.astype(str).set_index(n1.index.astype(str))) # Success
-#                     except:
-#                         tab1.table(n1)
-#         with tab2:
-#             map_bubble(n1)
-#         with tab3:
-#             linkedin_exp(n1) # WILL CHANGE
+            tab1, tab2 ,tab3= st.tabs([" Data", " Bubble Map","Data Exploration"])
+            with tab1 :
+                with st.spinner('✨Now loading...' ):
+                    time.sleep(5)
+                    driver = get_driver()
+                    n1 = LINKEDIN_Scrapping(job ,num_jobs, driver)
+                    driver.quit()  # Clean up the driver
+                    try:
+                        tab1.dataframe(n1)
+                    except:
+                        try:
+                            tab1.write(n1.astype(str).set_index(n1.index.astype(str)))  # Success
+                        except:
+                            tab1.table(n1)
+            with tab2:
+                map_bubble(n1)
+            with tab3:
+                linkedin_exp(n1)
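The refactor above replaces the commented-out, manually managed ChromeDriver with a single cached `get_driver()` that both scrapers share. As a quick smoke test of that setup (a sketch for illustration, not part of this commit), the cached driver can report its browser version the same way the removed debug print did:

```python
# Sketch: confirm the cached headless Chrome actually starts.
# get_driver() is the @st.cache_resource function defined in this commit.
driver = get_driver()                         # first call builds the driver, later calls reuse it
print(driver.capabilities["browserVersion"])  # standard Selenium capability lookup
```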
 
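Both call sites now inject that shared driver, e.g. `n1 = Wuzzuf_scrapping(job, num_jobs, driver)`. The scraper modules themselves are not part of this commit, so the following is only a sketch of the shape such a function plausibly takes — the search URL and CSS selector are illustrative assumptions, not Wuzzuf's real markup:

```python
# Hypothetical outline of a scraper that accepts an injected Selenium driver.
import pandas as pd
from selenium.webdriver.common.by import By

def Wuzzuf_scrapping(job, num_jobs, driver):
    # Placeholder URL pattern; the real module may paginate and parse more fields.
    driver.get(f"https://wuzzuf.net/search/jobs/?q={job.replace(' ', '+')}")
    titles = [a.text for a in driver.find_elements(By.CSS_SELECTOR, "h2 a")]
    return pd.DataFrame({"Title": titles[:num_jobs]})
```

Taking the driver as a parameter keeps browser start-up cost in one place and lets the Wuzzuf and LinkedIn paths reuse the same Chrome instance.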
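One caveat with the committed pattern: `@st.cache_resource` memoizes the returned driver across reruns, while both branches call `driver.quit()` after scraping, so a subsequent run would receive an already-quit driver from the cache. A minimal sketch of one way to keep the two consistent, assuming the `get_driver()` above (`.clear()` is the cache-invalidation hook Streamlit exposes on cached functions):

```python
# Sketch: pair driver.quit() with clearing the cached resource so the next
# run rebuilds a fresh Chrome instance instead of reusing the quit one.
driver = get_driver()
try:
    n1 = Wuzzuf_scrapping(job, num_jobs, driver)
finally:
    driver.quit()
    get_driver.clear()  # drop the now-dead driver from st.cache_resource
```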