Update app.py
app.py
CHANGED
@@ -41,68 +41,53 @@ import subprocess
 import os
 import subprocess
 
-
-
-
-
-
-
-
-
-#from selenium import webdriver
-
-#options = webdriver.ChromeOptions()
-#options.add_argument('--headless')
-#driver = webdriver.Chrome(options=options)
-#print("'''''",driver.capabilities['browserVersion'],"''''")
-#driver.quit()
-# Initialize the ChromeDriver
-#service = Service('chromedriver')
-#driver = webdriver.Chrome(service=service, options=options)
-
+import streamlit as st
+import numpy as np
+import pandas as pd
+import time
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.chrome.options import Options
+from webdriver_manager.chrome import ChromeDriverManager
 from wuzzuf_scraper import Wuzzuf_scrapping
 from linkedin_scraper import LINKEDIN_Scrapping
-from data_analysis import map_bubble,linkedin_exp,wuzzuf_exp
-
-####################### stream lit app ################################
-
+from data_analysis import map_bubble, linkedin_exp, wuzzuf_exp
 
+# Set up Streamlit page configuration
 st.set_page_config(page_title="My Web_Scrap Page", page_icon=":tada:", layout="wide")
 
-
 # ---- HEADER SECTION ----
 with st.container():
     left_column, right_column = st.columns(2)
     with left_column:
         st.subheader("Hi! I am Yassmen :wave:")
-        st.title("An Electronics and Communication Engineer")
-        st.write(
-            "In this app we will scrap jobs from LinkedIn and Wuzzuf websites, let's get it started :boom:"
-        )
+        st.title("An Electronics and Communication Engineer")
+        st.write("In this app we will scrap jobs from LinkedIn and Wuzzuf websites, let's get it started :boom:")
         st.write("[Reach me >](https://www.linkedin.com/in/yassmen-youssef-48439a166/)")
     with right_column:
-        st.image("im.gif", use_column_width=True)
-        # st_lottie(lottie_coding, height=300, key="coding")
+        st.image("im.gif", use_column_width=True)
 
 
-
-
 
-#with st.sidebar:
-#    selected = option_menu("Main Menu", ["select website", 'search job','numbers of jobs'], icons=['linkedin', 'search','123'], menu_icon="cast", default_index=1)
-
-webs =["Wuzzuf","Linkedin"]
-jobs =["Machine Learning","AI Engineer","Data Analysis","Software Testing"]
-nums = np.arange(1,1000)
+# Sidebar selections
+webs = ["Wuzzuf", "Linkedin"]
+jobs = ["Machine Learning", "AI Engineer", "Data Analysis", "Software Testing"]
+nums = np.arange(1, 1000)
 
-
-
-site = st.sidebar.selectbox("select one website", webs)
-#elif selected == "search job":
-job = st.sidebar.selectbox("select one job", jobs)
-#elif selected == "numbers of jobs":
-num_jobs = st.sidebar.selectbox("select num of jobs you want to scrap", nums)
+site = st.sidebar.selectbox("Select one website", webs)
+job = st.sidebar.selectbox("Select one job", jobs)
+num_jobs = st.sidebar.selectbox("Select number of jobs you want to scrap", nums)
+
+# Function to get Selenium driver
+@st.cache_resource
+def get_driver():
+    options = Options()
+    options.add_argument("--headless")  # Run in headless mode
+    options.add_argument("--no-sandbox")  # Bypass OS security model
+    options.add_argument("--disable-dev-shm-usage")  # Overcome limited resource problems
+    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
 
+import streamlit as st
+from streamlit_option_menu import option_menu
 
 
@@ -119,7 +104,9 @@ if st.sidebar.button('Start Scrapping'):
         with tab1 :
             with st.spinner('✨Now loading...' ):
                 time.sleep(5)
-
+                driver = get_driver()  # Initialize the driver
+                n1 = Wuzzuf_scrapping(job, num_jobs, driver)  # Pass driver to the scraping function
+                driver.quit()  # Clean up the driver
             try:
                 tab1.dataframe(n1)
             except:
@@ -137,23 +124,21 @@ if st.sidebar.button('Start Scrapping'):
     elif site =="Linkedin":
         with st.container():
             st.write("---")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# with tab3:
-#     linkedin_exp(n1)     # WILL CHANGE
+            tab1, tab2 ,tab3= st.tabs([" Data", " Bubble Map","Data Exploration"])
+            with tab1 :
+                with st.spinner('✨Now loading...' ):
+                    time.sleep(5)
+                    driver = get_driver()
+                    n1 = LINKEDIN_Scrapping(job ,num_jobs,driver )
+                    driver.quit()  # Clean up the driver
+                try:
+                    tab1.dataframe(n1)
+                except:
+                    try:
+                        tab1.write(n1.astype(str).set_index(n1.index.astype(str)))  # Success
+                    except:
+                        tab1.table(n1)
+            with tab2:
+                map_bubble(n1)
+            with tab3:
+                linkedin_exp(n1)
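Note: the updated call sites pass the shared driver into Wuzzuf_scrapping(job, num_jobs, driver) and LINKEDIN_Scrapping(job, num_jobs, driver), so both scraper modules must now accept a driver argument. Neither module is part of this commit; the sketch below is only a guess at the Wuzzuf side, with a hypothetical search URL and selector, assuming the function returns a pandas DataFrame and leaves driver cleanup to the caller (the app calls driver.quit() afterwards).

# Hypothetical sketch of wuzzuf_scraper.py -- NOT part of this commit.
import pandas as pd
from selenium.webdriver.common.by import By

def Wuzzuf_scrapping(job, num_jobs, driver):
    # Illustrative URL and CSS selector; the real module may differ.
    driver.get(f"https://wuzzuf.net/search/jobs/?q={job}")
    titles = [el.text for el in driver.find_elements(By.CSS_SELECTOR, "h2 a")]
    # Caller owns the driver, so no driver.quit() here.
    return pd.DataFrame({"title": titles[:num_jobs]})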
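Note: get_driver() is cached with @st.cache_resource, yet both branches call driver.quit() right after scraping. Because the cache keeps returning the same object across reruns, the next scrape will receive an already-quit driver. A minimal alternative sketch, assuming a hypothetical uncached make_driver() helper (the names scrape_with_driver and make_driver are not in the commit), is to create a fresh driver per scrape and release it in a finally block:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

def make_driver():
    # Same options as get_driver() in the commit, but uncached,
    # so calling quit() on the result is always safe.
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

def scrape_with_driver(scrape_fn, *args):
    driver = make_driver()
    try:
        return scrape_fn(*args, driver)   # e.g. scrape_with_driver(Wuzzuf_scrapping, job, num_jobs)
    finally:
        driver.quit()                     # runs even if the scraper raises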