import time
import warnings

warnings.filterwarnings("ignore")  # suppress noisy library warnings

import numpy as np
import pandas as pd
import streamlit as st
from selenium import webdriver

from wuzzuf_scraper import Wuzzuf_scrapping
from linkedin_scraper import LINKEDIN_Scrapping
from data_analysis import map_bubble, linkedin_exp, wuzzuf_exp
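# Wuzzuf_scrapping, LINKEDIN_Scrapping and the data_analysis helpers are local
# modules, assumed to sit next to this file in the same repository.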
# Set up Streamlit page configuration (must be the first Streamlit call in the script)
st.set_page_config(page_title="My Web_Scrap Page", page_icon=":tada:", layout="wide")
# ---- HEADER SECTION ----
with st.container():
    left_column, right_column = st.columns(2)
    with left_column:
        st.subheader("Hi! I am Yassmen :wave:")
        st.title("An Electronics and Communication Engineer")
        st.write("In this app we will scrape jobs from the LinkedIn and Wuzzuf websites, let's get it started :boom:")
        st.write("[Reach me >](https://www.linkedin.com/in/yassmen-youssef-48439a166/)")
    with right_column:
        st.image("im.gif", use_column_width=True)
# Sidebar selections
webs = ["Wuzzuf", "Linkedin"]
jobs = ["Machine Learning", "AI Engineer", "Data Analysis", "Software Testing"]
nums = np.arange(1, 1000)

site = st.sidebar.selectbox("Select one website", webs)
job = st.sidebar.selectbox("Select one job", jobs)
num_jobs = st.sidebar.selectbox("Select number of jobs you want to scrape", nums)
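# Alternative: st.sidebar.number_input("Number of jobs", min_value=1, max_value=999, value=10)
# would avoid rendering a ~1000-item dropdown for the job count.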
# Build a headless Selenium Chrome driver
def get_driver():
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")               # run without a visible browser window
    options.add_argument("--no-sandbox")             # required in most container environments
    options.add_argument("--disable-dev-shm-usage")  # avoid /dev/shm exhaustion in containers
    try:
        driver = webdriver.Chrome(options=options)
        return driver
    except Exception as e:
        st.error(f"Error initializing WebDriver: {e}")
        return None
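# Note: webdriver.Chrome(options=options) relies on Selenium Manager (Selenium 4.6+)
# to locate a matching chromedriver. On hosted platforms such as Hugging Face Spaces
# or Streamlit Community Cloud, Chromium and its driver must also be available on the
# image (e.g. installed via packages.txt) for this call to succeed.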
if st.sidebar.button('Start Scraping'):
    if site == "Wuzzuf":
        with st.container():
            st.write("---")
            tab1, tab2, tab3 = st.tabs(["Data", "Bubble Map", "Data Exploration"])
            with tab1:
                with st.spinner('✨ Now loading...'):
                    time.sleep(5)
                    driver = get_driver()  # initialize the driver
                    if driver is None:
                        st.stop()  # abort the run if the driver could not be created
                    n1 = Wuzzuf_scrapping(job, num_jobs, driver)  # pass driver to the scraping function
                    driver.quit()  # clean up the driver
                    # Render the results, falling back to simpler widgets if
                    # st.dataframe cannot serialize the frame
                    try:
                        tab1.dataframe(n1)
                    except Exception:
                        try:
                            tab1.write(n1.astype(str).set_index(n1.index.astype(str)))
                        except Exception:
                            tab1.table(n1)
            with tab2:
                map_bubble(n1)
            with tab3:
                # tab3.plotly_chart(wuzzuf_exp(n1))
                wuzzuf_exp(n1)
    elif site == "Linkedin":
        with st.container():
            st.write("---")
            tab1, tab2, tab3 = st.tabs(["Data", "Bubble Map", "Data Exploration"])
            with tab1:
                with st.spinner('✨ Now loading...'):
                    time.sleep(5)
                    driver = get_driver()
                    if driver is None:
                        st.stop()  # abort the run if the driver could not be created
                    n1 = LINKEDIN_Scrapping(job, num_jobs, driver)
                    driver.quit()  # clean up the driver
                    # Same display fallback chain as the Wuzzuf tab
                    try:
                        tab1.dataframe(n1)
                    except Exception:
                        try:
                            tab1.write(n1.astype(str).set_index(n1.index.astype(str)))
                        except Exception:
                            tab1.table(n1)
            with tab2:
                map_bubble(n1)
            with tab3:
                linkedin_exp(n1)
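# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py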