import os
import platform
import re
import subprocess
import time
import warnings
import zipfile
from urllib.request import urlopen

import requests
import numpy as np
import pandas as pd
import bs4
from bs4 import BeautifulSoup
from PIL import Image

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objs as go
import plotly.offline as py
from plotly.offline import iplot
from plotly.subplots import make_subplots

import streamlit as st
import streamlit.components.v1 as components
import hydralit_components as hc
from streamlit_option_menu import option_menu

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

warnings.filterwarnings("ignore")

####################### ChromeDriver setup ################################

# Detect the installed Chrome version so we can fetch a matching driver.
chrome_version = subprocess.getoutput("google-chrome --version")
chrome_version_number = chrome_version.split()[-1]  # e.g. "114.0.5735.90"

# Download URL for the matching ChromeDriver build.
url = (
    f"https://chromedriver.storage.googleapis.com/{chrome_version_number}"
    f"/chromedriver_{platform.system().lower()}64.zip"
)

response = requests.get(url)
if response.status_code == 200:
    # Save, extract, and clean up the driver archive.
    with open("chromedriver.zip", "wb") as file:
        file.write(response.content)
    with zipfile.ZipFile("chromedriver.zip", "r") as zip_ref:
        zip_ref.extractall("drivers")  # extract into a folder named 'drivers'
    os.remove("chromedriver.zip")
else:
    raise Exception(
        f"Failed to download ChromeDriver: {response.status_code} - {response.text}"
    )
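# NOTE: the chromedriver.storage.googleapis.com bucket above only serves
# drivers up to Chrome 114; from Chrome 115 onward Google publishes drivers
# through "Chrome for Testing". The helper below is a minimal sketch of that
# fallback and is not wired into the flow above -- you could call it in the
# else-branch instead of raising. The URL pattern and the nested zip layout
# (chromedriver-linux64/chromedriver) are assumptions to verify against
# https://googlechromelabs.github.io/chrome-for-testing/.
def download_cft_chromedriver(version: str, dest: str = "drivers") -> str:
    """Fetch a Chrome-for-Testing driver (Chrome 115+) and return its path."""
    plat = "linux64"  # adjust for win64 / mac-x64 / mac-arm64 as needed
    cft_url = (
        "https://storage.googleapis.com/chrome-for-testing-public/"
        f"{version}/{plat}/chromedriver-{plat}.zip"
    )
    resp = requests.get(cft_url)
    resp.raise_for_status()
    with open("chromedriver_cft.zip", "wb") as fh:
        fh.write(resp.content)
    with zipfile.ZipFile("chromedriver_cft.zip", "r") as zf:
        zf.extractall(dest)
    os.remove("chromedriver_cft.zip")
    # Chrome-for-Testing archives nest the binary one level down.
    return os.path.join(dest, f"chromedriver-{plat}", "chromedriver")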
# Run Chrome without a UI (headless); '--no-sandbox' and
# '--disable-dev-shm-usage' are common flags for containerised environments.
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

service = Service("drivers/chromedriver")
driver = webdriver.Chrome(service=service, options=options)

from wuzzuf_scraper import Wuzzuf_scrapping
from linkedin_scraper import LINKEDIN_Scrapping
from data_analysis import map_bubble, linkedin_exp, wuzzuf_exp

####################### Streamlit app ################################

st.set_page_config(page_title="My Web_Scrap Page", page_icon=":tada:", layout="wide")

# ---- HEADER SECTION ----
with st.container():
    left_column, right_column = st.columns(2)
    with left_column:
        st.subheader("Hi! I am Yassmen :wave:")
        st.title("An Electronics and Communication Engineer")
        st.write(
            "In this app we will scrape jobs from the LinkedIn and Wuzzuf websites, "
            "let's get it started :boom:"
        )
        st.write("[Reach me >](https://www.linkedin.com/in/yassmen-youssef-48439a166/)")
    with right_column:
        st.image("images.jfif", use_column_width=True)
        # st_lottie(lottie_coding, height=300, key="coding")

# with st.sidebar:
#     selected = option_menu(
#         "Main Menu",
#         ["select website", "search job", "numbers of jobs"],
#         icons=["linkedin", "search", "123"],
#         menu_icon="cast",
#         default_index=1,
#     )

# ---- SIDEBAR CONTROLS ----
webs = ["Wuzzuf", "Linkedin"]
jobs = ["Machine Learning", "AI Engineer", "Data Analysis", "Software Testing"]
nums = np.arange(1, 1000)

site = st.sidebar.selectbox("select one website", webs)
job = st.sidebar.selectbox("select one job", jobs)
num_jobs = st.sidebar.selectbox("select num of jobs you want to scrape", nums)


def show_jobs(tab, df):
    """Render the scraped jobs, falling back to plainer widgets if needed."""
    try:
        tab.dataframe(df)
    except Exception:
        try:
            tab.write(df.astype(str).set_index(df.index.astype(str)))
        except Exception:
            tab.table(df)


if st.sidebar.button("Start Scraping"):
    if site == "Wuzzuf":
        with st.container():
            st.write("---")
            tab1, tab2, tab3 = st.tabs([" Data", " Bubble Map", "Data Exploration"])
            with tab1:
                with st.spinner("✨Now loading..."):
                    time.sleep(5)  # brief pause so the spinner is visible
                    n1 = Wuzzuf_scrapping(job, num_jobs)
                    show_jobs(tab1, n1)
            with tab2:
                map_bubble(n1)
            with tab3:
                # tab3.plotly_chart(wuzzuf_exp(n1))
                wuzzuf_exp(n1)
    elif site == "Linkedin":
        with st.container():
            st.write("---")
            tab1, tab2, tab3 = st.tabs([" Data", " Bubble Map", "Data Exploration"])
            with tab1:
                with st.spinner("✨Now loading..."):
                    time.sleep(5)  # brief pause so the spinner is visible
                    n1 = LINKEDIN_Scrapping(job, num_jobs)
                    show_jobs(tab1, n1)
            with tab2:
                map_bubble(n1)
            with tab3:
                linkedin_exp(n1)  # WILL CHANGE
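# ---------------------------------------------------------------------------
# Running the app -- a sketch, assuming this file is saved as app.py and that
# wuzzuf_scraper.py, linkedin_scraper.py and data_analysis.py sit beside it
# (each scraper is expected to return a pandas DataFrame, which is what the
# tabs above render):
#
#   pip install streamlit selenium pandas plotly beautifulsoup4 matplotlib \
#               seaborn pillow streamlit-option-menu hydralit-components
#   streamlit run app.py
# ---------------------------------------------------------------------------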