Update wuzzuf_scraper.py
wuzzuf_scraper.py  CHANGED  (+3 -2)
@@ -53,7 +53,8 @@ def Wuzzuf_scrapping(job_type , job_num,driver):
             r= r+ ', ' +loc[:-1][i].strip()
         location.append(r.replace(',', '', 1).strip())
         country.append(loc[-1].strip())
-
+        #print("---",Title[x].find('a').attrs['href'])
+        links.append(Title[x].find('a').attrs['href'])
         m = " ".join(re.findall("[a-zA-Z\d+]+", (soup.find_all('div' , {'class': 'css-d7j1kk'})[x].find('a').text)))
         company_name.append(m)
         c = soup.find_all('div' ,{'class':'css-1lh32fc'})[x].find_all('span')
@@ -85,6 +86,7 @@ def Wuzzuf_scrapping(job_type , job_num,driver):
         # to get the logo of the company
         # Fetch the company logo
         try:
+            #print(links[x])
             data1 = requests.get(links[x])
             data1.raise_for_status()  # Check for HTTP errors
             soup1 = BeautifulSoup(data1.content, 'html.parser')
@@ -140,4 +142,3 @@ def Wuzzuf_scrapping(job_type , job_num,driver):
     df[:job_num].to_excel('WUZZUF_scrapping.xlsx',index=False,encoding='utf-8')
     return df[:job_num]
 
-
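For context, the change re-adds the per-posting link collection (links.append on the title anchor's href) and keeps the logo request guarded by raise_for_status(). A minimal standalone sketch of that fetch-and-parse pattern, assuming an illustrative helper name and a hypothetical <img> lookup (the real logo selector lives elsewhere in wuzzuf_scraper.py):

    import requests
    from bs4 import BeautifulSoup

    def fetch_company_logo(link):
        """Fetch a job-posting page and return a logo image URL, or None.

        Mirrors the pattern in the diff: request the collected link, fail fast
        on HTTP errors, then parse the page. The <img> lookup below is an
        assumption for illustration only.
        """
        try:
            data1 = requests.get(link, timeout=10)
            data1.raise_for_status()          # surface 4xx/5xx instead of parsing an error page
            soup1 = BeautifulSoup(data1.content, 'html.parser')
            img = soup1.find('img')           # hypothetical: take the first image as the logo
            return img['src'] if img and img.has_attr('src') else None
        except requests.RequestException:
            return None                       # network/HTTP failure: skip the logo for this posting

    # Usage, with links collected earlier via Title[x].find('a').attrs['href']:
    # logo = fetch_company_logo(links[x])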