Yassmen committed on
Commit
f6f5e88
1 Parent(s): f707ac6

Update wuzzuf_scraper.py

Browse files
Files changed (1) hide show
  1. wuzzuf_scraper.py +3 -2
wuzzuf_scraper.py CHANGED
@@ -53,7 +53,8 @@ def Wuzzuf_scrapping(job_type , job_num,driver):
53
  r= r+ ', ' +loc[:-1][i].strip()
54
  location.append(r.replace(',', '', 1).strip())
55
  country.append(loc[-1].strip())
56
- links.append('https://wuzzuf.net' + Title[x].find('a').attrs['href'])
 
57
  m = " ".join(re.findall("[a-zA-Z\d+]+", (soup.find_all('div' , {'class': 'css-d7j1kk'})[x].find('a').text)))
58
  company_name.append(m)
59
  c = soup.find_all('div' ,{'class':'css-1lh32fc'})[x].find_all('span')
@@ -85,6 +86,7 @@ def Wuzzuf_scrapping(job_type , job_num,driver):
85
  # to get the logo of the company
86
  # Fetch the company logo
87
  try:
 
88
  data1 = requests.get(links[x])
89
  data1.raise_for_status() # Check for HTTP errors
90
  soup1 = BeautifulSoup(data1.content, 'html.parser')
@@ -140,4 +142,3 @@ def Wuzzuf_scrapping(job_type , job_num,driver):
140
  df[:job_num].to_excel('WUZZUF_scrapping.xlsx',index=False,encoding='utf-8')
141
  return df[:job_num]
142
 
143
-
 
53
  r= r+ ', ' +loc[:-1][i].strip()
54
  location.append(r.replace(',', '', 1).strip())
55
  country.append(loc[-1].strip())
56
+ #print("---",Title[x].find('a').attrs['href'])
57
+ links.append(Title[x].find('a').attrs['href'])
58
  m = " ".join(re.findall("[a-zA-Z\d+]+", (soup.find_all('div' , {'class': 'css-d7j1kk'})[x].find('a').text)))
59
  company_name.append(m)
60
  c = soup.find_all('div' ,{'class':'css-1lh32fc'})[x].find_all('span')
 
86
  # to get the logo of the company
87
  # Fetch the company logo
88
  try:
89
+ #print(links[x])
90
  data1 = requests.get(links[x])
91
  data1.raise_for_status() # Check for HTTP errors
92
  soup1 = BeautifulSoup(data1.content, 'html.parser')
 
142
  df[:job_num].to_excel('WUZZUF_scrapping.xlsx',index=False,encoding='utf-8')
143
  return df[:job_num]
144