Update wuzzuf_scraper.py
wuzzuf_scraper.py  CHANGED  (+3 -2)
@@ -53,7 +53,8 @@ def Wuzzuf_scrapping(job_type , job_num,driver):
             r= r+ ', ' +loc[:-1][i].strip()
         location.append(r.replace(',', '', 1).strip())
         country.append(loc[-1].strip())
-
+        #print("---",Title[x].find('a').attrs['href'])
+        links.append(Title[x].find('a').attrs['href'])
         m = " ".join(re.findall("[a-zA-Z\d+]+", (soup.find_all('div' , {'class': 'css-d7j1kk'})[x].find('a').text)))
         company_name.append(m)
         c = soup.find_all('div' ,{'class':'css-1lh32fc'})[x].find_all('span')
@@ -85,6 +86,7 @@ def Wuzzuf_scrapping(job_type , job_num,driver):
         # to get the logo of the company
         # Fetch the company logo
         try:
+            #print(links[x])
             data1 = requests.get(links[x])
             data1.raise_for_status()  # Check for HTTP errors
             soup1 = BeautifulSoup(data1.content, 'html.parser')
@@ -140,4 +142,3 @@ def Wuzzuf_scrapping(job_type , job_num,driver):
     df[:job_num].to_excel('WUZZUF_scrapping.xlsx',index=False,encoding='utf-8')
     return df[:job_num]
 
-
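For context, the change re-adds the per-posting link collection (links.append on the title anchor's href) and keeps the logo request guarded by raise_for_status(). A minimal standalone sketch of that fetch-and-parse pattern, assuming an illustrative helper name and a hypothetical <img> lookup (the real logo selector lives elsewhere in wuzzuf_scraper.py):

    import requests
    from bs4 import BeautifulSoup

    def fetch_company_logo(link):
        """Fetch a job-posting page and return a logo image URL, or None.

        Mirrors the pattern in the diff: request the collected link, fail fast
        on HTTP errors, then parse the page. The <img> lookup below is an
        assumption for illustration only.
        """
        try:
            data1 = requests.get(link, timeout=10)
            data1.raise_for_status()          # surface 4xx/5xx instead of parsing an error page
            soup1 = BeautifulSoup(data1.content, 'html.parser')
            img = soup1.find('img')           # hypothetical: take the first image as the logo
            return img['src'] if img and img.has_attr('src') else None
        except requests.RequestException:
            return None                       # network/HTTP failure: skip the logo for this posting

    # Usage, with links collected earlier via Title[x].find('a').attrs['href']:
    # logo = fetch_company_logo(links[x])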