# LinkedIn Job Posting Scraper
"""
--------------------------------
Pre-requisites:
    pip install selenium
    pip install chromedriver-autoinstaller
    pip install pandas
    pip install beautifulsoup4
--------------------------------
"""

# Import necessary packages and libraries
import time
import random

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import chromedriver_autoinstaller

# Install a chromedriver matching the local Chrome version if necessary
chromedriver_autoinstaller.install()

# Configure Chrome options
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")

# Launch the Chrome browser
browser = webdriver.Chrome(options=options)

# Open the LinkedIn job search page (modify keywords and location as needed)
browser.get('https://www.linkedin.com/jobs/search/'
            '?keywords=Business%20Analyst&location=Toronto&position=1&pageNum=0')

# Set the number of result pages to load
pages = 100

# LinkedIn's search results use infinite scroll: each scroll (plus an
# occasional "See more jobs" click) appends more cards to the same page.
# Load all pages first, then scrape the accumulated cards once.
for i in range(pages):
    print(f'Loading page {i + 1}')
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    try:
        # Click the "See more jobs" button if it appears
        element = WebDriverWait(browser, 5).until(
            EC.element_to_be_clickable(
                (By.XPATH, "//button[text()='See more jobs']")
            )
        )
        element.click()
    except Exception:
        # The button is not shown on every scroll; keep scrolling
        pass

# Parse the fully loaded results page and scrape each job card
jobs = []
soup = BeautifulSoup(browser.page_source, "html.parser")
job_listings = soup.find_all("div", class_="base-card")

for job in job_listings:
    job_title = job.find("h3", class_="base-search-card__title").text.strip()
    job_company = job.find("h4", class_="base-search-card__subtitle").text.strip()
    job_location = job.find("span", class_="job-search-card__location").text.strip()
    apply_link = job.find("a", class_="base-card__full-link")["href"]
    # The job ID is the trailing digits of the URL path, before the query string
    job_ID = apply_link.split('?')[0][-10:]

    # Visit the posting itself and pause 5-10 seconds to avoid rate limiting
    browser.get(apply_link)
    time.sleep(random.choice(range(5, 11)))

    try:
        description_soup = BeautifulSoup(browser.page_source, "html.parser")
        # LinkedIn's class names change frequently; inspect the page and
        # adjust this selector if descriptions come back as None
        job_description = description_soup.find(
            "div", class_="show-more-less-html__markup"
        ).text.strip()
    except AttributeError:
        job_description = None

    jobs.append({
        "job ID": job_ID,
        "title": job_title,
        "company": job_company,
        "location": job_location,
        "link": apply_link,
        "job description": job_description
    })

# Save the collected data into a CSV file
df = pd.DataFrame(jobs)
df.to_csv("jobs.csv", index=False)

browser.quit()
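
# Optional follow-up, a small sketch that is not part of the original script:
# the same posting can be captured more than once as the result list grows,
# so deduplicating on the job ID before analysing the CSV is usually worthwhile.
deduped = pd.read_csv("jobs.csv").drop_duplicates(subset="job ID")
deduped.to_csv("jobs.csv", index=False)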