from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
PATH = "C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome()
driver.get("https://www.linkedin.com/jobs/search/?currentJobId=3354966649&geoId=103644278&keywords=Software%20Engineer"
"&location=United%20States&refresh=true")
try:
main = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located(
(By.ID, "main")))
jobList = main.find_elements(By.CLASS_NAME, "scaffold-layout__list-container")
for companyName in jobList:
name = companyName.find_element(By.XPATH, "/html/body/div[5]/div[3]/div[4]/div/div/main/div/section[1]/div/ul/li[1]/div/div[1]/div[1]/div[2]/div[2]/a")
print(name.text)
except:
driver.quit()
我似乎找不到打印公司名称的方法(我也希望向下移动列表,抓取比如20家公司的名称)。我希望对其他属性也这样做,比如职位,但是我被难住了。LinkedIn链接:https://www.linkedin.com/jobs/search/?currentJobId=3354951485&geoId=103644278&keywords=Software%20Engineer&location=United%20States&refresh=true
1条答案
按热度按时间kokeuurv1#
1.最好还是登录linkedin来了解真实的情况。
1.您必须以正确的方式选择正确的元素定位器策略
1.你必须滚动页面才能抓取页面中的所有元素
完整的工作代码及示例:
输出: