python-3.x Selenium无法向下滚动

holgip5t  于 2022-11-19  发布在  Python
关注(0)|答案(2)|浏览(188)

我正在使用Selenium从这个网站(https://www.atlp.ae)抓取数据。该网站使用了一些动画,只有在向下滚动之后才会显示各个部分。我想向下滚动到页脚,等待动画完成,然后再从页面中获取数据。
虽然我不确定这是否是获得数据的唯一方法,因为我可以看到动画只是将类aos-animate添加到主类中,如果该类不在HTML元素中,它就不会获得文本!
在get_service_data函数中,我想向下滚动到页面的末尾,并且尝试过在循环开始之前就向下滚动。
我尝试过以下几种方法:

# Attempt 1: ask the browser to scroll the window to the bottom of the document.
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# Attempt 2: send END / PAGE_DOWN key presses to an element.
# NOTE(review): By.CLASS_NAME searches for an element whose class is "html",
# not the root <html> tag (that would be By.TAG_NAME) -- confirm the intent.
html = driver.find_element(By.CLASS_NAME, 'html')
html.send_keys(Keys.END)
html.send_keys(Keys. PAGE_DOWN)

# Attempt 3: locate the element with class "copyright" and scroll it into view.
# NOTE(review): the name `copyright` shadows the builtin of the same name.
copyright = driver.find_element(By.CLASS_NAME, 'copyright')
driver.execute_script("arguments[0].scrollIntoView();", copyright)

以下是我的完整脚本:

import os 
import time
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys

# Site language segment used to build the start URL.
language = "en" # TODO: take this from the user
main_link = f"https://www.atlp.ae/{language}"

# The chromedriver binary is expected in the current working directory.
driver_path = os.path.join(os.getcwd(), "chromedriver")
# Headless mode is currently disabled:
# options = webdriver.ChromeOptions()
# options.headless = True

# Module-level browser session shared by the functions below.
# NOTE(review): passing the driver path positionally is reported as deprecated
# in Selenium 4 in favour of a Service object -- confirm against the installed
# Selenium version.
driver = webdriver.Chrome(driver_path) #  options=options
driver.maximize_window()

def get_services_links():
    """Collect the service page URLs listed in the site's services menu.

    Loads ``main_link``, clicks the services button in the fixed header and
    reads the ``href`` of the first ``<a>`` inside every ``<li>`` of the menu.

    Returns:
        The hrefs in menu order, with the last entry dropped.
    """
    driver.get(main_link)

    # Click the header button before looking up the services menu.
    header_button = driver.find_element(
        By.XPATH,
        '//*[@id="fixed-header"]/div/div[2]/div/nav/ul/li[5]/button',
    )
    header_button.click()

    menu = driver.find_element(By.XPATH, '//*[@id="serviceInfotitle"]/nav/ul')
    hrefs = [
        item.find_element(By.TAG_NAME, "a").get_attribute("href")
        for item in menu.find_elements(By.TAG_NAME, "li")
    ]
    # Slicing an empty list already yields [], so no emptiness check is needed.
    return hrefs[:-1]

def get_service_data(link):
    """Open a service page and print its name and each sub-service title.

    Args:
        link: URL of the service page to load.
    """
    driver.get(link)
    waiter = WebDriverWait(driver, 10)

    name_locator = (
        By.XPATH,
        '//*[@id="main-scrollbar"]/div[1]/main/sc-placeholder/app-intro-section/section/div/div[1]/div[1]/div/p',
    )
    # Wait (up to 10 s) for the heading to become visible, then read its text.
    waiter.until(EC.visibility_of_element_located(name_locator))
    print("Service Name: ", driver.find_element(*name_locator).text)

    # Each "serviceSubsetRow" inside the wrapper's container is one sub-service.
    wrapper_locator = (By.CLASS_NAME, 'ServiceSubsetWrapper')
    waiter.until(EC.visibility_of_element_located(wrapper_locator))
    rows = (
        driver.find_element(*wrapper_locator)
        .find_element(By.CLASS_NAME, 'container')
        .find_elements(By.CLASS_NAME, 'serviceSubsetRow')
    )
    for row in rows:
        title = row.find_element(By.CLASS_NAME, 'textCol').find_element(
            By.CLASS_NAME, 'serviceSubsetTitle'
        )
        print("Text: ", title.text)
  

if __name__ == '__main__':
    # Script entry point: collect the service links and scrape the first one.
    # The try/finally guarantees the browser is shut down even when scraping
    # raises; otherwise the Chrome/chromedriver processes would be left running.
    try:
        links = get_services_links()
        for link in links:
            get_service_data(link)
            # Only the first service page is processed for now.
            break
    finally:
        driver.quit()
jchrr9hc

jchrr9hc1#

你需要的是:something.get_attribute('innerText'),因为可能由于添加了动画,常规的text无法工作。
此外,我还删除了一些我认为不需要的行(至少在本练习中是这样)。

def get_service_data(link):
    """Open a service page and print its name and every sub-service title.

    Titles are read through the ``innerText`` attribute rather than ``.text``
    because, as the accompanying answer explains, ``.text`` may not work here
    (possibly because of the page's animation).

    Args:
        link: URL of the service page to load.
    """
    # Imported locally so this snippet does not depend on an extra
    # top-of-file import.
    from selenium.common.exceptions import WebDriverException

    driver.get(link)
    wait = WebDriverWait(driver, 10)

    service_name_xpath = '//*[@id="main-scrollbar"]/div[1]/main/sc-placeholder/app-intro-section/section/div/div[1]/div[1]/div/p'
    wait.until(EC.visibility_of_element_located((By.XPATH, service_name_xpath)))
    service_name = driver.find_element(By.XPATH, service_name_xpath).text
    print("Service Name: ", service_name)
    # ---- removed these lines --------
    # row serviceSubsetRow ng-star-inserted
    # wait.until(EC.visibility_of_element_located((By.CLASS_NAME, 'ServiceSubsetWrapper')))
    # services_wrapper = driver.find_element(By.CLASS_NAME, 'ServiceSubsetWrapper')
    #
    # container = services_wrapper.find_element(By.CLASS_NAME, 'container')
    # service_sections = container.find_elements(By.CLASS_NAME, 'serviceSubsetRow')
    # ----- End of lines removal ----------
    # Click the cookie acceptance button if it is present. This is best-effort:
    # only Selenium errors (element missing, not clickable, ...) are tolerated,
    # so Ctrl-C and programming errors are no longer silently swallowed.
    try:
        driver.find_element(By.XPATH, "//*[@class='cc-btn cc-allow']").click()
    except WebDriverException:
        print("nothing there")
    # --- removed these lines
    # for service in service_sections:
    #     textual_div = service.find_element(By.CLASS_NAME, 'textCol')
    #     time.sleep(3)
    # --- end of lines removal ---------
    # Find every element whose class contains 'serviceSubsetTitle', anywhere
    # on the page, instead of walking wrapper -> container -> row.
    somethings = driver.find_elements(By.XPATH, "//*[contains(@class, 'serviceSubsetTitle')]")
    print(len(somethings))
    for something in somethings:
        title_txt = something.get_attribute('innerText')
        print(title_txt)

输出如下:

Service Name:  Sea Services
5
Vessel Management and Marine Services
Passenger Handling and Cargo Operations
Issuance of Certificates and Approvals in Ports
Ports Licensing
Property Leasing Services - Ports

Process finished with exit code 0
44u64gxh

44u64gxh2#

这是向下滚动页面的一种方法:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

# Chrome start-up switches for this session.
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")

webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)

url = 'https://www.atlp.ae/en'
browser.get(url)
# Scroll the window down by 100 px right after loading the page.
browser.execute_script('window.scrollBy(0, 100);')
# Wait up to 20 s for the "deny cookies" link to be clickable, then click it.
cookie_b = WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.XPATH, "//a[@aria-label='deny cookies']")))
cookie_b.click()
# Key presses are sent to the element with id "main-scrollbar" rather than to
# the window. The click beforehand is presumably there to give it focus.
body = WebDriverWait(browser, 20).until(EC.presence_of_element_located((By.ID, "main-scrollbar")))
body.click()
body.send_keys(Keys.END)
print('scrolled down')

我的环境是Linux上的chrome/chromedriver,但您可以根据自己的系统进行调整,只需注意导入语句以及定义浏览器/驱动程序之后的代码即可。Selenium文档:https://www.selenium.dev/documentation/

相关问题