Python Selenium:搜索结果输出为空列表

wn9m85ua  于 2023-01-02  发布在  Python
关注(0)|答案(1)|浏览(158)

这是我的代码。

# Asker's original script: read the page count from the first search page,
# visit every result page, collect the first <a> href of each product tile,
# then de-duplicate the collected links.
# Start Chrome using the driver path returned by ChromeDriverManager().install().
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# NOTE(review): url_list and content_list are never used in the visible code.
url_list = []
content_list = ""
# Percent-encoded search keyword ("bánh crown").
text = "b%C3%A1nh%20crown"

# Page 0 is loaded first only to read the total number of result pages.
url_maxpage = "https://shopee.vn/search?brands=3372239&keyword=" + text + "&noCorrection=true&page=0"
driver.get(url_maxpage)

# Lower-case alias for Selenium's By locator class.
by = webdriver.common.by.By

# NOTE(review): `By` (capitalised) is a different name from the `by` alias
# above; it must come from an import that is not shown in this snippet.
# NOTE(review): max_page[0] assumes at least one element matched; an empty
# result list raises IndexError here.
max_page = driver.find_elements(by=By.CLASS_NAME, value='shopee-mini-page-controller__total')
num=int(max_page[0].text)

for i in range(num):  # page index runs from 0 to num - 1
    url = "https://shopee.vn/search?brands=3372239&keyword=" + text + "&noCorrection=true&page=" + str(i)
    driver.get(url)
    # Re-binds the same alias that was already assigned before the loop.
    by = webdriver.common.by.By
    
    # Fixed pause only; nothing here waits for the product tiles to exist.
    # NOTE(review): probably why the result is empty - confirm with an explicit wait.
    time.sleep(0.5)
    
    div_href = driver.find_elements(by.CLASS_NAME, "col-xs-2-4")
    # NOTE(review): hlink is re-created on every page, so after the outer loop
    # it only holds the links found on the last page visited.
    hlink = []
    # NOTE(review): this inner loop reuses the outer loop variable name `i`.
    for i in range(int(len(div_href))):
        # First <a> inside the tile; raises IndexError if the tile has no <a>.
        hlink_list = div_href[i].find_elements(by.TAG_NAME, "a")[0].get_attribute('href')
        hlink.append(hlink_list)

    
# Remove duplicates in list (going through a set does not keep the original order).
# NOTE(review): if num is 0 the loop body never runs and hlink is undefined here.
my_list = hlink
my_set = set(my_list) 
my_list = list(my_set)
output : []

使用上面的代码,我想遍历页面并抓取每个产品的链接。但我不知道为什么它输出一个空列表。任何帮助将不胜感激。
我修改了for语句,如下所示,但是只有15个输出。

# Asker's revised loop: for every result page, scroll to the bottom until the
# document height stops growing, then collect the href of each product link.
for i in range(num):  # page index runs from 0 to num - 1
    url = "https://shopee.vn/search?brands=3372239&keyword=" + text + "&noCorrection=true&page=" + str(i)
    driver.get(url)
    # Explicit wait with a 20 second timeout, re-created for each page.
    wait = WebDriverWait(driver, 20)
    
    # Seconds to pause after each scroll before re-measuring the page height.
    SCROLL_PAUSE_SEC = 10

    last_height = driver.execute_script("return document.body.scrollHeight")

    while True:
        # Jump straight to the current bottom of the document.
        # NOTE(review): this skips the middle of the page in one step; if tiles
        # only load when scrolled into view, that may explain the 15 results - confirm.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        time.sleep(SCROLL_PAUSE_SEC)

        # Stop once scrolling no longer increases the document height.
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    
    # NOTE(review): hlink is re-created on every page, so after the outer loop
    # it only holds the links collected from the last page.
    hlink = []
    # NOTE(review): per the Selenium docs this condition is met as soon as at
    # least one matching element is present; it does not wait for all tiles.
    elements = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".col-xs-2-4 a")))
    
    for element in elements:
        hlink_list = element.get_attribute('href')
        hlink.append(hlink_list)
xqkwcwgp

xqkwcwgp1#

您需要等待元素加载完毕。
下面的代码会获取页面上的前 15 个 href。
要获得全部 60 个元素,您需要滚动页面,因为最初只加载了 15 个元素。

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

# Minimal example: open the first search-result page, wait until product
# links are present in the DOM, and collect their href values into `hlink`.
options = Options()
options.add_argument("start-maximized")

# Raw string so the backslashes of the Windows path are taken literally
# instead of relying on unrecognised escape sequences such as "\w".
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)

# Result pages are zero-indexed; page=0 is the first page.
url = 'https://shopee.vn/search?brands=3372239&keyword=b%C3%A1nh%20crown&noCorrection=true&page=0'

hlink = []
try:
    driver.get(url)
    # Block for up to 20 seconds until at least one product link exists.
    wait = WebDriverWait(driver, 20)
    elements = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".col-xs-2-4 a")))
    # Read the hrefs while the session is still open; they are plain strings
    # and remain usable after the browser has been closed.
    hlink = [element.get_attribute('href') for element in elements]
finally:
    # Always release the browser and driver process, even if the wait times out.
    driver.quit()

相关问题