这是我的代码。
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
url_list = []
content_list = ""
text = "b%C3%A1nh%20crown"
url_maxpage = "https://shopee.vn/search?brands=3372239&keyword=" + text + "&noCorrection=true&page=0"
driver.get(url_maxpage)
by = webdriver.common.by.By
max_page = driver.find_elements(by=By.CLASS_NAME, value='shopee-mini-page-controller__total')
num=int(max_page[0].text)
for i in range(num): # Crawl from 1 to max page
url = "https://shopee.vn/search?brands=3372239&keyword=" + text + "&noCorrection=true&page=" + str(i)
driver.get(url)
by = webdriver.common.by.By
time.sleep(0.5)
div_href = driver.find_elements(by.CLASS_NAME, "col-xs-2-4")
hlink = []
for i in range(int(len(div_href))):
hlink_list = div_href[i].find_elements(by.TAG_NAME, "a")[0].get_attribute('href')
hlink.append(hlink_list)
# Remove duplicates in list
my_list = hlink
my_set = set(my_list)
my_list = list(my_set)
output : []
使用上面的代码,我想遍历页面并抓取每个产品的链接。但我不知道为什么它输出一个空列表。任何帮助将不胜感激。
我修改了for语句,如下所示,但是只有15个输出。
for i in range(num): # Crawl from 1 to max page
url = "https://shopee.vn/search?brands=3372239&keyword=" + text + "&noCorrection=true&page=" + str(i)
driver.get(url)
wait = WebDriverWait(driver, 20)
SCROLL_PAUSE_SEC = 10
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(SCROLL_PAUSE_SEC)
new_height = driver.execute_script("return document.body.scrollHeight")
if new_height == last_height:
break
last_height = new_height
hlink = []
elements = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".col-xs-2-4 a")))
for element in elements:
hlink_list = element.get_attribute('href')
hlink.append(hlink_list)
1条答案
按热度按时间xqkwcwgp1#
您需要等待元素加载完毕。
这将给予你在页面上的前15个hrefs。
要获得所有60个元素,您需要滚动页面,因为最初只加载了15个元素。