python-3.x Selenium购物网站上商品价格自动排序的问题和获取商品信息的问题

pb3s4cty  于 2023-03-04  发布在  Python
关注(0)|答案(1)|浏览(147)

价格跟踪器代码的一部分:

# libraries
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import matplotlib.pyplot as plt
from time import sleep
import inspect
import os
from bs4 import BeautifulSoup
import requests

# Get the search term and tracking period from the user
search_term = input("Please enter the name of the product you want to search: ")
months = input("Please enter the number of months you want to track the product: ").strip()

# Re-prompt until the answer is a whole number that int() can parse.
# str.isdecimal() is used rather than str.isdigit(): isdigit() also accepts
# characters such as superscript digits ("²"), which int() then rejects with
# a ValueError. Surrounding whitespace is stripped so " 3 " is accepted.
while not months.isdecimal():
    print("Warning: Please enter a valid integer value for the number of months.")
    months = input("Please enter the number of months you want to track the product: ").strip()
months = int(months)

# Launch Chrome through the local chromedriver binary and open the
# Hepsiburada homepage in a maximized window.
module_path = "C:/Users/Desktop/hepsiburada_price_tracker/chromedriver.exe"

# NOTE(review): the value 2 presumably means "block" for Chrome's
# notification content setting - confirm against the Chrome preferences docs.
prefs = {"profile.default_content_setting_values.notifications": 2}
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("prefs", prefs)

driver = webdriver.Chrome(options=chrome_options, executable_path=module_path)
driver.maximize_window()
driver.get("https://www.hepsiburada.com/")

# One shared explicit wait (up to 15 s per lookup) for the steps below.
# The find_element_by_* helpers were removed in Selenium 4.3, so every
# lookup goes through a By locator instead; waiting for each element to
# become clickable replaces the fixed sleeps between steps.
wait = WebDriverWait(driver, 15)

# Accept cookies (the banner may not be in the DOM right after get())
wait.until(EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler'))).click()

# Enter the search term in the search box and press Enter
search_box = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'theme-IYtZzqYPto8PhOx3ku3c')))
search_box.send_keys(search_term)
search_box.send_keys(Keys.RETURN)

# Click on the order button once the results page has rendered it
wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'horizontalSortingBar-Ce404X9mUYVCRa5bjV4D'))).click()

# Sort by increasing price
wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'horizontalSortingBar-PkoDOH7UsCwBrQaQx9bn'))).click()

# Give the result list a few seconds to re-render in the new order before
# anything reads it.
sleep(3)

# Get the link and name of the first product in the search results
results = driver.find_elements(By.XPATH, "//h3[@data-test-id='product-card-name']")
if not results:
    print("Sorry, we could not find the product you were looking for.")
    #driver.quit()
else:
    # Keep the WebElement (first_result0) separate from its text
    # (first_result): element lookups must be made on the element, a str has
    # no find_element method.
    first_result0 = results[0]
    first_result = first_result0.text
    print(first_result)
    # NOTE(review): assumes the name heading is nested inside the product
    # card's <a> element, so the nearest ancestor anchor carries the product
    # URL - confirm against the live page markup.
    product_link = first_result0.find_element(By.XPATH, "./ancestor::a[1]")
    product_url = product_link.get_attribute("href")
    # The matched <h3> is the product name itself, so its text is the name.
    product_name = first_result
    print("The product selected from the search results is {}: {}".format(product_name, product_url))

您好,我想查询在 Hepsiburada.com.tr 网站上搜索到的某个具体产品(例如:arko krem 20cc,即 arko cream 20cc)的最低价格信息,但我遇到了两个相互关联的问题。
1.当你手动进入这个网站并按升序排列商品时,它能正确排序,但当你自动测试它时,它不能排序。实际上代码点击了正确的按钮(Sıralama〉order,artan〉ascending),但排序是错误的。这是可以修复的吗?
2.我无法获取页面上显示的产品数量(例如搜索 20 毫升的 arko krem(arko 面霜)时显示的 115 个结果)、产品链接和卖家名称。我想先整体获取这些信息,然后再单独获取列表中的第一个结果,因为我已将产品按价格从低到高排序。我该如何用 Selenium 访问这些信息?
我有(first_result)信息,但无法处理其他信息。

手动排序时的价格:

卖方名称:

gr8qqesn

gr8qqesn1#

我试过使用 Chrome、Edge 和 Firefox 的 webdriver,但它们都有一个大问题:当你向下滚动或点击"加载更多"按钮时,页面不会加载新的商品。幸运的是,我找到了一个可行的替代方案,它叫做 undetected-chromedriver。

import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from urllib.parse import quote_plus

# Start an undetected-chromedriver Chrome session
options = uc.ChromeOptions()
options.add_argument('--blink-settings=imagesEnabled=false') # disable images so that pages load faster
options.add_argument('--disable-notifications')
driver = uc.Chrome(options=options)

search_term = 'pınar süt 1lt'
# quote_plus turns spaces into '+' like the previous str.replace did, and
# also percent-encodes characters such as '&', '#', '+' and '%' that would
# otherwise cut off or alter the q= parameter.
url = 'https://www.hepsiburada.com/ara?q=' + quote_plus(search_term)
driver.get(url)
# Shared explicit wait: each wait.until() call polls for up to 15 seconds
wait = WebDriverWait(driver, 15)

from selenium.common.exceptions import TimeoutException

# load all products
# The result count is read from the text of the second element matching the
# summary-bar class.
summary_parts = wait.until(EC.visibility_of_all_elements_located((By.CLASS_NAME, 'searchResultSummaryBar-AVnHBWRNB0_veFy34hco')))
number_of_products = int(summary_parts[1].text)

product_locator = (By.CSS_SELECTOR, 'li[class*=productListContent][id]')
number_of_loaded_products = 0
while number_of_loaded_products < number_of_products:
    loaded_products = wait.until(EC.visibility_of_all_elements_located(product_locator))
    number_of_loaded_products = len(loaded_products)
    # Scrolling to the last loaded card is what makes the page fetch more.
    driver.execute_script('arguments[0].scrollIntoView({block: "center", behavior: "smooth"});', loaded_products[-1])
    if number_of_loaded_products >= number_of_products:
        break
    try:
        # Wait for the list to grow instead of re-scrolling in a tight loop.
        wait.until(lambda d: len(d.find_elements(*product_locator)) > number_of_loaded_products)
    except TimeoutException:
        # No new cards arrived within the wait timeout: stop rather than
        # loop forever when the page never reaches the advertised count.
        break

# Get the link, name, price and seller of all the products
product = {key:[] for key in ['name','price','seller','url']}
product['name'] = [h3.text for h3 in driver.find_elements(By.CSS_SELECTOR, 'h3[data-test-id=product-card-name]')]
product['url'] = [a.get_attribute('href') for a in driver.find_elements(By.CSS_SELECTOR, 'a[class*=ProductCard]')]
# Prices use a decimal comma. Any '.' is dropped first so that a grouped
# amount such as "1.234,56 TL" parses as 1234.56 instead of raising
# ValueError on "1.234.56".
# NOTE(review): assumes '.' only ever appears as a thousands separator in
# the price text - confirm against the live page.
product['price'] = [
    float(div.text.replace('TL', '').replace('.', '').replace(',', '.'))
    for div in driver.find_elements(By.CSS_SELECTOR, 'div[data-test-id=price-current-price]')
]

# Visit every product page to read its seller name. The progress total is
# taken from the list actually being iterated.
total_urls = len(product['url'])
for i, url in enumerate(product['url']):
    print(f'Search seller names {i+1}/{total_urls}', end='\r')
    driver.get(url)
    seller_link = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.seller a')))
    product['seller'].append(seller_link.text)
    product['url'][i] = driver.current_url # useful to replace some long urls

# Order every column by ascending price, then report the first (cheapest) row
import pandas as pd

price_sorted = pd.DataFrame(product).sort_values(by='price')
product = price_sorted.to_dict('list')

summary = (
    f"\nThe product selected from the search results is:"
    f"\nname:   {product['name'][0]}"
    f"\nprice:  {product['price'][0]}"
    f"\nseller: {product['seller'][0]}"
    f"\nurl:    {product['url'][0]}"
)
print(summary)

输出:

Search seller names 37/37
The product selected from the search results is:
name:   Pınar Yağlı Süt 1 L
price:  27.9
seller: PınarOnline
url:    https://www.hepsiburada.com/pinar-yagli-sut-1-l-p-ZYBICN9286394?magaza=P%C4%B1narOnline

相关问题