通过 selenium 定位元素显式等待和不给出0

iswrvxsc  于 2022-11-10  发布在  其他
关注(0)|答案(1)|浏览(145)

使用这个url,我想找到属性为data-asin的div标记。当我在Chrome检查模式下使用//div[@data-asin]时,它给出了21个元素。但是,当试图以这两种方式通过Selify获取这些元素时,显式等待和直接长度得到的结果是0。正如我猜测的那样,Selify远程浏览器无法将这些元素中的任何一个作为DOM树获取。代码如下

import pandas as pd
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# reading from csv file url-s

def readCSV(path_csv):
    df=pd.read_csv(path_csv)
    return df

fileCSV=readCSV(r'C:\Users\Admin\Downloads\urls.csv')
length_of_column_urls=fileCSV['linkamazon'].last_valid_index()

def create_driver():
    chrome_options = Options()
    chrome_options.headless = True
    chrome_options.add_argument("start-maximized")
    # options.add_experimental_option("detach", True)
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument('--disable-blink-features=AutomationControlled')

    webdriver_service = Service(r'C:\Users\Admin\Downloads\chromedriver107v\chromedriver.exe')
    driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)

    return driver

# going to urls 1-by-1

def goToUrl_Se(driver):
    global counter
    counter = 0
    for i in range(0, length_of_column_urls + 1):
        xUrl = fileCSV.iloc[i, 1]
        print(xUrl,i)
        # going to url(amazn) via Selenium WebDriver
        driver.get(xUrl)
        parse_data()
        counter+=1
    driver.quit()

# fetch-parse the data from url page

def parse_data():
    global asin, title, bookform, priceNewProd,author
    wait=WebDriverWait(driver,timeout=77)

    try:
        x_index=wait.until(EC.visibility_of_all_elements_located((By.TAG_NAME,'//div[@data-asin]')))###Attention here
        print(len(x_index))
    except:
        y_index=driver.find_elements(By.TAG_NAME,'//div[@data-asin]')###Anf attention here
        print(len(y_index))

driver=create_driver()
goToUrl_Se(driver)
j2cgzkjk

j2cgzkjk1#

您必须提到XPath而不是tag_name:

try:
    x_index=wait.until(EC.visibility_of_all_elements_located((By.XPATH,'//div[@data-asin]')))###Attention here
    print(len(x_index))
except:
    y_index=driver.find_elements(By.XPATH,'//div[@data-asin]')###Anf attention here
    print(len(y_index))

相关问题