无法在python中使用chrome下载PDF

mec1mxoz  于 2023-05-04  发布在  Go
关注(0)|答案(2)|浏览(249)

我在Python中使用Selenium。当我点击页面上可见的第一个PDF图标时,PDF文件并没有被下载,而是出现了一个带有“打开”按钮的页面。我尝试通过ID定位这个“打开”按钮并点击它,但似乎不起作用。

from selenium import webdriver
import time
from selenium.webdriver.common.by import By

# Download-related Chrome preferences passed to the browser profile.
chrome_prefs = {
    "download.default_directory": "C:\\path",
    "plugins.always_open_pdf_externally": True,
    "download.directory_upgrade": True,
}
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("prefs", chrome_prefs)
driver = webdriver.Chrome(executable_path='chromedriver.exe', options=chrome_options)

# Open the proceedings listing and click the cookie banner button.
driver.get('https://ieeexplore.ieee.org/xpl/conhome/10067248/proceeding?isnumber=10067251&sortType=vol-only-seq&rowsPerPage=75&pageNumber=1')
driver.find_element(By.CLASS_NAME, 'cc-btn').click()

# Fixed pause before collecting the PDF links.
time.sleep(20)
pdf_links = driver.find_elements(By.XPATH, "//a[@aria-label='PDF']")

# Only the first PDF link is exercised: click it, pause, click the same
# element a second time, then navigate back.
if pdf_links:
    first_link = pdf_links[0]
    first_link.click()
    time.sleep(5)
    first_link.click()
    driver.back()

# Keep the browser session alive for a long time.
time.sleep(1000)
62o28rlo

62o28rlo1#

这是因为“打开”按钮位于iframe内,而该页面共有2个iframe。可以尝试这样做:先切换到包含该按钮的第二个iframe,然后再点击按钮。

from selenium import webdriver
import time
from selenium.webdriver.common.by import By

# Download-related Chrome preferences passed to the browser profile.
options = webdriver.ChromeOptions()
prefs = {
    "download.default_directory": "C:\\path",
    "plugins.always_open_pdf_externally": True,
    "download.directory_upgrade": True,
}
options.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome(executable_path='chromedriver.exe', options=options)

# Open the proceedings listing and click the cookie banner button.
driver.get('https://ieeexplore.ieee.org/xpl/conhome/10067248/proceeding?isnumber=10067251&sortType=vol-only-seq&rowsPerPage=75&pageNumber=1')
cookie_button = driver.find_element(By.CLASS_NAME, 'cc-btn')
cookie_button.click()

time.sleep(20)

# Click the first PDF link. The return value of click() is None, so it is
# deliberately not stored anywhere.
driver.find_elements(By.XPATH, "//a[@aria-label='PDF']")[0].click()

time.sleep(20)

# The "open" button is looked up inside the second iframe on the page, so
# the driver has to switch into that frame before searching for it.
driver.switch_to.frame(driver.find_elements(By.TAG_NAME, "iframe")[1])

open_button = driver.find_element(By.ID, "open-button")
open_button.click()

# Pause after the click before the script ends.
time.sleep(20)
wkyowqbh

wkyowqbh2#

您可以尝试以下代码,它通过ChromeOptions在Chrome中禁用PDF下载提示。我还修改了您的代码,改用显式等待来等待元素。当我们点击PDF下载按钮时,会打开一个带有下载选项的新页面。然后,我们切换到包含下载按钮的iframe,点击该按钮下载文件,并等待5秒让下载完成。
在给定的代码中,下载PDF并返回到原始页面之后,代码会重新获取带有“PDF”标签的链接列表。这样做是为了更新页面上的链接列表:返回后,之前获取的PDF链接元素将不再存在于DOM中,如果我们再次访问这些旧的元素引用,就会引发陈旧元素异常(StaleElementReferenceException)。

import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

PDF_LINK_LOCATOR = (By.XPATH, "//a[@aria-label='PDF']")
WAIT_SECONDS = 20

# Download-related Chrome preferences passed to the browser profile.
options = webdriver.ChromeOptions()
preferences = {
    "download.prompt_for_download": False,
    "plugins.always_open_pdf_externally": True,
    "download.open_pdf_in_system_reader": False,
    "profile.default_content_settings.popups": 0,
    "download.default_directory": "your download folder path here",
}
options.add_experimental_option("prefs", preferences)
# NOTE(review): newer Selenium 4 releases no longer accept `executable_path`
# and expect a Service object instead - confirm against the installed version.
driver = webdriver.Chrome(executable_path='chromedriver.exe', options=options)
driver.get(
    'https://ieeexplore.ieee.org/xpl/conhome/10067248/proceeding?isnumber=10067251&sortType=vol-only-seq&rowsPerPage=75&pageNumber=1')
cookie_button = driver.find_element(By.CLASS_NAME, 'cc-btn')
cookie_button.click()
time.sleep(20)

# Count the links once; the elements themselves are looked up again on every
# pass because references obtained before navigating away go stale.
pdf_count = len(driver.find_elements(*PDF_LINK_LOCATOR))
for index in range(pdf_count):
    # Re-fetch the list of links
    pdf_buttons = WebDriverWait(driver, WAIT_SECONDS).until(
        EC.presence_of_all_elements_located(PDF_LINK_LOCATOR))
    if index >= len(pdf_buttons):
        # The page now shows fewer links than at the start; stop instead of
        # failing with an IndexError.
        break
    link = pdf_buttons[index]

    # Click on the link
    WebDriverWait(driver, WAIT_SECONDS).until(EC.element_to_be_clickable(link))
    driver.execute_script("arguments[0].scrollIntoView()", link)
    link.click()

    # The download link is looked up inside an iframe, so switch into it first.
    frame = WebDriverWait(driver, WAIT_SECONDS).until(
        EC.presence_of_element_located((By.XPATH, "//iframe[@src]")))
    driver.switch_to.frame(frame)
    down = WebDriverWait(driver, WAIT_SECONDS).until(
        EC.presence_of_element_located((By.XPATH, "//a[@href]")))
    down.click()
    time.sleep(5)

    # Leave the iframe before navigating back so the next lookup runs against
    # the top-level document.
    driver.switch_to.default_content()
    driver.back()

相关问题