Python requests 库:下载文件

dzjeubhm  于 2022-11-26  发布在  Python
关注(0)|答案(1)|浏览(166)

我试图访问我的GPU驱动程序的AMD下载链接,但该网站检测到我没有使用浏览器,并将我重定向到他们论坛中的一个页面...('https://www.amd.com/fr/support/kb/faq/download-incomplete')
如何“欺骗”他们的系统访问它?
我试着把网站的 cookie 放进请求头(headers)里,但是没有用。下面是我的代码:

from requests import get
from bs4 import BeautifulSoup

# Request headers that present the script as a mobile Chrome browser.
header = {
    "user-agent": (
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/107.0.0.0 Mobile Safari/537.36"
    ),
}

url = 'https://www.amd.com/fr/support/graphics/amd-radeon-6000-series/amd-radeon-6900-series/amd-radeon-rx-6900-xt'
req = get(url, headers=header)

# Take the href of the first anchor carrying the "btn-transparent-black" class.
page = BeautifulSoup(req.text, 'html.parser')
latestlink = page.find("a", {"class": "btn-transparent-black"})['href']

# Copy every header of the first response into the headers that are sent
# with the follow-up request.
header.update(req.headers)

# Request the link and show the URL of the response that came back.
dlpage = get(latestlink, headers=header).url
print(dlpage)   # Actually the forum page
soat7uwm

soat7uwm1#

如果您需要下载链接,请尝试以下操作:

import requests
from bs4 import BeautifulSoup

# Request headers that present the client as a desktop Firefox browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:104.0) "
        "Gecko/20100101 Firefox/104.0"
    ),
}

url = 'https://www.amd.com/fr/support/graphics/amd-radeon-6000-series/amd-radeon-6900-series/amd-radeon-rx-6900-xt'

# Fetch the support page and collect every <a> tag that has an href attribute.
html = requests.get(url, headers=headers).text
soup = BeautifulSoup(html, 'lxml').find_all("a", href=True)

# Keep the hrefs ending in ".exe", in the order they appear on the page.
links = [
    href
    for href in (anchor["href"] for anchor in soup)
    if href.endswith(".exe")
]
print("\n".join(links))

这将输出:

https://drivers.amd.com/drivers/amd-software-adrenalin-edition-22.10.3-win10-win11-oct28.exe
https://drivers.amd.com/drivers/whql-amd-software-adrenalin-edition-22.5.1-win10-win11-may10.exe
https://drivers.amd.com/drivers/prographics/amd-software-pro-edition-22.q3-win10-win11.exe
https://drivers.amd.com/drivers/installer/22.20/beta/amd-software-adrenalin-edition-22.10.3-minimalsetup-221027_web.exe
https://drivers.amd.com/drivers/amd-software-adrenalin-edition-22.10.3-win10-win11-oct28.exe
https://drivers.amd.com/drivers/whql-amd-software-adrenalin-edition-22.5.1-win10-win11-may10.exe
https://drivers.amd.com/drivers/prographics/amd-software-pro-edition-22.q3-win10-win11.exe
https://drivers.amd.com/drivers/installer/22.20/beta/amd-software-adrenalin-edition-22.10.3-minimalsetup-221027_web.exe
https://drivers.amd.com/drivers/radeon-software-adrenalin-2020-22.6.1-win7-64bit-june23-2022.exe

如果您要下载驱动程序,请使用以下代码:

import os
from pathlib import Path
from shutil import copyfileobj

import requests
from bs4 import BeautifulSoup

# Request headers that present the client as a desktop Firefox browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:104.0) "
        "Gecko/20100101 Firefox/104.0"
    ),
}

def download_file(source_url: str, directory: str, session=None) -> None:
    """Stream the file at *source_url* into *directory*.

    The file is saved under the last path segment of the URL. *directory*
    (including missing parents) is created first if it does not exist.

    Args:
        source_url: URL of the file to fetch.
        directory: Folder the file is written into.
        session: Object whose ``get`` method performs the request, e.g. a
            ``requests.Session``. When omitted, the module-level session
            ``s`` is used, so existing two-argument calls keep working.

    Raises:
        requests.HTTPError: Propagated from ``raise_for_status()`` when the
            server answers with an error status; nothing is written then.
    """
    http = s if session is None else session
    save_dir = Path(directory)
    save_dir.mkdir(parents=True, exist_ok=True)
    file_name = save_dir / source_url.rsplit('/', 1)[-1]
    with http.get(source_url, stream=True) as response:
        # Check the status before opening the output file so an error page
        # is never saved under the driver's file name.
        response.raise_for_status()
        # The raw stream bypasses requests' content decoding; switch it on
        # so a gzip/deflate-encoded body is written decompressed.
        response.raw.decode_content = True
        with open(file_name, "wb") as output:
            copyfileobj(response.raw, output)

# One HTTP session serves the page fetch and every download. The name `s`
# is kept because download_file() reads the session from that global.
with requests.Session() as s:
    url = 'https://www.amd.com/fr/support/graphics/amd-radeon-6000-series/amd-radeon-6900-series/amd-radeon-rx-6900-xt'
    s.headers.update(headers)
    soup = BeautifulSoup(s.get(url).text, 'lxml').find_all("a", href=True)
    # Several anchors can carry the same href; dict.fromkeys drops the
    # repeats while keeping first-seen order, so no installer is downloaded
    # (and overwritten) more than once.
    links = list(
        dict.fromkeys(i["href"] for i in soup if i["href"].endswith(".exe"))
    )
    print("\n".join(links))
    for link in links:
        download_file(link, "downloads")

相关问题