Web scraping with Python: loop through a list of URLs and convert each page to CSV

6ie5vjzr · asked 2023-03-15 · Python

I have a list of URLs whose contents I want to convert and save as CSV files on my local drive, using a substring of each URL as the filename. This is the code I have so far, but it only writes the first URL's data into the two separate files.

import csv
import requests
from bs4 import BeautifulSoup

link = ['https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_19.html',
        'https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_20.html']

def get_data(link):
    for url in link:
        res = requests.get(url)
        soup = BeautifulSoup(res.text,"lxml")

        for items in soup.select("table.table tr"):
            td = [item.get_text(strip=True) for item in items.select("th,td")]
            writer.writerow(td)

if __name__ == '__main__':
    for f in link:        
        f2 = f.split('audit/')[-1].split('.html')[0]   
        with open(f2 + '.csv',"w",newline="") as infile: 
            writer = csv.writer(infile)
            get_data(link)
Answer #1, by 1qczuiv0:

You don't need to loop over link again inside get_data(); just pass each url from the main loop into get_data():

import csv
import requests
from bs4 import BeautifulSoup

link = ['https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_19.html',
        'https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_20.html']

def get_data(url):
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "lxml")

    # "writer" is the module-level csv.writer created in the __main__ block
    for items in soup.select("table.table tr"):
        td = [item.get_text(strip=True) for item in items.select("th,td")]
        writer.writerow(td)

if __name__ == '__main__':
    for f in link:
        # filename = the part of the URL after "audit/" and before ".html",
        # e.g. "Emergency_Department_19"
        f2 = f.split('audit/')[-1].split('.html')[0]
        with open(f2 + '.csv', "w", newline="") as outfile:
            writer = csv.writer(outfile)
            get_data(f)
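
A side note on this approach: it works because writer is a module-level name by the time get_data() runs, but the function silently depends on that global. Below is a minimal sketch of a variant that passes the writer in explicitly and derives the filename from the URL path; the raise_for_status() call and the urlsplit()-based name handling are my additions rather than part of the original answer, and str.removesuffix() needs Python 3.9+.

import csv
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlsplit

link = ['https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_19.html',
        'https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_20.html']

def get_data(url, writer):
    res = requests.get(url)
    res.raise_for_status()  # fail loudly on HTTP errors instead of writing an empty CSV
    soup = BeautifulSoup(res.text, "lxml")

    for row in soup.select("table.table tr"):
        writer.writerow([cell.get_text(strip=True) for cell in row.select("th,td")])

if __name__ == '__main__':
    for url in link:
        # last path segment without the ".html" extension,
        # e.g. "Emergency_Department_19"
        name = urlsplit(url).path.rsplit('/', 1)[-1].removesuffix('.html')
        with open(name + '.csv', 'w', newline='') as outfile:
            get_data(url, csv.writer(outfile))

Passing the writer as a parameter keeps get_data() reusable and makes the write target explicit at the call site.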
