pandas error when retrieving the data to display, AttributeError: 'NoneType' object has no attribute 'iloc'

Asked by kwvwclae on 2023-03-16

I have a small script that works correctly when all of the code is in a single file (this detail is important). Now I want to run the script inside a Tabcontrol with external files, so I moved the script code into an external file, page1.py, which is loaded by the Tabcontrol in main.py.
Now I get this error:

item = df.iloc[selection[-1]]
AttributeError: 'NoneType' object has no attribute 'iloc'

When do I get this error? The code is supposed to display a list of all the news titles from a site when I click the View Titles button (that part currently works fine). The error occurs when I select a title row and click the View Content button.
Where is the problem in the code? It is in the content function of the page1.py file:

def content(event=None):
    selection = listbox_title.curselection()
    print('selection:', selection)

    if selection:
        item = df.iloc[selection[-1]]
        url = item['link']

Keep in mind that the script previously worked fine as a single file. The code is now the same, just adapted to the tab control. If I try print(df), it prints correctly.
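A minimal standalone sketch (hypothetical names, independent of tkinter) that reproduces the same symptom: a function closing over a local df keeps seeing None even after another function rebinds a module-level df with global:

def build():
    def all_titles():
        global df           # rebinds a *module-level* df ...
        df = 'real data'    # ... not the df defined in build()
        print(df)           # prints 'real data', matching "print(df) prints correctly"

    def content():
        print(df)           # the closure resolves df to build()'s local one

    df = None               # local to build(); this is the df that content() sees
    return all_titles, content

all_titles, content = build()
all_titles()                # prints: real data
content()                   # prints: None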

main.py

import tkinter as tk
from tkinter import ttk
from tkinter.scrolledtext import ScrolledText

from page1 import Page1

root = tk.Tk()
root.geometry('480x720')
style = ttk.Style()
style.theme_use('default')

nb = ttk.Notebook(root)
nb.pack(fill='both', expand=1)

page1 = Page1(nb)

nb.add(page1, text='Tab1', compound='left')

root.mainloop()

Again, the problem is in the content function of the page1.py file.

page1.py

import tkinter as tk   
from tkinter import ttk
from tkinter.scrolledtext import ScrolledText
import requests
import requests_cache 
from bs4 import BeautifulSoup
import pandas as pd
import re
import json
from dateutil import parser
import datetime
import locale

class Page1(tk.Frame):
    def __init__(self, master, **kw):
        super().__init__(master, **kw)

        def get(place):
            headers = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
            }

            results = []
            response = requests.get(f'https://www.example.com/{place}/', headers=headers)
            soup = BeautifulSoup(response.content, 'html.parser')

            ####################################################################################
            #Cover
            cover_news = soup.find('div', {'class':'box pp'})
            link = cover_news.find('a', href=True)['href']
            coverNewsResponse = requests.get(link, headers=headers)
            coverSoup = BeautifulSoup(coverNewsResponse.content, 'html.parser')
            jsonStr = str(coverSoup.find('script'))
            jsonStr = re.search('({.*})', jsonStr).group(1)
            jsonData = json.loads(jsonStr)
            
            timePublished = parser.parse(jsonData['datePublished']).strftime("%H:%M")
            datePublished = parser.parse(jsonData['datePublished']).strftime("%Y-%m-%d")
            title = jsonData['headline']
            news = f" {timePublished} {place.upper()}, {title} (TMW)"
            results.append( [datePublished, timePublished, place, title, news, link] )


            ####################################################################################
            # Side panel
            side_news = soup.find_all('div', {'class':'box small'})
            for each in side_news:
                try:
                    link = each.find('a', href=True)['href']
                    sideNewsResponse = requests.get(link, headers=headers)
                    sideSoup = BeautifulSoup(sideNewsResponse.content, 'html.parser')
                    jsonStr = str(sideSoup.find('script'))
                    jsonStr = re.search('({.*})', jsonStr).group(1)
                    jsonData = json.loads(jsonStr)
                    
                    timePublished = parser.parse(jsonData['datePublished']).strftime("%H:%M")
                    datePublished = parser.parse(jsonData['datePublished']).strftime("%Y-%m-%d")
                    title = jsonData['headline']
                    news = f" {timePublished} {place.upper()}, {title} (TMW)"
                    results.append( [datePublished, timePublished, place, title, news, link] )
                except:
                    print("Error")
                    continue
                    
                
            ####################################################################################
            #Center
            news = soup.find_all('div', attrs={"class": "tcc-list-news"})

            for each in news:
                for div in each.find_all("div"):
                    timePublished  = div.find('span', attrs={'class': 'hh serif'}).text
                    datePublished = div.find_previous('div', {'class':'tcc-border upper date'})
                    
                    if datePublished:
                        if datePublished.text in ['Ieri']:
                            yesterday = datetime.datetime.today() - datetime.timedelta(days = 1)
                            datePublished = yesterday.strftime("%Y-%m-%d")
                        else:
                            locale.setlocale(locale.LC_ALL, 'de_DE')
                            currentYear = datetime.datetime.today().strftime("%Y")
                            dateStr = datePublished.text
                            dateStr = datetime.datetime.strptime(dateStr + ' ' + currentYear, '%A %d %B %Y')
                            datePublished = dateStr.strftime("%Y-%m-%d")
                    else:
                        datePublished = datetime.datetime.today().strftime("%Y-%m-%d")
                    
                    title = " ".join(span.text for span in div.select("a > span"))
                    news = f" {timePublished} {place.upper()}, {title} (TMW)" #rigo della notizia
                    link  = div.find('a')['href']
                    results.append( [datePublished, timePublished, place, title, news, link] )

            return results

        ####################################################################################
        #Titles
        def all_titles():
            global df

            allnews = []
            new = []

            for place in ['name1', 'name2']:         
                results = get(place)
                allnews += results
                text_download.insert('end', f"search: {place}\nfound: {len(results)}\n")

                for place in ['name1']:
                    for sublist in allnews:
                        for item in sublist:
                            if "Name1" in item:
                                new += [sublist]

                for place in ['name2']:
                    for sublist in allnews:
                        for item in sublist:
                            if "Name2" in item:
                                new += [sublist]

            df = pd.DataFrame(new, columns=['date', 'time', 'place', 'title', 'news', 'link'])
            df = df.sort_values(by=['date', 'time', 'place', 'title'], ascending=[False, False, True, True])
            df = df.drop_duplicates(subset=['date', 'time', 'place', 'title'])
            df = df.reset_index(drop=True)

            listbox_title.delete('0', 'end')

            for index, row in df.iterrows():
                #Name1
                for place in ['name1']:
                    if "Example" in row['news']:
                        listbox_title.insert('end', row['news'])

        def content(event=None): 
            selection = listbox_title.curselection()
            print('selection:', selection)

            if selection:
                item = df.iloc[selection[-1]]
                url = item['link']

                headers = {
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
                }

                session = requests_cache.CachedSession('titles')
                response = session.get(url, headers=headers)
                soup = BeautifulSoup(response.content, 'html.parser')

                content_download = "\n".join(item.get_text() for item in soup.select("div.text.mbottom"))

                text_download.delete('1.0', 'end')
                text_download.insert('end', content_download)

        df = None

        #GUI
        frame_title = tk.Frame(self)
        frame_title.pack(fill='both', expand=True, pady=5, padx=5)

        listbox_title = tk.Listbox(frame_title, selectbackground="#960000", selectforeground="white", bg="white")
        listbox_title.pack(side='left', fill='both', expand=True)

        scrollbar_title = tk.Scrollbar(frame_title)
        scrollbar_title.pack(side='left', fill='y')

        scrollbar_title['command'] = listbox_title.yview
        listbox_title.config(yscrollcommand=scrollbar_title.set)

        listbox_title.bind('<Double-Button-1>', content)

        text_download = ScrolledText(self, bg="white")
        text_download.pack(fill='both', expand=True, pady=0, padx=5)

        buttons_frame = tk.Frame(self)
        buttons_frame.pack(fill='x')

        button1 = tk.Button(buttons_frame, text="View Titles", command=all_titles) 
        button1.pack(side='left', pady=5, padx=5)

        button2 = tk.Button(buttons_frame, text="View Content", command=content)  
        button2.pack(side='left', pady=5, padx=(0,5))

Answer (by drnojrws):

Since you are already using a class in your code, I suggest using an instance variable instead of a global variable. The root cause of the error is that df = None inside __init__ creates a local variable that content() closes over, while global df in all_titles() rebinds a module-level name instead, so content() still sees None:

class Page1(tk.Frame):
    def __init__(self, master, **kw):
        ...
        def all_titles():
            #global df   # don't use global
            ...
            df = pd.DataFrame(new, columns=['date', 'time', 'place', 'title', 'news', 'link'])
            df = df.sort_values(by=['date', 'time', 'place', 'title'], ascending=[False, False, True, True])
            df = df.drop_duplicates(subset=['date', 'time', 'place', 'title'])
            self.df = df.reset_index(drop=True)  ### df to self.df in LHS

            listbox_title.delete('0', 'end')

            for index, row in self.df.iterrows():   ### df to self.df
                #Name1
                for place in ['name1']:
                    if "Example" in row['news']:
                        listbox_title.insert('end', row['news'])

        def content(event=None):
            selection = listbox_title.curselection()
            print('selection:', selection)

            if selection:
                item = self.df.iloc[selection[-1]]  ### df to self.df
                url = item['link']

                headers = {
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
                }

                session = requests_cache.CachedSession('titles')
                response = session.get(url, headers=headers)
                soup = BeautifulSoup(response.content, 'html.parser')

                content_download = "\n".join(item.get_text() for item in soup.select("div.text.mbottom"))

                text_download.delete('1.0', 'end')
                text_download.insert('end', content_download)

        self.df = None  ### df to self.df

        ...
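One caveat worth adding to this answer: since self.df starts as None, clicking View Content before View Titles would still raise the same AttributeError on self.df.iloc. A small guard in content() avoids that; a sketch of the reworked function, reusing the widget names from the code above:

        def content(event=None):
            # Guard: self.df stays None until all_titles() has run, so bail
            # out with a hint instead of raising the same AttributeError.
            if self.df is None:
                text_download.delete('1.0', 'end')
                text_download.insert('end', 'No titles loaded yet - click "View Titles" first.')
                return

            selection = listbox_title.curselection()
            if selection:
                item = self.df.iloc[selection[-1]]
                url = item['link']
                # ... fetch and display the article exactly as in the answer above ...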
