PyMySQL数据库,填充表格的问题

6ioyuze2  于 2022-12-17  发布在  Mysql
关注(0)|答案(1)|浏览(102)

这是我的Pythonmain.py:

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait
import time

from bs4 import BeautifulSoup
import requests

import mysql.connector

db = mysql.connector.connect(                                                                                           
    host="localhost",                                                                                                   
    user="root",
    passwd="root",
    database="testdatabase"                                                                                             
    )

mycursor = db.cursor()

try:
    #mycursor.execute("CREATE DATABASE animus")
    mycursor.execute("CREATE TABLE animus_list (name VARCHAR(255), voti VARCHAR(10), ranks VARCHAR(50),   popolarità VARCHAR(50), membri VARCHAR(50) )")

except:
    #delete_table = "DROP TABLE animus_list"
    #mycursor.execute(delete_table)
    print("Table already exists")

class Datascraping:
    def __init__(self):
        print("Let's start!")

    def MyAnimeList(self):
    
        '''GIVE SELENIUM THE GECKODRIVER LOCATION AND THE WEBSITE TO WORK ON'''
        #---------------------------------------------------------------------------------------------------------------------
        PATH = "C:\Program Files (x86)\chromedriver.exe"

        driver = webdriver.Chrome(PATH)

        driver.get("https://myanimelist.net/anime.php")

        print(driver.title)
     
        '''INPUT TO PYTHON THE ANIME YOU ARE SEARCHING. CAREFUL!!! THE SEARCHBAR OF THAT WEBSITE DOESN'T WORK WITH LESS THAN 3 LETTERS IN IT!!!'''
        #-----------------------------------------------------------------------------------------------------------------------

        selected_anime = str(input("Insert the anime you are searching (MIN 3 CHARACTERS):  "))
        while len(selected_anime) < 3:
            print("The search won't work with less than 3 characters!!")
            selected_anime = input("Insert the anime you are searching (MIN 3 CHARACTERS):  ")

        '''COOKIE BUTTON THAT APPEARS AS SOON AS I OPEN THE WEBSITE. IF I DON'T CLOSE IT, THE HTML OF THE PAGE WON'T BE REACHABLE AND I WILL GET AN EXCEPTION'''
        #-----------------------------------------------------------------------------------------------------------------------
        try:
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "css-47sehv")))
            puls_cookie = driver.find_element(By.CLASS_NAME, "css-47sehv")
            puls_cookie.click()
        except:
            print("The cookie window didn't appear this time!")
        #-----------------------------------------------------------------------------------------------------------------------

        '''WRITE IN THE WEBSITE SEARCHBAR'''
        #-----------------------------------------------------------------------------------------------------------------------
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "q")))                       #searchbar has ID = "q"
        search = driver.find_element(By.ID, "q")
        search.click()
        search.send_keys(anime_selezionato)                                                               #anime_selezionato means "selected anime"
        search.send_keys(Keys.RETURN)
        #-----------------------------------------------------------------------------------------------------------------------


        '''TRY TO OBTAIN THE ANIME WITH THE EXACT NAME THAT WE INSERTED IN anime_selezionato'''
        #-----------------------------------------------------------------------------------------------------------------------
        try:
           driver.get(WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, f"//table/tbody/tr/td[2]/div/a[strong='{anime_selezionato}']"))).get_attribute("href"))
        #-----------------------------------------------------------------------------------------------------------------------

            '''IF WE DON'T FIND THE ANIME WITH THE EXACT NAME WE INPUT, WE PRINT THE ANIME(s) THAT THE PAGE FOUND WITH THE KEY WE INSERTED IN THE SEARCHBAR. IS ONE OF THEM WHAT YOU WERE SEARCHING FOR?'''
        except:

        #-----------------------------------------------------------------------------------------------------------------------
            print("No anime with the same name found! Is it one of this, maybe?")

            listaanimu = []                                                 #create an empty list to  fill later with the list of animes found
            numero_di_anime = 1                                             #iterable element that we will need in the for loop. numero di anime means "number of animes"
            search2 = driver.find_elements(By.TAG_NAME, "strong")           #All anime names are stored    in strong tags
            for element in search2:
                nome = element.text                                         #nome means "name"
                print("-" + str(numero_di_anime) + "    " + element.text)   #the result will be like: -1   Demon Slayer
                listaanimu.append(nome)
                numero_di_anime += 1
            print("If one of this animes is what you are looking for, write the respective index number")
            anime = input("The anime I want is number:  ")
            try:
                anime2 = int(anime)                                         #this try/except is needed in case of a wrong input, like a letter, that would create an exception 
            except:
                anime2 = anime

            while anime2 not in range(1, 51):                               #the animes that appear are 50 at maximum. A number too big would not be found and give an exception
                print("Anime not found! Try again!!")
                anime = input("The anime I want is number:  ")
                try:
                    anime2 = int(anime)
                except:
                    anime2 = anime
        #-----------------------------------------------------------------------------------------------------------------------


            '''CLICK THE NAME OF THE ANIME ON THE SITE, TO OPEN ITS PAGE'''
        #-----------------------------------------------------------------------------------------------------------------------
            def anime_alternativo():
                try:
                    anime_selezionato = listaanimu[int(anime) - 1]                                                                      #The list of animes starts at 1, but lists in python start at 0
                    print(anime_selezionato)
                    print("Anime Found!")
                    try:
                        driver.get(WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, f"//table/tbody/tr/td[2]/div/a[strong='{anime_selezionato}']"))).get_attribute("href"))
                        anime_BeautifulSoup = driver.find_element(By.LINK_TEXT, "%s" % anime_selezionato).get_attribute("href")         #BeautifulSoup works with links, let's give it the link of the page of the selected anime
                        print(anime_BeautifulSoup)
                    except:
                        print("Anime not found! Are you sure it is part of the list?")
                except:
                    print("It is not a valid answer!!! ")                                                                                #these excepts should never be activated, shouldn't even be necessary
                BeautifulSoup_Scraping(anime_BeautifulSoup)                                                                              #give the link to beautifulSoup
         #-----------------------------------------------------------------------------------------------------------------------
    
                    '''NOW THAT THE PAGE OF THE SELECTED ANIME IS OPENED, WE DATASCRAPE IT WITH BEAUTIFULSOUP'''
        #-----------------------------------------------------------------------------------------------------------------------
            def BeautifulSoup_Scraping(link):

                html_text = requests.get(link).text
                soup = BeautifulSoup(html_text, 'lxml')

                rangevoti = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'na']                                                          #the range of votes of the website. if an anime is still unreleased, its vote will be na

                for numeri in rangevoti:
                    try:
                        voto = soup.find('div', class_='score-label score-%s' % (numeri)).text                                           #get the vote of the anime from the webpage
                    except:
                        print("Error! They may have invented another 'type' of vote!")

                try:
                    nome = soup.find('h1', class_='title-name h1_bold_none').text                                                        #get the name of the anime. this try/except is needed because some anime names have weird fonts that are not supported by BeautifulSoup
                except:
                    print("The anime could have a non supported font")

                rank = soup.find('span', class_='numbers ranked').text                                                                   #get the anime rank, popularity and number of users
                popularity = soup.find('span', class_='numbers popularity').text        
                users = soup.find('span', class_='numbers members').text

                rank1 = rank.replace('Ranked #', '', 1)                                                                                  #remove unnecessary characters, just give me the numbers
                popularity1 = popularity.replace('Popularity #', '', 1)
                users1 = users.replace('Members', '', 1)

                if rank == 'Ranked N/A':
                    rank1 = rank.replace('Ranked', '', 1)
                if rank == 'Popularity N/A':
                    popularity1 = popularity.replace('Popularity', '', 1)

                print(nome + " " + voto + " " + rank1 + " " + popularity1 + " " + users1)                                                #Print a preview of what the row on the database will look like

                name = str(nome)                                                
                vote = str(voto)
                rank2 = str(rank1)
                popularity2 = str(popularity1)
                users2 = str(users1)

                '''ERROR ZONE! MYSQL SAYS THAT I HAVE AN ERROR IN THE SYNTAX BUT THIS COMMAND WORKS IN ANOTHER PROGRAM, WHERE IT IS WRITTEN IN THE SAME EXACT WAY!'''
#---------------------------------------------------------------------------------------------------------------------------------------------------------------------

                data = "INSERT INTO animus_list (nome, voto, rank1, popularity1, users1) VALUES (%s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE"   #insert all the scraped data in the table
                values = (name, vote, rank2, popularity2, users2)
                mycursor.execute(data, values)
                db.commit()

                for x in mycursor:
                    print(x)

                #except:
                    #print("Cannot insert in database")
#---------------------------------------------------------------------------------------------------------------------------------------------------------------------



                '''CALL THE FUNCTION'''
        #-----------------------------------------------------------------------------------------------------------------------
            anime_alternativo()

        #-----------------------------------------------------------------------------------------------------------------------

        '''FINE PROGRAMMA'''
        #-----------------------------------------------------------------------------------------------------------------------
        time.sleep(5)                                                                                                                #close the browser window and end the program

        driver.quit()
        #-----------------------------------------------------------------------------------------------------------------------

scraping = Datascraping()
scraping.MyAnimeList()

整个程序都在一个类中,因为这个项目的最终结果是:一款会破坏网站的Tkinter(或者Kivi或者PyQt,还不确定该选什么)应用程序
代码中产生错误的部分在另一个程序中工作得很好(这里只是这个程序的beautifoulSoup)。我得到的异常说我在MySQL语法上犯了一个错误,但又一次。在其他程序中它工作得很好,并且具有相同的语法。

data = "INSERT INTO animus_list (nome, voto, rank1, popularity1, users1) VALUES (%s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE"   #insert all the scraped data in the table
values = (name, vote, rank2, popularity2, users2)
mycursor.execute(data, values)
db.commit()

for x in mycursor:
    print(x)

#except:
    #print("Cannot insert in database")

这是给我带来问题的部分。我尝试的是:- 删除表并创建新表-添加/删除要添加到表中的变量-更改MySQL中为每个变量保留的空间大小(VARCHAR(1),VARCHAR(100)....)

46qrfjad

46qrfjad1#

错误是由“重复键更新”引起的,当我删除它时,错误消失了。我以为我需要它来避免重复行,但我找到了另一种方法。谢谢你的帮助马库斯。祝你有美好的一天

相关问题