csv 为什么y轴的值不是按数字顺序排列的?

s4chpxco  于 12个月前  发布在  其他
关注(0)|答案(2)|浏览(105)

我正在读《Python速成班》(Python Crash Course)这本书,并在做其中的一个练习。练习的要求是绘制两个不同地点的最高和最低气温。我看到另一位用户在这里做同样的练习时用了字典,于是我也想用同样的方法来做。但画出来的图很让人困惑:y轴上的值没有按数值大小排列。这是为什么?
这是代码:

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import MultipleLocator
from datetime import datetime
import csv

# Open the csv file and create a dictionary with the information from the file
def get_weather_data(path, data):
    """Read a weather CSV file into ``data`` as one list per column.

    The first row of the file is used as the header. Every header name
    becomes a key of ``data`` holding the list of that column's values
    (an existing entry under the same key is replaced). Afterwards the
    DATE column is parsed with the format "%Y-%m-%d" and the TMAX and TMIN
    columns are converted with int().

    NOTE: reading and all conversions share a single try block whose
    ValueError handler does nothing. The first value that cannot be
    converted therefore ends the processing without any message, and every
    value that has not been reached yet stays a string.

    Args:
        path: Path of the CSV file to read.
        data: Dictionary that is filled in place.

    Returns:
        None; the result is stored in ``data``.
    """
    with open (path, 'r') as csv_file:
        csv_reader = csv.reader(csv_file)
        # The first row holds the column names.
        headers = next(csv_reader)

        # Start every column with an empty list.
        for title in headers:
            data[title] = []

        try:
            # Distribute the fields of each row over the column lists.
            for row in csv_reader:
                for i, title in enumerate(headers):
                    data[title].append(row[i])

            # Replace the date strings with datetime objects, in place.
            for i in range (len(data["DATE"])):
                data['DATE'][i] = datetime.strptime(data["DATE"][i],"%Y-%m-%d")

            # Replace the TMAX strings with integers, in place.
            for i in range(len(data['TMAX'])):
                data['TMAX'][i] = int(data['TMAX'][i])


            # Replace the TMIN strings with integers, in place.
            for i in range(len(data["TMIN"])):
                data["TMIN"][i] = int(data['TMIN'][i])

        except ValueError:
            # A failed conversion is ignored here; the loops above are not
            # resumed, so the remaining values are left unconverted.
            pass

        

# Tell the path of the file
path_valley = 'weather_data/death_valley_2021_full.csv'
path_sitka = 'weather_data/sitka_weather_2021_full.csv'

# Gets the data of Death Valley
data = {}
get_weather_data(path_valley,data)

# Plot the chart
# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and
# newer releases no longer accept the old name, so use whichever exists.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
fig, ax = plt.subplots()

# Plot the data for Death Valley
ax.plot(data['DATE'], data['TMAX'], color='r', label='Death Valley - High Temp.')
ax.plot(data['DATE'], data['TMIN'], color='b', label='Death Valley - Low Temp.')

# Get the data of Sitka (a fresh dictionary, so nothing is carried over)
data = {}
get_weather_data(path_sitka,data)

# Plot the data for Sitka
ax.plot(data['DATE'], data['TMAX'], color='m', label='Sitka - High Temp.')
ax.plot(data['DATE'], data['TMIN'], color='c', label='Sitka - Low Temp.')

# Format plot
title = 'Daily High and Low Temperature - 2021'
title += '\nSitka, AK and Death Valley, CA'
ax.set_title(title)

# Configure the axis: one major tick per month, labelled with the month name
ax.set_xlabel('', fontsize=16)
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%B'))
fig.autofmt_xdate()

ax.set_ylabel('Temperatures(F)', fontsize=16)
ax.tick_params(labelsize=14)

plt.legend()
plt.show()

图:

文件:https://github.com/ehmatthes/pcc_3e/tree/main/chapter_16/the_csv_file_format/weather_data

death_valley_2021_full.csv

"STATION","NAME","DATE","PRCP","SNOW","SNWD","TMAX","TMIN","TOBS"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-01","0.00","0.0","0.0","71","51","56"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-02","0.00","0.0","0.0","67","42","51"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-03","0.00","0.0","0.0","66","41","49"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-04","0.00","0.0","0.0","66","41","48"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-05","0.00","0.0","0.0","70","40","52"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-06","0.00","0.0","0.0","69","40","50"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-07","0.00","0.0","0.0","60","38","44"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-08","0.00","0.0","0.0","65","33","44"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-09","0.00","0.0","0.0","71","41","56"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-10","0.00","0.0","0.0","70","49","51"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-11","0.00","0.0","0.0","67","37","45"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-12","0.00","0.0","0.0","61","37","48"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-13","0.00","0.0","0.0","66","37","47"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-14","0.00","0.0","0.0","71","39","48"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-15","0.00","0.0","0.0","76","40","53"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-16","0.00","0.0","0.0","90","42","60"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-17","0.00","0.0","0.0","80","51","54"
"USC00042319","DEATH VALLEY NATIONAL PARK, CA US","2021-01-18","0.00","0.0","0.0","83","48","63"

sitka_weather_2021_full.csv

"STATION","NAME","DATE","AWND","PGTM","PRCP","TAVG","TMAX","TMIN","WDF2","WDF5","WSF2","WSF5","WT01","WT02","WT04","WT05","WT08","WT09"
"USW00025333","SITKA AIRPORT, AK US","2021-01-01","8.72"," 0411","0.01",,"44","40","  110","  110","21.0","28.0",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-02","7.61"," 0421","0.07",,"44","35","  110","  100","23.0","25.9",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-03","6.71"," 1506","0.29",,"43","36","  230","  220","23.9","29.1",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-04","15.88"," 0440","0.25",,"45","39","  110","   90","31.1","42.9",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-05","19.24"," 1321","0.57",,"45","40","  170","  190","45.0","65.1",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-06","12.97"," 0955","0.24",,"44","39","  110","  190","21.0","30.0",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-07","19.24"," 0711","0.07",,"48","42","  110","  130","33.1","44.1",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-08","10.74"," 1520","0.32",,"48","37","  290","  290","29.1","35.1",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-09","19.01"," 1354","0.00",,"46","38","  110","  120","33.1","45.0",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-10","16.55"," 0155","0.76",,"46","41","  120","  110","33.1","42.9",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-11","12.08"," 0439","0.37",,"44","38","  190","  190","23.0","35.1",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-12","13.42"," 1037","0.46",,"44","34","  220","  230","36.0","45.0",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-13","6.93"," 0623","0.89",,"39","34","   90","  130","14.1","16.1",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-14","20.58"," 1630","0.53",,"47","37","  100","   90","36.0","50.1",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-15","14.76"," 0418","0.79",,"48","41","  170","  170","36.0","47.0",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-16","17.22"," 1406","0.31",,"47","42","  110","  110","32.0","42.9",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-17","9.62"," 0517","0.46",,"46","39","  220","  220","23.0","32.0",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-18","17.22"," 1610","1.50",,"48","41","  150","  160","36.9","51.0",,,,,,
"USW00025333","SITKA AIRPORT, AK US","2021-01-19","11.63"," 0205","0.29",,"42","36","  260","  260","31.1","42.9",,,,,"    1",
gev0vcfq

gev0vcfq1#

except ValueError:
            pass

像这样捕获异常却连一条警告都不打印,通常不是好主意;在您的例子中,它恰恰把错误悄悄掩盖了。
您的数据中有些行缺少TMIN和TMAX值,因此在某个时刻,下面第一行代码实际执行的就是第二行:

data['TMAX'][i] = int(data['TMAX'][i])

data['TMAX'][i] = int('')

并抛出ValueError。这个错误被捕获了,而您永远不会知道,因为有那个except子句;for循环随即停止迭代,不再继续转换data['TMAX'],于是列表里既有整型值,也有尚未转换的字符串值。您可以在绘图前打印data['TMAX']来检查这一点。这就是y轴最终没有按数值排序的原因。我建议您把每一次转换单独包在一个try...except块中,遇到缺失的条目时用一个特定的值替换,并在屏幕上输出警告。我不知道对这些数据来说什么替代值才合理,所以我把转换改成了float,这样就可以用nan表示缺失值:

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import MultipleLocator
from datetime import datetime
import csv

# Open the csv file and create a dictionary with the information from the file
def get_weather_data(path, data, missingValue=float("nan")):
    """Read a weather CSV file into ``data`` as one list per column.

    The first row of the file is used as the header. Every header name
    becomes a key of ``data`` holding the list of that column's values
    (an existing entry under the same key is replaced). The DATE column is
    then parsed with the format "%Y-%m-%d", and the TMAX and TMIN columns
    are converted to float. A temperature that cannot be converted (for
    example an empty field) is replaced by ``missingValue`` and a warning
    is printed.

    Args:
        path: Path of the CSV file to read.
        data: Dictionary that is filled in place.
        missingValue: Replacement for temperatures that are not numbers.

    Returns:
        None; the result is stored in ``data``.

    Raises:
        StopIteration: If the file has no header row.
        IndexError: If a row has fewer fields than the header.
        KeyError: If the file has no DATE, TMAX or TMIN column.
        ValueError: If a DATE value does not match "%Y-%m-%d".
    """
    # The csv module expects files opened with newline='' so that line
    # breaks inside quoted fields reach the parser unchanged.
    with open(path, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        headers = next(csv_reader)

        for title in headers:
            data[title] = []

        for row in csv_reader:
            for i, title in enumerate(headers):
                data[title].append(row[i])

    # Replace the date strings with datetime objects, keeping the same list.
    dates = data['DATE']
    dates[:] = [datetime.strptime(text, "%Y-%m-%d") for text in dates]

    # CHANGED HERE: every value is converted inside its own try/except, so a
    # single bad entry no longer stops the conversion of the rest.
    for column in ('TMAX', 'TMIN'):
        _convert_column_to_float(data[column], column, missingValue)


def _convert_column_to_float(values, column, missing_value):
    """Convert ``values`` to float in place, substituting ``missing_value``."""
    for i, raw in enumerate(values):
        try:
            values[i] = float(raw)
        except ValueError:
            print("WARNING: Encountered value \"{}\" in row {} for {}, replacing with {}".format(raw, i, column, missing_value))
            values[i] = missing_value

# Tell the path of the file
path_valley = 'weather_data/death_valley_2021_full.csv'
path_sitka = 'weather_data/sitka_weather_2021_full.csv'

# Gets the data of Death Valley
data = {}
get_weather_data(path_valley,data)

# Plot the chart
# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and
# newer releases no longer accept the old name, so use whichever exists.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
fig, ax = plt.subplots()

# Plot the data for Death Valley
ax.plot(data['DATE'], data['TMAX'], color='r', label='Death Valley - High Temp.')
ax.plot(data['DATE'], data['TMIN'], color='b', label='Death Valley - Low Temp.')

# Get the data of Sitka (a fresh dictionary, so nothing is carried over)
data = {}
get_weather_data(path_sitka,data)

# Plot the data for Sitka
ax.plot(data['DATE'], data['TMAX'], color='m', label='Sitka - High Temp.')
ax.plot(data['DATE'], data['TMIN'], color='c', label='Sitka - Low Temp.')

# Format plot
title = 'Daily High and Low Temperature - 2021'
title += '\nSitka, AK and Death Valley, CA'
ax.set_title(title)

# Configure the axis: one major tick per month, labelled with the month name
ax.set_xlabel('', fontsize=16)
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%B'))
fig.autofmt_xdate()

ax.set_ylabel('Temperatures(F)', fontsize=16)
ax.tick_params(labelsize=14)

plt.legend()
plt.show()


备注
由于您遵循的是课程手册,因此我试图尽可能接近您的原始代码,因为它可能符合课程的进展程度。对于这种任务,pandas可能是一个不错的选择,因为它可以检测和替换缺失的值:

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import MultipleLocator
from datetime import datetime
import pandas as pd

# Paths of the two csv files, relative to the working directory
path_valley = 'weather_data/death_valley_2021_full.csv'
path_sitka = 'weather_data/sitka_weather_2021_full.csv'

# Get the data of Valley; read_csv parses the DATE column into datetimes
data_valley = pd.read_csv(path_valley, parse_dates=["DATE"])

# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and
# newer releases no longer accept the old name, so use whichever exists.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
fig, ax = plt.subplots()

# Plot the data for Death Valley
ax.plot(data_valley['DATE'], data_valley['TMAX'], color='r', label='Death Valley - High Temp.')
ax.plot(data_valley['DATE'], data_valley['TMIN'], color='b', label='Death Valley - Low Temp.')

# Get the data of Sitka
data_sitka = pd.read_csv(path_sitka, parse_dates=["DATE"])

# Plot the data for Sitka
ax.plot(data_sitka['DATE'], data_sitka['TMAX'], color='m', label='Sitka - High Temp.')
ax.plot(data_sitka['DATE'], data_sitka['TMIN'], color='c', label='Sitka - Low Temp.')

# Format plot
title = 'Daily High and Low Temperature - 2021'
title += '\nSitka, AK and Death Valley, CA'
ax.set_title(title)

# Configure the axis: one major tick per month, labelled with the month name
ax.set_xlabel('', fontsize=16)
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%B'))
fig.autofmt_xdate()

ax.set_ylabel('Temperatures(F)', fontsize=16)
ax.tick_params(labelsize=14)

plt.legend()
plt.show()
jei2mxaa

jei2mxaa2#

  • 另一个答案充分解释了为什么'TMIN'和'TMAX'没有被转换为float类型。

1.这个答案使用pandas,它将正确地将列转换为float,并将缺失值保留为np.nan
1.将所有数据加载到单个 Dataframe 中
1.清理数据
1.在单个plot调用中使用seaborn绘图
1.参见How to create a min-max plot by month with fill_between

导入和加载清理数据

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates

# csv file names
stations = ['death_valley_2021_full', 'sitka_weather_2021_full']

# create the path to each file
paths = [f'https://raw.githubusercontent.com/ehmatthes/pcc_3e/main/chapter_16/the_csv_file_format/weather_data/{station}.csv' for station in stations]

# read the needed columns of both data files from github and combine them into a single dataframe
df = pd.concat([pd.read_csv(path, usecols=['NAME', 'DATE', 'TMIN', 'TMAX']) for path in paths], ignore_index=True)

# split the values in the name column into separate columns;
# n=1 splits at the first ', ' only, so the result always has two columns
df[['NAME', 'STATE-COUNTRY']] = df['NAME'].str.split(', ', n=1, expand=True)

# convert DATE to a datetime.date
df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%d').dt.date

# convert dataframe to long form (it's currently a wide form)
df = df.melt(id_vars=['NAME', 'DATE'], value_vars=['TMIN', 'TMAX'], value_name='Temperature (F)')

# shorten DEATH VALLEY NATIONAL PARK;
# regex=False makes this a plain text replacement on every pandas version
df['NAME'] = df['NAME'].str.replace('DEATH VALLEY NATIONAL PARK', 'DEATH VALLEY', regex=False)

# change the format of capitalization
df['NAME'] = df['NAME'].str.capitalize()

# change the names for TMIN and TMAX
df['variable'] = df['variable'].map({'TMIN': 'Low Temp', 'TMAX': 'High Temp'})

# combine NAME with TMIN or TMAX
df['NAME'] = df['NAME'] + ' - ' + df['variable']

使用seaborn绘图

fig, ax = plt.subplots(figsize=(10, 8))

# Draw one line per NAME value; the sorted order pairs each name with one
# entry of the palette.
hue_order = sorted(df['NAME'].unique())
ax = sns.lineplot(
    data=df,
    x='DATE',
    y='Temperature (F)',
    hue='NAME',
    hue_order=hue_order,
    palette=['r', 'b', 'm', 'c'],
)

# Place the legend to the right of the axes, without a frame.
sns.move_legend(
    ax,
    loc='center left',
    bbox_to_anchor=(1, 0.5),
    frameon=False,
)

# Two-line plot title.
title = 'Daily High and Low Temperature - 2021' + '\nSitka, AK and Death Valley, CA'
ax.set_title(title)

# x axis: no label, one major tick per month shown as the month name.
ax.set_xlabel('', fontsize=16)
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%B'))
fig.autofmt_xdate()

# y axis: label and tick label size.
ax.set_ylabel('Temperatures(F)', fontsize=16)
ax.tick_params(labelsize=14)

df

NAME        DATE   variable  Temperature (F)
0       Death valley - Low Temp  2021-01-01   Low Temp             51.0
1       Death valley - Low Temp  2021-01-02   Low Temp             42.0
2       Death valley - Low Temp  2021-01-03   Low Temp             41.0
3       Death valley - Low Temp  2021-01-04   Low Temp             41.0
4       Death valley - Low Temp  2021-01-05   Low Temp             40.0
1455  Sitka airport - High Temp  2021-12-27  High Temp             35.0
1456  Sitka airport - High Temp  2021-12-28  High Temp             35.0
1457  Sitka airport - High Temp  2021-12-29  High Temp             41.0
1458  Sitka airport - High Temp  2021-12-30  High Temp             38.0
1459  Sitka airport - High Temp  2021-12-31  High Temp             39.0

相关问题