import pandas as pd
import numpy as np
from prophet import Prophet
# Fill a contiguous run of missing values in a daily time series with a
# Prophet forecast trained on the observations that precede the gap.
#
# Assumptions: missing values are NaN (not 0) and form one contiguous run.

# Sample data: 100 daily observations, rows 50-89 blanked out.
np.random.seed(0)
arr = np.random.randint(1, 200, 100)
# Store 'y' as float from the start so NaN can be written without relying on
# an implicit int -> float upcast (deprecated in recent pandas releases).
df = pd.DataFrame(arr.astype(float), columns=['y'])
df['ds'] = pd.date_range('2023-01-1', periods=100)
df.iloc[50:90, 0] = np.nan
og_df = df.copy()  # untouched copy, kept for the "before" plot

# Locate the gap.
nan_mask = df['y'].isna()
if not nan_mask.any():
    # Without a gap the training slice below would be empty.
    raise ValueError("no missing values in 'y' to forecast")
# Positional offset of the first NaN. Series.idxmax() returns an index
# *label*, which is only interchangeable with an iloc position when the
# frame has a default RangeIndex; argmax on the raw array is always positional.
first_nan_pos = int(nan_mask.to_numpy().argmax())

# Train on everything before the gap.
train = df.iloc[:first_nan_pos]
# Number of periods to predict = number of missing observations.
periods = int(nan_mask.sum())

# Fit the model, create a future DataFrame, and forecast.
# Add seasonality based on your actual data to get a better fit.
m = Prophet()
m.fit(train)
future = m.make_future_dataframe(periods=periods)  # add freq param if not using daily: freq='1h'
forecast = m.predict(future)

# Forecast rows that lie past the training window, renamed to match df.
missing_data = forecast.iloc[first_nan_pos:][['ds', 'yhat']].rename(columns={'yhat': 'y'})
# Fill only the 'y' values of the NaN rows, matching on the date rather than
# on the row index, so the result does not depend on how df is indexed.
df.loc[nan_mask, 'y'] = df.loc[nan_mask, 'ds'].map(missing_data.set_index('ds')['y'])

# Sample plot: original series (with gap) vs. series with the gap filled.
og_df.plot(x='ds', y='y', ylim=(0,500))
df.plot(x='ds', y='y', ylim=(0,500))
1条答案
按热度按时间kognpnkq1#
您可以尝试使用 Prophet 生成一些未来的预测值,用来填补缺失的数据。这假设您的缺失数据是 `NaN` 而不是 `0`,并且所有缺失数据都是连续的。这只是一个简单的例子,您可能需要调整季节性以获得更好的拟合效果。