import datetime
from dateutil.relativedelta import relativedelta
# Function to generate the last X months
def get_last_months(start_date, months):
    """Yield ``(year, month)`` pairs for the most recent calendar months.

    The first pair is the month of ``start_date`` itself; each following
    pair is one calendar month earlier, rolling over year boundaries.

    Args:
        start_date: Any object exposing integer ``year`` and ``month``
            attributes (e.g. ``datetime.date`` or ``datetime.datetime``).
        months: Number of pairs to produce. Zero or a negative value
            produces nothing.

    Yields:
        Tuples ``(year, month)`` with ``month`` in ``1..12``, newest first.
    """
    # Put year and month on a single zero-based month axis so that stepping
    # back across a year boundary is plain integer arithmetic.
    month_index = start_date.year * 12 + (start_date.month - 1)
    for offset in range(months):
        year, month_zero_based = divmod(month_index - offset, 12)
        yield (year, month_zero_based + 1)
# Number of months to read, counting the current month.
rollback = 3
# Materialize the (year, month) pairs, newest first, starting at today's month.
months = list(get_last_months(datetime.datetime.today(), rollback))
# Create paths required
# NOTE(review): the month is formatted without zero padding ("2024/1/...");
# switch the placeholder to "{m:02d}" if the directories are zero-padded.
base_path = "{y}/{m}/filename"
paths = [base_path.format(y=year, m=month) for year, month in months]
# Each path is passed as its own positional argument so all of them are
# loaded into one DataFrame. `spark` is not defined here; presumably an
# existing SparkSession -- confirm in the surrounding context.
df = spark.read.parquet(*paths)
1条答案
按热度按时间yacmzcpb1#
下面的代码段应该可以工作。请将 `base_path` 替换为您自己的路径。上面的代码片段可以帮助您从多个路径读取数据，其余的逻辑需要您自行实现。