import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn as sns
import numpy as np
import calendar
from math import floor
from datetime import date
# Pandas display limits and the default figure size used by every chart below.
for option_name, option_value in (
    ('display.max_columns', 700),
    ('display.max_rows', 100),
    ('display.min_rows', 10),
    ('display.expand_frame_repr', True),
):
    pd.set_option(option_name, option_value)
plt.rcParams.update({'figure.figsize': (16.0, 10.0)})
# plt.style.use('ggplot')
# sns.set_style("white")
# Workbook sheets read below: '2020' and 'Portland_dailyclimatedata1940-2'
# The workbook sits in the 'data' folder next to the current working directory.
p = Path.cwd().parent / 'data' / 'Portland_dailyclimatedata1940-2019.xlsx'
# History through the end of 2019; the first six rows of the sheet are skipped.
pdx_19 = pd.read_excel(p, sheet_name='Portland_dailyclimatedata1940-2', skiprows=6)
# 2020 readings live on their own sheet.
pdx_20 = pd.read_excel(p, sheet_name='2020')
# Notebook previews of both frames.
pdx_19.head()
pdx_20.head()
# Remove the columns that the analysis never reads.
pdx_19 = pdx_19.drop(columns=['AVG or Total'])
pdx_20 = pdx_20.drop(columns=['Departure', 'HDD', 'CDD', 'New_Snow', 'Snow_Depth'])
# Every column after the first three is a day of the month; prefix those with
# 'v_' so pd.wide_to_long can pick them up by stub name.
leading_cols = list(pdx_19.columns[:3])
day_cols = [f'v_{day}' for day in pdx_19.columns[3:]]
pdx_19.columns = leading_cols + day_cols
# The third column holds the record type. Keep TX (max temp), TN (min temp)
# and PR (mapped to 'PRE', precipitation, further down).
pdx_19 = pdx_19.rename(columns={'Unnamed: 2': 'TYPE'})
pdx_19 = pdx_19[pdx_19['TYPE'].isin(['TX', 'TN', 'PR'])]
# Give the 2020 measurement columns a shared 'v' stub for pd.wide_to_long.
pdx_20 = pdx_20.rename(columns={'Date': 'date', 'MAX': 'v1', 'MIN': 'v2', 'AVG': 'v3', 'Precip': 'v4'})
pdx_20.head()
pdx_19.head()
# Tidy the 1940 - 2019 data: one row per (year, month, type, day) with the
# reading in column 'v'.
pdx = pd.wide_to_long(
    pdx_19,
    stubnames='v',
    sep='_',
    i=['YR', 'MO', 'TYPE'],
    j='day',
).reset_index()
# More descriptive type labels.
pdx['TYPE'] = pdx['TYPE'].map({'TX': 'MAX', 'TN': 'MIN', 'PR': 'PRE'})
# pd.to_datetime needs columns literally named year / month / day.
pdx = pdx.rename(columns={'YR': 'year', 'MO': 'month'})
# '-' marks a day that does not exist in that month; discard those rows.
pdx = pdx[pdx['v'].ne('-')].copy()
# Assemble a real date from the three component columns.
pdx['date'] = pd.to_datetime(pdx[['year', 'month', 'day']])
pdx.head()
# Tidy the 2020 sheet: columns v1..v4 become rows keyed by a numeric TYPE.
pdx_20_l = (
    pd.wide_to_long(pdx_20, stubnames=['v'], i=['date'], j='TYPE')
    .reset_index()
)
# Translate the numeric suffix back into the measurement name.
pdx_20_l['TYPE'] = pdx_20_l['TYPE'].map({1: 'MAX', 2: 'MIN', 3: 'AVG', 4: 'PRE'})
# Split the date into the same year / month / day columns the older data has.
pdx_20_l = pdx_20_l.assign(
    year=pdx_20_l['date'].dt.year,
    month=pdx_20_l['date'].dt.month,
    day=pdx_20_l['date'].dt.day,
)
pdx_20_l
# Append the 2020 rows to the 1940 - 2019 rows.
pdx = pd.concat([pdx, pdx_20_l]).reset_index(drop=True)
# 'M' and 'T' are non-numeric markers in the value column (presumably
# "missing" and "trace" -- confirm against the data source). Both become NaN,
# so the affected rows are removed by the dropna below.
# The result is assigned back to the column: an in-place replace reached
# through attribute access (`pdx.v.replace(..., inplace=True)`) can operate
# on a temporary Series and leave `pdx` untouched.
pdx['v'] = pdx['v'].replace({'M': np.nan, 'T': np.nan}).astype('float')
# Drop every row that has a missing value in any column.
pdx.dropna(inplace=True)
# Decade label, e.g. 1987 -> "1980's".
pdx['dec'] = pdx.year.apply(lambda x: f"{floor(x/10)*10}'s")
# Temperature rows only (everything that is not precipitation).
temp = pdx[pdx.TYPE != 'PRE'].copy().reset_index(drop=True)
temp.TYPE.unique()
# Bucket each reading into a labelled temperature range.
# Left-closed bins with open outer edges make every bin match its label:
# 64.5 falls in '< 65' (not '65 - 74'), and readings at or below 0 or above
# 200 are still binned instead of becoming NaN.
temp['range'] = pd.cut(
    temp.v,
    bins=[-np.inf, 65, 75, 85, 95, np.inf],
    right=False,
    labels=['< 65', '65 - 74', '75 - 84', '85 - 94', '>= 95'],
)
# NOTE(review): `display` is not imported in this file; presumably the
# IPython/Jupyter built-in -- confirm before running as a plain script.
display(temp.head())
display(temp.tail())
# Precipitation ('PRE') rows only, re-indexed from zero.
precip = pdx.loc[pdx['TYPE'].eq('PRE')].copy().reset_index(drop=True)
precip['TYPE'].unique()
# Preview both ends of the frame.
display(precip.head())
display(precip.tail())
# Daily high ('MAX') readings only.
pdx_max = temp[temp.TYPE == 'MAX'].reset_index(drop=True)
pdx_max
# January through May.
pdx_max_jan_may = pdx_max[pdx_max.date.dt.month < 6]
# Group by year and range: number of days per calendar year in each
# temperature range. After reset_index the year sits in a column named 'date'
# (the name of the grouping Series) and 'range' stays as the index.
# observed=False is spelled out so ranges with no days stay in the result
# with a count of 0, whatever the pandas default is.
pdx_max_g = (
    pdx_max_jan_may
    .groupby([pdx_max_jan_may.date.dt.year, 'range'], observed=False)['v']
    .count()
    .reset_index(level=0)
)
display(pdx_max_g.head())
display(pdx_max_g.tail())
# One grouped bar chart per decade: number of Jan - May days whose daily high
# fell in each temperature range, one bar colour per year.
years = list(range(1950, 2031, 10))  # each entry is the exclusive end of a decade
with sns.axes_style("darkgrid"):
    for year in years:
        plt.figure()
        # The 'date' column of pdx_max_g holds the calendar year.
        data = pdx_max_g[(pdx_max_g.date >= year - 10) & (pdx_max_g.date < year)]
        ax = sns.barplot(x=data.index, y=data.v, hue=data.date)
        plt.annotate('Prepared By: Trenton McKinney', xy=(3, 140.1), xytext=(3, 140.1), fontsize=8)
        # Write the day count above every non-empty bar. The loop variable is
        # `patch` so it no longer rebinds the module-level data path `p`.
        for patch in ax.patches:
            height = patch.get_height()
            if height > 0:
                ax.annotate(
                    format(height, '.0f'),
                    (patch.get_x() + patch.get_width() / 2., height),
                    ha='center', va='center', fontsize=8,
                    xytext=(0, 10), textcoords='offset points',
                )
        plt.ylim(0, 150)
        plt.ylabel('Days')
        plt.xlabel('High Temperatures °F')
        plt.title(f"Portland, OR\nJan - May High Temperature Days: {year-10}'s")
# June daily highs, counted per calendar year and temperature range.
# After reset_index the year sits in a column named 'date' and 'range' is the
# index. observed=False keeps ranges with no days in the result (count 0)
# regardless of the pandas default.
pdx_max_june = pdx_max[pdx_max.date.dt.month == 6]
pdx_max_june = (
    pdx_max_june
    .groupby([pdx_max_june.date.dt.year, 'range'], observed=False)['v']
    .count()
    .reset_index(level=0)
)
pdx_max_june
# One grouped bar chart per decade, one bar colour per year.
years = list(range(1950, 2031, 10))  # each entry is the exclusive end of a decade
with sns.axes_style("darkgrid"):
    for year in years:
        plt.figure()
        data = pdx_max_june[(pdx_max_june.date >= year - 10) & (pdx_max_june.date < year)]
        ax = sns.barplot(x=data.index, y=data.v, hue=data.date)
        plt.annotate('Prepared By: Trenton McKinney', xy=(0, 24.3), xytext=(0, 24.3), fontsize=8)
        # Write the day count above every non-empty bar. The loop variable is
        # `patch` so it no longer rebinds the module-level data path `p`.
        for patch in ax.patches:
            height = patch.get_height()
            if height > 0:
                ax.annotate(
                    format(height, '.0f'),
                    (patch.get_x() + patch.get_width() / 2., height),
                    ha='center', va='center', fontsize=9,
                    xytext=(0, 10), textcoords='offset points',
                )
        plt.ylim(0, 25)
        plt.ylabel('Days')
        plt.xlabel('High Temperatures °F')
        plt.title(f"Portland, OR\nJune High Temperature Days: {year-10}'s")
# Month-end resampled mean of the daily highs.
# Only numeric columns are averaged: pdx_max also carries text/categorical
# columns (TYPE, dec, range) that cannot be averaged. The offset object
# replaces the lowercase 'm' alias, whose spelling is deprecated.
pdx_m_mean = (
    pdx_max.set_index('date')
    .select_dtypes('number')
    .resample(pd.offsets.MonthEnd())
    .mean()
)
years = list(range(1950, 2031, 10))  # each entry is the exclusive end of a decade
with sns.axes_style("darkgrid"):
    # Start a fresh figure so the lines are not drawn onto an earlier chart.
    plt.figure()
    # Every decade is drawn on the same axes; each lineplot call takes the
    # next colour of the cycle.
    for year in years:
        data = pdx_m_mean[(pdx_m_mean.index.year >= year - 10) & (pdx_m_mean.index.year < year)]
        ax = sns.lineplot(x=data.index, y=data.v)
    # The credit is placed once, at 2000-01-01 on the date axis. The original
    # x value 730120 is that date's proleptic ordinal, which only matches
    # matplotlib's date numbering when the epoch is 0001-01-01; an actual date
    # is converted by the axis whatever the epoch is.
    credit_xy = (date(2000, 1, 1), 32)
    plt.annotate('Prepared By: Trenton McKinney', xy=credit_xy, xytext=credit_xy, fontsize=10)
    plt.xlabel('Year')
    plt.ylabel('Monthly Mean Max Temperature °F')
    plt.title(f"Portland, OR\nResampled Monthly Mean High Temperature")
# Year-end resampled mean of the daily highs.
# 1940 is filtered out because it's a partial year (only 2 months).
# Only numeric columns are averaged: pdx_max also carries text/categorical
# columns (TYPE, dec, range) that cannot be averaged. The offset object
# replaces the lowercase 'y' alias, whose spelling is deprecated.
pdx_y_mean = (
    pdx_max[pdx_max.year > 1940]
    .set_index('date')
    .select_dtypes('number')
    .resample(pd.offsets.YearEnd())
    .mean()
)
years = list(range(1950, 2021, 10))  # each entry is the exclusive end of a decade
with sns.axes_style("darkgrid"):
    # Start a fresh figure so the lines are not drawn onto an earlier chart.
    plt.figure()
    # Every decade is drawn on the same axes; each lineplot call takes the
    # next colour of the cycle.
    for year in years:
        data = pdx_y_mean[(pdx_y_mean.index.year >= year - 10) & (pdx_y_mean.index.year < year)]
        ax = sns.lineplot(x=data.index, y=data.v)
    # The credit is placed once, at 2000-01-01 on the date axis. The original
    # x value 730120 is that date's proleptic ordinal, which only matches
    # matplotlib's date numbering when the epoch is 0001-01-01; an actual date
    # is converted by the axis whatever the epoch is.
    credit_xy = (date(2000, 1, 1), 58.7)
    plt.annotate('Prepared By: Trenton McKinney', xy=credit_xy, xytext=credit_xy, fontsize=9)
    plt.xlabel('Year')
    plt.ylabel('Yearly Mean Max Temperature °F')
    plt.title(f"Portland, OR\nResampled Yearly Mean High Temperature")
# Mean daily high per (year, month), pivoted so that rows are months and
# columns are years.
year_key = pdx_max.date.dt.year
month_key = pdx_max.date.dt.month
mym = pdx_max.groupby([year_key, month_key])['v'].mean().unstack(level=0)
mym.iloc[:, :5]
# Column offsets that start each group of ten year-columns.
ix = list(range(0, mym.shape[1], 10))
with sns.axes_style("darkgrid"):
    for first_col in ix:
        plt.figure()
        # Ten consecutive year-columns per figure, one line per year.
        data = mym.iloc[:, first_col:first_col + 10]
        sns.lineplot(data=data, markers=None, dashes=False)
        plt.annotate('Prepared By: Trenton McKinney', xy=(10, 31), xytext=(10, 31), fontsize=10)
        # Month numbers 1..12 on the x axis are shown as month names.
        plt.xticks(np.arange(1, 13), calendar.month_name[1:13])
        plt.ylim(30, 100)
        plt.xlabel('Month')
        plt.ylabel('Mean Max Temperature °F')
        plt.title(f"Portland, OR\nMonthly Mean per Year High Temperature")
        plt.show()
# Mean daily high per (decade, month), pivoted so that rows are months and
# columns are decade labels.
mymd = (
    pdx_max
    .groupby(['dec', pdx_max.date.dt.month])['v']
    .mean()
    .unstack(level=0)
)
mymd
with sns.axes_style("darkgrid"):
    plt.figure()
    # One line per decade.
    sns.lineplot(data=mymd, markers=None, dashes=False)
    plt.annotate('Prepared By: Trenton McKinney', xy=(10, 40.5), xytext=(10, 40.5), fontsize=10)
    # Month numbers 1..12 on the x axis are shown as month names.
    plt.xticks(np.arange(1, 13), calendar.month_name[1:13])
    plt.ylim(40, 85)
    plt.xlabel('Month')
    plt.ylabel('Mean Max Temperature °F')
    plt.title(f"Portland, OR\nMonthly Mean Per Decade High Temperature")
    plt.show()
# PRE: precipitation data only -- the rows recorded for 2020.
pdx_20_lp = precip[precip.year == 2020]
pdx_20_lp.head()
# Wider than the default figure: there is one bar per recorded day.
plt.figure(figsize=(25, 10))
with sns.axes_style("darkgrid"):
    # x and y are passed by keyword; current seaborn releases no longer
    # accept the data vectors positionally.
    sns.barplot(x=pdx_20_lp.date, y=pdx_20_lp.v)
    plt.annotate('Prepared By: Trenton McKinney', xy=(130, 1.03), xytext=(130, 1.03), fontsize=8)
    # Rotate the tick labels, then keep only the first 10 characters of each
    # label text.
    ticks, labels = plt.xticks(rotation=90)
    labels = [label.get_text()[:10] for label in labels]
    plt.xticks(ticks=ticks, labels=labels)
    plt.ylabel('Precipitation (inches)')
    plt.xlabel('Date')
    plt.title(f"Portland, OR\n 2020: Jan - June 16\nPrecipitation Days")
# Distribution of the 2020 daily precipitation values, one box per month.
with sns.axes_style("darkgrid"):
    # Start a fresh figure so the boxes are not drawn over the previous chart.
    plt.figure()
    # x and y are passed by keyword; current seaborn releases no longer
    # accept the data vectors positionally.
    sns.boxplot(x=pdx_20_lp.date.dt.month, y=pdx_20_lp.v)
    # Box positions 0..5 are labelled January .. June.
    plt.xticks(np.arange(0, 6), calendar.month_name[1:7])
    # plt.annotate('Prepared By: Trenton McKinney', xy=(3, 0.9), xytext=(3, 0.9), fontsize=8)
    plt.ylabel('Precipitation (inches)')
    plt.xlabel('Month')
    plt.title(f"Portland, OR\n 2020: Jan - June 16\nPrecipitation")
# `pdx_precip` is a second name for the same precipitation frame.
pdx_precip = precip
# Mean daily precipitation per (decade, month), pivoted so that rows are
# months and columns are decade labels.
pmymd = (
    pdx_precip
    .groupby(['dec', 'month'])['v']
    .mean()
    .unstack(level=0)
)
pmymd
with sns.axes_style("darkgrid"):
    plt.figure()
    # One line per decade.
    sns.lineplot(data=pmymd, markers=None, dashes=False)
    # plt.annotate('Prepared By: Trenton McKinney', xy=(10, 0.06), xytext=(10, 0.06), fontsize=10)
    # Month numbers 1..12 on the x axis are shown as month names.
    plt.xticks(np.arange(1, 13), calendar.month_name[1:13])
    plt.xlabel('Month')
    plt.ylabel('Mean Precipitation (inches)')
    plt.title(f"Portland, OR\nMonthly Mean Amount of Precipitation by Decade")
    plt.show()
# Long form of the decade/month means: columns 'month', 'dec' and 0, where
# the column labelled 0 holds the mean value.
pmymds = pmymd.stack().reset_index()
with sns.axes_style("darkgrid"):
    plt.figure(figsize=(20, 10))
    # x and y are passed by keyword; current seaborn releases no longer
    # accept them positionally. y=0 is the column label, not a constant.
    ax = sns.barplot(x='month', y=0, data=pmymds, hue='dec')
    # Write the value above every non-empty bar. The loop variable is
    # `patch` so it no longer rebinds the module-level data path `p`.
    for patch in ax.patches:
        height = patch.get_height()
        if height > 0:
            ax.annotate(
                format(height, '.2f'),
                (patch.get_x() + patch.get_width() / 2., height),
                ha='center', va='center', fontsize=9, rotation=90,
                xytext=(0, 10), textcoords='offset points',
            )
    # Legend sits outside the axes, to the right.
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    # plt.annotate('Prepared By: Trenton McKinney', xy=(9, 0.351), xytext=(9, 0.351), fontsize=10)
    # Bar groups at positions 0..11 are labelled with month names.
    plt.xticks(np.arange(0, 12), calendar.month_name[1:13])
    plt.xlabel('Month')
    plt.ylabel('Mean Precipitation (inches)')
    plt.title(f"Portland, OR\nMonthly Mean Amount of Precipitation by Decade")
    plt.show()
# Number of rows per year with precipitation greater than zero, as a frame
# with columns 'year' and 'count'.
# groupby().size() replaces `['year'].agg({'count': 'count'})`: renaming
# through a dict on a single grouped column is rejected by pandas >= 1.0.
precip_days_year = (
    pdx_precip[pdx_precip.v > 0]
    .groupby('year')
    .size()
    .reset_index(name='count')
)
with sns.axes_style("darkgrid"):
    # Start a fresh figure so the bars are not drawn over a previous chart.
    plt.figure()
    ax = sns.barplot(x='year', y='count', data=precip_days_year)
    plt.xticks(rotation=90)
    # plt.annotate('Prepared By: Trenton McKinney', xy=(3, 140.1), xytext=(3, 140.1), fontsize=8)
    # Write the day count above every non-empty bar. The loop variable is
    # `patch` so it no longer rebinds the module-level data path `p`.
    for patch in ax.patches:
        height = patch.get_height()
        if height > 0:
            ax.annotate(
                format(height, '.0f'),
                (patch.get_x() + patch.get_width() / 2., height),
                ha='center', va='center', fontsize=9, rotation=90,
                xytext=(0, 10), textcoords='offset points',
            )
    plt.ylabel('Days of Precipitation')
    plt.xlabel('Year')
    plt.title(f"Portland, OR\nPrecipitation Days Per Year")
    plt.show()
# Mean daily precipitation per calendar month over the whole record, as a
# frame with columns 'month' and 'mean'.
# A plain mean() plus reset_index(name=...) replaces `agg({'mean': 'mean'})`:
# renaming through a dict on a single grouped column is rejected by
# pandas >= 1.0.
precip_days_month = pdx_precip.groupby('month')['v'].mean().reset_index(name='mean')
# Start a fresh figure so the bars are not drawn over a previous chart.
plt.figure()
ax = sns.barplot(x='month', y='mean', data=precip_days_month)
plt.xticks(rotation=90)
# plt.annotate('Prepared By: Trenton McKinney', xy=(3, 140.1), xytext=(3, 140.1), fontsize=8)
# Bars at positions 0..11 are labelled with month names.
plt.xticks(np.arange(0, 12), calendar.month_name[1:13])
# Write the mean above every non-empty bar. The loop variable is `patch` so
# it no longer rebinds the module-level data path `p`.
for patch in ax.patches:
    height = patch.get_height()
    if height > 0:
        ax.annotate(
            format(height, '.2f'),
            (patch.get_x() + patch.get_width() / 2., height),
            ha='center', va='center', fontsize=8,
            xytext=(0, 10), textcoords='offset points',
        )
plt.ylabel('Mean Precipitation (inches)')
plt.xlabel('Month')
plt.title(f"Portland, OR\nMean Precipitation Per Month: 1940 - 2020")
plt.show()
# Total precipitation per (year, month) for January - June of 2018 - 2020.
# The aggregation is named by the string 'sum'; passing the Python builtin
# `sum` relies on pandas silently substituting its own implementation.
gb_18_20 = (
    pdx_precip[pdx_precip.year.isin([2018, 2019, 2020]) & (pdx_precip.month < 7)]
    .groupby(['year', 'month'])
    .agg({'v': 'sum'})
    .reset_index()
)
# Start a fresh figure so the bars are not drawn over a previous chart.
plt.figure()
ax = sns.barplot(x=gb_18_20.month, y=gb_18_20.v, hue=gb_18_20.year)
# Bar groups at positions 0..5 are labelled January .. June.
plt.xticks(np.arange(0, 6), calendar.month_name[1:7])
# Write the total above every non-empty bar. The loop variable is `patch` so
# it no longer rebinds the module-level data path `p`.
for patch in ax.patches:
    height = patch.get_height()
    if height > 0:
        ax.annotate(
            format(height, '.2f'),
            (patch.get_x() + patch.get_width() / 2., height),
            ha='center', va='center', fontsize=8,
            xytext=(0, 10), textcoords='offset points',
        )
plt.ylabel('Total Precipitation (inches)')
plt.xlabel('Month')
plt.title(f"Portland, OR\nTotal Monthly Precipitation\n(Jan - June): 2018 - 2020")
plt.show()
# One grouped bar chart per decade: total precipitation per month, one bar
# colour per year.
years = list(range(1950, 2031, 10))  # each entry is the exclusive end of a decade
with sns.axes_style("darkgrid"):
    for year in years:
        plt.figure()
        data = pdx_precip[(pdx_precip.year >= year - 10) & (pdx_precip.year < year)]
        # The aggregation is named by the string 'sum'; passing the Python
        # builtin `sum` relies on pandas substituting its own implementation.
        data = data.groupby(['year', 'month']).agg({'v': 'sum'}).reset_index()
        ax = sns.barplot(x=data.month, y=data.v, hue=data.year)
        # plt.annotate('Prepared By: Trenton McKinney', xy=(3, 140.1), xytext=(3, 140.1), fontsize=8)
        # Write the total above every non-empty bar; the anchor point is
        # raised by 0.2 data units on top of the 10 pt text offset. The loop
        # variable is `patch` so it no longer rebinds the module-level data
        # path `p`.
        for patch in ax.patches:
            height = patch.get_height()
            if height > 0:
                ax.annotate(
                    format(height, '.2f'),
                    (patch.get_x() + patch.get_width() / 2., height + 0.2),
                    ha='center', va='center', fontsize=9, rotation=90,
                    xytext=(0, 10), textcoords='offset points',
                )
        # Bar groups at positions 0..11 are labelled with month names.
        plt.xticks(np.arange(0, 12), calendar.month_name[1:13])
        plt.ylim(0, 16)
        # Legend sits outside the axes, to the right.
        plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plt.ylabel('Total Rain (inches)')
        plt.xlabel('Month')
        plt.title(f"Portland, OR\nTotal Rain Per Month: {year-10}'s")
# Precipitation rows for the years after 2010.
pdx_precip_11_00 = pdx_precip[(pdx_precip.year > 2010)]
pdx_precip_11_00
with sns.axes_style("darkgrid"):
    # Start a fresh figure so the plot is not drawn over a previous chart.
    plt.figure()
    # One box per year with the individual readings overlaid as a swarm.
    # x and y are passed by keyword; current seaborn releases no longer
    # accept the data vectors positionally.
    sns.boxplot(x=pdx_precip_11_00.date.dt.year, y=pdx_precip_11_00.v, color='lightgray')
    sns.swarmplot(x=pdx_precip_11_00.date.dt.year, y=pdx_precip_11_00.v, size=2.5)
    plt.annotate('Prepared By: Trenton McKinney', xy=(7, 2.51), xytext=(7, 2.51), fontsize=8)
    plt.ylabel('Precipitation (inches)')
    plt.xlabel('Year')
    plt.title(f"Portland, OR\n 2011 - 2020 Amount of Precipitation")