Weather Visualization for Portland, OR: 1940 - 2020

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn as sns
import numpy as np
import calendar
from math import floor
from datetime import date
In [2]:
# Notebook-wide pandas display settings, applied one option at a time.
_display_options = {
    'display.max_columns': 700,
    'display.max_rows': 100,
    'display.min_rows': 10,
    'display.expand_frame_repr': True,
}
for _option, _value in _display_options.items():
    pd.set_option(_option, _value)
In [3]:
# Default size (inches) for every figure created below
plt.rcParams.update({'figure.figsize': (16.0, 10.0)})
# plt.style.use('ggplot')
# sns.set_style("white")

Load and Clean Data

Load a local copy

In [4]:
# Workbook location: <parent of the working directory>/data/<file>
p = Path.cwd().parents[0].joinpath('data', 'Portland_dailyclimatedata1940-2019.xlsx')
In [5]:
# Sheet with the data through the end of 2019; the first six rows are skipped on read
pdx_19 = pd.read_excel(
    p,
    sheet_name='Portland_dailyclimatedata1940-2',
    skiprows=[0, 1, 2, 3, 4, 5],
)

# Sheet with the 2020 data, read with default settings
pdx_20 = pd.read_excel(p, sheet_name='2020')

View initial dataframe

In [6]:
pdx_19.head()
Out[6]:
YR MO Unnamed: 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 AVG or Total
0 1940 10 TX M M M M M M M M M M M M 75 70 64 72 72 78 78 64 63 61 58 57 57 57 56 53 59 59 52 M
1 1940 10 TN M M M M M M M M M M M M 57 53 52 50 58 58 59 54 48 41 53 48 41 38 37 45 48 50 46 M
2 1940 10 PR M M M M M M M M M M M M 0.01 T T 0 0.13 0 T 0.14 0.05 0 0.63 1.03 0 0 T 0.18 0.58 0.5 0.25 M
3 1940 10 SN M M M M M M M M M M M M 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 1940 11 TX 52 53 47 55 51 58 56 50 48 47 46 45 45 47 53 49 46 49 46 49 50 44 42 44 51 44 45 59 57 45 - 49.1
In [7]:
pdx_20.head()
Out[7]:
Date MAX MIN AVG Departure HDD CDD Precip New_Snow Snow_Depth
0 2020-01-01 56 47 51.5 11.6 13 0 0.15 0 0
1 2020-01-02 53 43 48.0 8.0 17 0 0.04 0 0
2 2020-01-03 62 44 53.0 12.9 12 0 0.12 0 0
3 2020-01-04 49 41 45.0 4.8 20 0 0.16 0 0
4 2020-01-05 49 42 45.5 5.2 19 0 0.26 0 0

Transform and clean data

In [8]:
# Drop the columns the analysis does not use.
# The frames are re-bound instead of mutated in place, and errors='ignore'
# makes the cell safe to re-run: a second run used to raise KeyError because
# the columns were already gone.
pdx_19 = pdx_19.drop(columns=['AVG or Total'], errors='ignore')
pdx_20 = pdx_20.drop(
    columns=['Departure', 'HDD', 'CDD', 'New_Snow', 'Snow_Depth'],
    errors='ignore',
)
In [9]:
# Add a 'v_' prefix to the day columns (everything after the first three
# columns) so pd.wide_to_long can use 'v' as the stub name.
# Columns that already carry the prefix are left alone, so re-running the
# cell no longer produces names such as 'v_v_1'.
pdx_19.columns = [
    col if pos < 3 or str(col).startswith('v_') else f'v_{col}'
    for pos, col in enumerate(pdx_19.columns)
]
In [10]:
# Name the measurement-type column, then keep only the TX (max temp),
# TN (min temp) and PR rows; every other row type is discarded.
pdx_19 = pdx_19.rename(columns={'Unnamed: 2': 'TYPE'})
wanted_types = ['TX', 'TN', 'PR']
pdx_19 = pdx_19[pdx_19['TYPE'].isin(wanted_types)]
In [11]:
# Give the 2020 measurement columns a shared 'v' stub (v1..v4) so that
# pd.wide_to_long can stack them; 'Date' becomes the lower-case id column.
stub_names = {
    'Date': 'date',
    'MAX': 'v1',
    'MIN': 'v2',
    'AVG': 'v3',
    'Precip': 'v4',
}
pdx_20 = pdx_20.rename(columns=stub_names)
In [12]:
pdx_20.head()
Out[12]:
date v1 v2 v3 v4
0 2020-01-01 56 47 51.5 0.15
1 2020-01-02 53 43 48.0 0.04
2 2020-01-03 62 44 53.0 0.12
3 2020-01-04 49 41 45.0 0.16
4 2020-01-05 49 42 45.5 0.26
In [13]:
pdx_19.head()
Out[13]:
YR MO TYPE v_1 v_2 v_3 v_4 v_5 v_6 v_7 v_8 v_9 v_10 v_11 v_12 v_13 v_14 v_15 v_16 v_17 v_18 v_19 v_20 v_21 v_22 v_23 v_24 v_25 v_26 v_27 v_28 v_29 v_30 v_31
0 1940 10 TX M M M M M M M M M M M M 75 70 64 72 72 78 78 64 63 61 58 57 57 57 56 53 59 59 52
1 1940 10 TN M M M M M M M M M M M M 57 53 52 50 58 58 59 54 48 41 53 48 41 38 37 45 48 50 46
2 1940 10 PR M M M M M M M M M M M M 0.01 T T 0 0.13 0 T 0.14 0.05 0 0.63 1.03 0 0 T 0.18 0.58 0.5 0.25
4 1940 11 TX 52 53 47 55 51 58 56 50 48 47 46 45 45 47 53 49 46 49 46 49 50 44 42 44 51 44 45 59 57 45 -
5 1940 11 TN 40 38 36 32 42 46 46 42 35 34 35 33 34 33 28 27 36 30 29 36 33 28 37 35 37 36 38 43 40 39 -

Convert to Tidy format

1940 - 2019 Data

In [14]:
# Stack the v_1 .. v_31 columns into one row per (YR, MO, TYPE, day)
pdx = pd.wide_to_long(
    pdx_19,
    stubnames='v',
    i=['YR', 'MO', 'TYPE'],
    j='day',
    sep='_',
)
pdx = pdx.reset_index()
In [15]:
# Replace the two-letter source codes with the labels used in the rest of the notebook
type_labels = {'TX': 'MAX', 'TN': 'MIN', 'PR': 'PRE'}
pdx['TYPE'] = pdx['TYPE'].map(type_labels)
In [16]:
# pd.to_datetime assembles dates from columns named year / month / day,
# so rename YR and MO accordingly
pdx = pdx.rename(columns={'YR': 'year', 'MO': 'month'})
In [17]:
# A '-' value marks a day that does not exist in that month; keep every other row
is_real_day = pdx['v'] != '-'
pdx = pdx.loc[is_real_day].copy()
In [18]:
# Assemble a datetime column from the year / month / day parts
date_parts = pdx[['year', 'month', 'day']]
pdx['date'] = pd.to_datetime(date_parts)
In [19]:
pdx.head()
Out[19]:
year month TYPE day v date
0 1940 10 MAX 1 M 1940-10-01
1 1940 10 MAX 2 M 1940-10-02
2 1940 10 MAX 3 M 1940-10-03
3 1940 10 MAX 4 M 1940-10-04
4 1940 10 MAX 5 M 1940-10-05

2020 Data

In [20]:
# Stack v1..v4 into one row per (date, TYPE); TYPE holds the numeric suffix
pdx_20_l = pd.wide_to_long(
    pdx_20,
    stubnames=['v'],
    i=['date'],
    j='TYPE',
)
pdx_20_l = pdx_20_l.reset_index()
In [21]:
# Translate the numeric suffixes (v1..v4) back into measurement labels
suffix_labels = {1: 'MAX', 2: 'MIN', 3: 'AVG', 4: 'PRE'}
pdx_20_l['TYPE'] = pdx_20_l['TYPE'].map(suffix_labels)
In [22]:
# Add year / month / day columns derived from the date, matching the
# columns of the 1940 - 2019 frame
for part in ('year', 'month', 'day'):
    pdx_20_l[part] = getattr(pdx_20_l.date.dt, part)
In [23]:
pdx_20_l
Out[23]:
date TYPE v year month day
0 2020-01-01 MAX 56 2020 1 1
1 2020-01-02 MAX 53 2020 1 2
2 2020-01-03 MAX 62 2020 1 3
3 2020-01-04 MAX 49 2020 1 4
4 2020-01-05 MAX 49 2020 1 5
... ... ... ... ... ... ...
667 2020-06-12 PRE 0.04 2020 6 12
668 2020-06-13 PRE 0.21 2020 6 13
669 2020-06-14 PRE 0.01 2020 6 14
670 2020-06-15 PRE 0.35 2020 6 15
671 2020-06-16 PRE 0.19 2020 6 16

672 rows × 6 columns

Join all date ranges

In [24]:
# Append the 2020 rows and renumber the index from zero
pdx = pd.concat([pdx, pdx_20_l], ignore_index=True)
In [25]:
# Convert the value column to float.
# The 'M' and 'T' markers are blanked to NaN first (presumably "missing" and
# "trace" -- confirm against the data source); the NaN rows are dropped by
# the next cell.
# The column is assigned back explicitly: the previous
# `pdx.v.replace(..., inplace=True)` mutated a temporary Series obtained by
# attribute access, which leaves `pdx` unchanged under pandas copy-on-write.
non_numeric = pdx['v'].isin(['M', 'T'])
pdx['v'] = pdx['v'].mask(non_numeric).astype('float')
In [26]:
# Remove every row that has a missing value in any column
pdx = pdx.dropna()
In [27]:
# Label each row with its decade, e.g. 1947 -> "1940's"
decade_start = pdx['year'].map(lambda yr: floor(yr / 10) * 10)
pdx['dec'] = decade_start.map(lambda start: f"{start}'s")

create temp only dataframe

In [28]:
# Everything except the precipitation rows, as an independent copy with a fresh index
not_precip = ~(pdx['TYPE'] == 'PRE')
temp = pdx.loc[not_precip].copy().reset_index(drop=True)
In [29]:
temp.TYPE.unique()
Out[29]:
array(['MAX', 'MIN', 'AVG'], dtype=object)
In [30]:
# Bucket each temperature into the labelled ranges.
# Bins are left-closed ([65, 75), [75, 85), ...) and open-ended at both
# extremes so the labels hold for every value:
#   * whole-degree readings land in the same bucket as before;
#   * half-degree values (e.g. a 64.5 average) now fall in '< 65' instead of
#     '65 - 74';
#   * readings at or below 0 (or above 200) get a bucket instead of NaN.
temp['range'] = pd.cut(
    temp.v,
    bins=[-np.inf, 65, 75, 85, 95, np.inf],
    right=False,
    labels=['< 65', '65 - 74', '75 - 84', '85 - 94', '>= 95'],
)
In [31]:
display(temp.head())
display(temp.tail())
year month TYPE day v date dec range
0 1940 10 MAX 13 75.0 1940-10-13 1940's 75 - 84
1 1940 10 MAX 14 70.0 1940-10-14 1940's 65 - 74
2 1940 10 MAX 15 64.0 1940-10-15 1940's < 65
3 1940 10 MAX 16 72.0 1940-10-16 1940's 65 - 74
4 1940 10 MAX 17 72.0 1940-10-17 1940's 65 - 74
year month TYPE day v date dec range
58367 2020 6 AVG 12 58.5 2020-06-12 2020's < 65
58368 2020 6 AVG 13 56.5 2020-06-13 2020's < 65
58369 2020 6 AVG 14 59.0 2020-06-14 2020's < 65
58370 2020 6 AVG 15 60.5 2020-06-15 2020's < 65
58371 2020 6 AVG 16 60.0 2020-06-16 2020's < 65

create precipitation only dataframe

In [32]:
# Keep only the precipitation rows, as an independent copy with a fresh index
is_precip = pdx['TYPE'] == 'PRE'
precip = pdx.loc[is_precip].copy().reset_index(drop=True)
In [33]:
precip.TYPE.unique()
Out[33]:
array(['PRE'], dtype=object)
In [34]:
display(precip.head())
display(precip.tail())
year month TYPE day v date dec
0 1940 10 PRE 13 0.01 1940-10-13 1940's
1 1940 10 PRE 16 0.00 1940-10-16 1940's
2 1940 10 PRE 17 0.13 1940-10-17 1940's
3 1940 10 PRE 18 0.00 1940-10-18 1940's
4 1940 10 PRE 20 0.14 1940-10-20 1940's
year month TYPE day v date dec
25700 2020 6 PRE 12 0.04 2020-06-12 2020's
25701 2020 6 PRE 13 0.21 2020-06-13 2020's
25702 2020 6 PRE 14 0.01 2020-06-14 2020's
25703 2020 6 PRE 15 0.35 2020-06-15 2020's
25704 2020 6 PRE 16 0.19 2020-06-16 2020's

Create max temperature dataframe

In [35]:
# Daily maximum temperatures only, renumbered from zero
is_max = temp['TYPE'] == 'MAX'
pdx_max = temp.loc[is_max].reset_index(drop=True)
pdx_max
Out[35]:
year month TYPE day v date dec range
0 1940 10 MAX 13 75.0 1940-10-13 1940's 75 - 84
1 1940 10 MAX 14 70.0 1940-10-14 1940's 65 - 74
2 1940 10 MAX 15 64.0 1940-10-15 1940's < 65
3 1940 10 MAX 16 72.0 1940-10-16 1940's 65 - 74
4 1940 10 MAX 17 72.0 1940-10-17 1940's 65 - 74
... ... ... ... ... ... ... ... ...
29097 2020 6 MAX 12 62.0 2020-06-12 2020's < 65
29098 2020 6 MAX 13 60.0 2020-06-13 2020's < 65
29099 2020 6 MAX 14 68.0 2020-06-14 2020's 65 - 74
29100 2020 6 MAX 15 65.0 2020-06-15 2020's 65 - 74
29101 2020 6 MAX 16 67.0 2020-06-16 2020's 65 - 74

29102 rows × 8 columns

Create January to May Max Temperature

In [36]:
# Rows whose month number is 1 through 5 (January - May)
month_number = pdx_max.date.dt.month
pdx_max_jan_may = pdx_max[month_number.between(1, 5)]

Groupby year and range

In [37]:
# Count the days per (year, temperature range); the year moves out of the
# index into a 'date' column, leaving 'range' as the index.
# observed=False is spelled out because 'range' is categorical: the plots
# rely on zero-count ranges being present, and the implicit default for
# `observed` is deprecated and slated to change.
pdx_max_g = (
    pdx_max_jan_may
    .groupby([pdx_max_jan_may.date.dt.year, 'range'], observed=False)['v']
    .agg('count')
    .reset_index(level=0)
)
In [38]:
display(pdx_max_g.head())
display(pdx_max_g.tail())
date v
range
< 65 1941 107
65 - 74 1941 34
75 - 84 1941 9
85 - 94 1941 1
>= 95 1941 0
date v
range
< 65 2020 116
65 - 74 2020 24
75 - 84 2020 9
85 - 94 2020 3
>= 95 2020 0

Plot MAX Temperature data

In [39]:
# One grouped bar chart per decade: Jan - May day counts per high-temperature
# range, with one hue per year. `year` is the exclusive upper bound of the
# decade being drawn.
years = list(range(1950, 2031, 10))

with sns.axes_style("darkgrid"):
    for year in years:
        plt.figure()
        data = pdx_max_g[(pdx_max_g.date >= year - 10) & (pdx_max_g.date < year)]
        ax = sns.barplot(x=data.index, y=data.v, hue=data.date)

        plt.annotate('Prepared By: Trenton McKinney', xy=(3, 140.1), xytext=(3, 140.1), fontsize=8)

        # Write the count above every non-empty bar. The loop variable is not
        # called `p`: that name holds the workbook path used by the loading
        # cells, and rebinding it here broke re-running them.
        for bar in ax.patches:
            if bar.get_height() > 0:
                ax.annotate(format(bar.get_height(), '.0f'),
                            (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                            ha='center', va='center', fontsize=8,
                            xytext=(0, 10), textcoords='offset points')

        plt.ylim(0, 150)
        plt.ylabel('Days')
        plt.xlabel('High Temperatures °F')
        plt.title(f"Portland, OR\nJan - May High Temperature Days: {year-10}'s")

Create June Max Temperature

In [40]:
# June rows only, then the number of days per (year, temperature range).
# observed=False is explicit because 'range' is categorical: zero-count
# ranges must stay in the result, and the implicit default for `observed`
# is deprecated and slated to change.
pdx_max_june = pdx_max[(pdx_max.date.dt.month == 6)]
pdx_max_june = (
    pdx_max_june
    .groupby([pdx_max_june.date.dt.year, 'range'], observed=False)['v']
    .agg('count')
    .reset_index(level=0)
)
pdx_max_june
Out[40]:
date v
range
< 65 1941 1
65 - 74 1941 22
75 - 84 1941 6
85 - 94 1941 0
>= 95 1941 1
... ... ...
< 65 2020 5
65 - 74 2020 8
75 - 84 2020 3
85 - 94 2020 0
>= 95 2020 0

400 rows × 2 columns

Plot June

In [41]:
# One grouped bar chart per decade: June day counts per high-temperature
# range, with one hue per year. `year` is the exclusive upper bound of the
# decade being drawn.
years = list(range(1950, 2031, 10))

with sns.axes_style("darkgrid"):
    for year in years:
        plt.figure()
        data = pdx_max_june[(pdx_max_june.date >= year - 10) & (pdx_max_june.date < year)]
        ax = sns.barplot(x=data.index, y=data.v, hue=data.date)

        plt.annotate('Prepared By: Trenton McKinney', xy=(0, 24.3), xytext=(0, 24.3), fontsize=8)

        # Write the count above every non-empty bar. The loop variable is not
        # called `p`: that name holds the workbook path used by the loading
        # cells, and rebinding it here broke re-running them.
        for bar in ax.patches:
            if bar.get_height() > 0:
                ax.annotate(format(bar.get_height(), '.0f'),
                            (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                            ha='center', va='center', fontsize=9,
                            xytext=(0, 10), textcoords='offset points')

        plt.ylim(0, 25)
        plt.ylabel('Days')
        plt.xlabel('High Temperatures °F')
        plt.title(f"Portland, OR\nJune High Temperature Days: {year-10}'s")

Resample max monthly mean

In [42]:
# Month-end resample of the daily maximums, averaged per month.
# Only numeric columns are kept before averaging: pdx_max also holds string
# and categorical columns (TYPE, dec, range), and taking their mean raises on
# current pandas. A MonthEnd offset object replaces the deprecated lower-case
# 'm' alias and yields the same month-end bins.
pdx_m_mean = (
    pdx_max
    .set_index('date')
    .select_dtypes(include='number')
    .resample(pd.offsets.MonthEnd())
    .mean()
)
In [43]:
# Draw the monthly mean highs on one axes, one line segment per decade.
years = list(range(1950, 2031, 10))

with sns.axes_style("darkgrid"):
    for year in years:
        data = pdx_m_mean[(pdx_m_mean.index.year >= year - 10) & (pdx_m_mean.index.year < year)]
        ax = sns.lineplot(x=data.index, y=data.v)

    # Anchor the credit at 2000-01-01 with a real date. The previous raw
    # number 730120 is that day's proleptic ordinal, which only maps to
    # 2000-01-01 under Matplotlib's pre-3.3 date epoch.
    credit_xy = (date(2000, 1, 1), 32)
    plt.annotate('Prepared By: Trenton McKinney', xy=credit_xy, xytext=credit_xy, fontsize=10)
    plt.xlabel('Year')
    plt.ylabel('Monthly Mean Max Temperature °F')
    plt.title(f"Portland, OR\nResampled Monthly Mean High Temperature")

Mean: Resample max yearly

In [44]:
# Year-end resample of the daily maximums, averaged per year.
# 1940 is filtered out because it is a partial year.
# Only numeric columns are kept before averaging: the string and categorical
# columns (TYPE, dec, range) make .mean() raise on current pandas. A YearEnd
# offset object replaces the deprecated lower-case 'y' alias and yields the
# same year-end bins.
pdx_y_mean = (
    pdx_max[pdx_max.year > 1940]
    .set_index('date')
    .select_dtypes(include='number')
    .resample(pd.offsets.YearEnd())
    .mean()
)
In [45]:
# Draw the yearly mean highs on one axes, one line segment per decade.
# The last decade bound is 2020, so only years before 2020 are drawn.
years = list(range(1950, 2021, 10))

with sns.axes_style("darkgrid"):
    for year in years:
        data = pdx_y_mean[(pdx_y_mean.index.year >= year - 10) & (pdx_y_mean.index.year < year)]
        ax = sns.lineplot(x=data.index, y=data.v)

    # Anchor the credit at 2000-01-01 with a real date. The previous raw
    # number 730120 is that day's proleptic ordinal, which only maps to
    # 2000-01-01 under Matplotlib's pre-3.3 date epoch.
    credit_xy = (date(2000, 1, 1), 58.7)
    plt.annotate('Prepared By: Trenton McKinney', xy=credit_xy, xytext=credit_xy, fontsize=9)
    plt.xlabel('Year')
    plt.ylabel('Yearly Mean Max Temperature °F')
    plt.title(f"Portland, OR\nResampled Yearly Mean High Temperature")

Mean: Groupby year & month

In [46]:
# Mean daily high per (year, month); unstacking the year level leaves
# months as rows and years as columns. Show the first five years.
year_key = pdx_max.date.dt.year
month_key = pdx_max.date.dt.month
mym = pdx_max.groupby([year_key, month_key])['v'].mean().unstack(level=0)
mym.iloc[:, :5]
Out[46]:
date 1940 1941 1942 1943 1944
date
1 NaN 47.354839 39.774194 40.032258 45.516129
2 NaN 55.142857 49.178571 52.642857 49.034483
3 NaN 63.516129 55.645161 53.451613 54.741935
4 NaN 65.800000 62.066667 64.066667 59.900000
5 NaN 67.096774 65.129032 65.161290 68.354839
6 NaN 71.566667 70.400000 69.633333 71.766667
7 NaN 84.516129 80.225806 78.935484 80.161290
8 NaN 77.580645 80.548387 77.967742 78.000000
9 NaN 68.800000 76.266667 79.466667 77.800000
10 63.421053 62.516129 65.451613 62.483871 67.903226
11 49.100000 54.800000 50.366667 53.500000 51.566667
12 48.516129 47.677419 47.677419 46.322581 44.774194
In [47]:
# One figure per group of ten year-columns of `mym`, one line per year.
ix = list(range(0, len(mym.columns), 10))

with sns.axes_style("darkgrid"):
    for i in ix:
        plt.figure()
        data = mym.iloc[:, slice(i, i + 10)]
        sns.lineplot(data=data, markers=None, dashes=False)
        plt.annotate('Prepared By: Trenton McKinney', xy=(10, 31), xytext=(10, 31), fontsize=10)

        # Month names on the x axis, shared y range across all figures
        month_labels = calendar.month_name[1:13]
        plt.xticks(np.arange(1, 13), month_labels)
        plt.ylim(30, 100)
        plt.xlabel('Month')
        plt.ylabel('Mean Max Temperature °F')
        plt.title("Portland, OR\nMonthly Mean per Year High Temperature")
        plt.show()

Mean: Groupby decade & month

In [48]:
# Mean daily high per (decade, month); decades become the columns
month_key = pdx_max.date.dt.month
mymd = pdx_max.groupby([pdx_max['dec'], month_key])['v'].mean().unstack(level=0)
mymd
Out[48]:
dec 1940's 1950's 1960's 1970's 1980's 1990's 2000's 2010's 2020's
date
1 43.512545 43.570968 44.354839 44.251613 46.529032 47.293548 46.338710 47.080645 50.161290
2 50.279528 48.578014 51.547703 50.666667 50.685512 51.372340 51.250883 49.851064 51.862069
3 55.433692 52.438710 55.090323 55.641935 56.793548 57.154839 55.583871 56.374194 54.612903
4 61.785185 60.893333 58.986667 60.006667 62.040000 61.593333 61.106667 61.550000 64.933333
5 68.103943 67.077419 65.922581 67.622581 67.641935 68.064516 68.422581 69.377419 69.741935
6 71.870370 70.526667 73.546667 74.256667 73.776667 72.703333 74.020000 74.496667 68.375000
7 79.534050 78.200000 79.325806 80.893548 78.848387 80.987097 81.806452 81.667742 NaN
8 78.100358 77.558065 78.945161 79.812903 81.016129 81.332258 80.774194 83.232258 NaN
9 74.488889 74.030000 73.973333 74.473333 74.693333 77.416667 75.476667 75.636667 NaN
10 62.604027 63.096774 63.554839 64.122581 64.741935 63.945161 63.103226 64.348387 NaN
11 52.123333 51.580000 52.946667 52.043333 52.890000 53.080000 52.653333 53.190000 NaN
12 46.448387 47.096774 45.577419 46.477419 45.145161 46.219355 45.564516 46.245161 NaN
In [49]:
# Single figure: one line per decade of the mean monthly high.
with sns.axes_style("darkgrid"):

    plt.figure()
    sns.lineplot(data=mymd, dashes=False, markers=None)

    credit_xy = (10, 40.5)
    plt.annotate('Prepared By: Trenton McKinney', xy=credit_xy, xytext=credit_xy, fontsize=10)

    month_labels = calendar.month_name[1:13]
    plt.xticks(np.arange(1, 13), month_labels)
    plt.ylim(40, 85)
    plt.xlabel('Month')
    plt.ylabel('Mean Max Temperature °F')
    plt.title("Portland, OR\nMonthly Mean Per Decade High Temperature")
    plt.show()

Precipitation

2020

In [50]:
# Precipitation rows for the year 2020
in_2020 = precip['year'] == 2020
pdx_20_lp = precip[in_2020]
pdx_20_lp.head()
Out[50]:
year month TYPE day v date dec
25558 2020 1 PRE 1 0.15 2020-01-01 2020's
25559 2020 1 PRE 2 0.04 2020-01-02 2020's
25560 2020 1 PRE 3 0.12 2020-01-03 2020's
25561 2020 1 PRE 4 0.16 2020-01-04 2020's
25562 2020 1 PRE 5 0.26 2020-01-05 2020's
In [51]:
# Daily precipitation for 2020, one bar per date.
plt.figure(figsize=(25, 10))
with sns.axes_style("darkgrid"):

    # x / y are passed by keyword: seaborn 0.12+ no longer accepts the data
    # vectors positionally.
    sns.barplot(x=pdx_20_lp.date, y=pdx_20_lp.v)

    plt.annotate('Prepared By: Trenton McKinney', xy=(130, 1.03), xytext=(130, 1.03), fontsize=8)
    # Trim each tick label to its first 10 characters (the YYYY-MM-DD part)
    ticks, labels = plt.xticks(rotation=90)
    labels = [label.get_text()[:10] for label in labels]
    plt.xticks(ticks=ticks, labels=labels)

    plt.ylabel('Precipitation (inches)')
    plt.xlabel('Date')
    plt.title(f"Portland, OR\n 2020: Jan - June 16\nPrecipitation Days")
In [52]:
# Distribution of 2020 daily precipitation, one box per month.
with sns.axes_style("darkgrid"):

    # x / y are passed by keyword: seaborn 0.12+ no longer accepts the data
    # vectors positionally.
    sns.boxplot(x=pdx_20_lp.date.dt.month, y=pdx_20_lp.v)

    # Box positions are 0-based, so position 0 is January
    plt.xticks(np.arange(0, 6), calendar.month_name[1:7])

#     plt.annotate('Prepared By: Trenton McKinney', xy=(3, 0.9), xytext=(3, 0.9), fontsize=8)

    plt.ylabel('Precipitation (inches)')
    plt.xlabel('Month')
    plt.title(f"Portland, OR\n 2020: Jan - June 16\nPrecipitation")

1940 - 2020

In [53]:
# Second name for the same precipitation frame (plain assignment, not a copy)
pdx_precip = precip

Mean Monthly Precipitation per Decade: 1940 - 2020

In [54]:
# Mean daily precipitation value per (decade, month); decades become the columns
precip_by_dec_month = pdx_precip.groupby(['dec', 'month'])['v'].mean()
pmymd = precip_by_dec_month.unstack(level=0)
pmymd
Out[54]:
dec 1940's 1950's 1960's 1970's 1980's 1990's 2000's 2010's 2020's
month
1 0.151803 0.267302 0.192635 0.222917 0.171506 0.201728 0.179574 0.166740 0.261379
2 0.165325 0.178613 0.147683 0.157868 0.154016 0.186490 0.102874 0.164146 0.067391
3 0.133475 0.144317 0.129892 0.130000 0.141181 0.134588 0.125926 0.175720 0.093462
4 0.080318 0.081467 0.098492 0.090775 0.092022 0.119593 0.089638 0.114444 0.029259
5 0.093803 0.070230 0.078614 0.081019 0.077969 0.101828 0.080601 0.086403 0.081852
6 0.060498 0.080913 0.050906 0.040675 0.072293 0.069134 0.046955 0.066000 0.210667
7 0.022103 0.013214 0.015035 0.022088 0.029894 0.025338 0.011937 0.016531 NaN
8 0.024545 0.028845 0.041866 0.053310 0.023604 0.021927 0.025679 0.011835 NaN
9 0.075279 0.055358 0.054126 0.071423 0.074345 0.036739 0.045110 0.081954 NaN
10 0.139922 0.153713 0.111507 0.090288 0.080827 0.141245 0.093238 0.148723 NaN
11 0.222182 0.174604 0.214296 0.188856 0.208148 0.235055 0.176410 0.183063 NaN
12 0.206460 0.220373 0.236391 0.229962 0.196545 0.200111 0.203781 0.215867 NaN
In [55]:
# Single figure: one line per decade of the mean monthly precipitation value.
with sns.axes_style("darkgrid"):

    plt.figure()
    sns.lineplot(data=pmymd, dashes=False, markers=None)
#     plt.annotate('Prepared By: Trenton McKinney', xy=(10, 0.06), xytext=(10, 0.06), fontsize=10)
    month_labels = calendar.month_name[1:13]
    plt.xticks(np.arange(1, 13), month_labels)

    plt.xlabel('Month')
    plt.ylabel('Mean Precipitation (inches)')
    plt.title("Portland, OR\nMonthly Mean Amount of Precipitation by Decade")
    plt.show()
In [56]:
# Long form of the decade table: one row per (month, dec); the stacked
# values end up in a column named 0.
pmymds = pmymd.stack().reset_index()

with sns.axes_style("darkgrid"):

    plt.figure(figsize=(20, 10))
    # x / y are passed by keyword: seaborn 0.12+ no longer accepts them
    # positionally.
    ax = sns.barplot(x='month', y=0, data=pmymds, hue='dec')

    # Write the value above every non-empty bar. The loop variable is not
    # called `p`: that name holds the workbook path used by the loading
    # cells, and rebinding it here broke re-running them.
    for bar in ax.patches:
        if bar.get_height() > 0:
            ax.annotate(format(bar.get_height(), '.2f'),
                        (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                        ha='center', va='center', fontsize=9, rotation=90,
                        xytext=(0, 10), textcoords='offset points')

    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
#     plt.annotate('Prepared By: Trenton McKinney', xy=(9, 0.351), xytext=(9, 0.351), fontsize=10)
    plt.xticks(np.arange(0, 12), calendar.month_name[1:13])
    plt.xlabel('Month')
    plt.ylabel('Mean Precipitation (inches)')
    plt.title(f"Portland, OR\nMonthly Mean Amount of Precipitation by Decade")
    plt.show()
  • Note the 2020's are only made up of 5.5 months of data
  • Other decades are the mean over 10 years.

Total Precipitation Days: 1940 - 2020

In [57]:
# Number of rows with a positive precipitation value, per year.
# size() + reset_index(name=...) replaces the dict renamer
# `.agg({'count': 'count'})`, which raises SpecificationError
# ("nested renamer is not supported") on current pandas.
precip_days_year = (
    pdx_precip[pdx_precip.v > 0]
    .groupby('year')
    .size()
    .reset_index(name='count')
)

with sns.axes_style("darkgrid"):
    ax = sns.barplot(x='year', y='count', data=precip_days_year)
    plt.xticks(rotation=90)

    # plt.annotate('Prepared By: Trenton McKinney', xy=(3, 140.1), xytext=(3, 140.1), fontsize=8)

    # Write the count above every non-empty bar. The loop variable is not
    # called `p`: that name holds the workbook path used by the loading
    # cells, and rebinding it here broke re-running them.
    for bar in ax.patches:
        if bar.get_height() > 0:
            ax.annotate(format(bar.get_height(), '.0f'),
                        (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                        ha='center', va='center', fontsize=9, rotation=90,
                        xytext=(0, 10), textcoords='offset points')

    plt.ylabel('Days of Precipitation')
    plt.xlabel('Year')
    plt.title(f"Portland, OR\nPrecipitation Days Per Year")

    plt.show()
  • 1940 and 2020 do not have complete data for the year

Mean Monthly Precipitation: 1940 - 2020

In [58]:
# Mean precipitation value per calendar month over the whole record.
# Named aggregation replaces the dict renamer `.agg({'mean': 'mean'})`,
# which raises SpecificationError on current pandas.
precip_days_month = pdx_precip.groupby('month', as_index=False).agg(mean=('v', 'mean'))
ax = sns.barplot(x='month', y='mean', data=precip_days_month)
plt.xticks(rotation=90)

# plt.annotate('Prepared By: Trenton McKinney', xy=(3, 140.1), xytext=(3, 140.1), fontsize=8)
# Bar positions are 0-based, so position 0 is January
plt.xticks(np.arange(0, 12), calendar.month_name[1:13])

# Write the value above every non-empty bar. The loop variable is not called
# `p`: that name holds the workbook path used by the loading cells, and
# rebinding it here broke re-running them.
for bar in ax.patches:
    if bar.get_height() > 0:
        ax.annotate(format(bar.get_height(), '.2f'),
                    (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                    ha='center', va='center', fontsize=8,
                    xytext=(0, 10), textcoords='offset points')

plt.ylabel('Mean Precipitation (inches)')
plt.xlabel('Month')
plt.title(f"Portland, OR\nMean Precipitation Per Month: 1940 - 2020")

plt.show()

Total Jan - June Precipitation: 2018 - 2020

In [59]:
# Total precipitation per (year, month) for January - June of 2018 - 2020.
# The aggregation is named by the string 'sum': passing the builtin `sum`
# to agg is deprecated in pandas.
gb_18_20 = (
    pdx_precip[(pdx_precip.year.isin([2018, 2019, 2020])) & (pdx_precip.month < 7)]
    .groupby(['year', 'month'])
    .agg({'v': 'sum'})
    .reset_index()
)
In [60]:
# Grouped bars: monthly precipitation totals, one hue per year.
ax = sns.barplot(x=gb_18_20.month, y=gb_18_20.v, hue=gb_18_20.year)
# Bar positions are 0-based, so position 0 is January
plt.xticks(np.arange(0, 6), calendar.month_name[1:7])

# Write the total above every non-empty bar. The loop variable is not called
# `p`: that name holds the workbook path used by the loading cells, and
# rebinding it here broke re-running them.
for bar in ax.patches:
    if bar.get_height() > 0:
        ax.annotate(format(bar.get_height(), '.2f'),
                    (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                    ha='center', va='center', fontsize=8,
                    xytext=(0, 10), textcoords='offset points')

plt.ylabel('Total Precipitation (inches)')
plt.xlabel('Month')
plt.title(f"Portland, OR\nTotal Monthly Precipitation\n(Jan - June): 2018 - 2020")
plt.show()

Monthly Total Precipitation

In [61]:
# One grouped bar chart per decade: total precipitation per month, one hue
# per year. `year` is the exclusive upper bound of the decade being drawn.
years = list(range(1950, 2031, 10))

with sns.axes_style("darkgrid"):
    for year in years:
        plt.figure()
        data = pdx_precip[(pdx_precip.year >= year - 10) & (pdx_precip.year < year)]
        # 'sum' is given as a string: passing the builtin `sum` to agg is
        # deprecated in pandas.
        data = data.groupby(['year', 'month']).agg({'v': 'sum'}).reset_index()
        ax = sns.barplot(x=data.month, y=data.v, hue=data.year)

#         plt.annotate('Prepared By: Trenton McKinney', xy=(3, 140.1), xytext=(3, 140.1), fontsize=8)

        # Write the total above every non-empty bar. The loop variable is not
        # called `p`: that name holds the workbook path used by the loading
        # cells, and rebinding it here broke re-running them.
        for bar in ax.patches:
            if bar.get_height() > 0:
                ax.annotate(format(bar.get_height(), '.2f'),
                            (bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.2),
                            ha='center', va='center', fontsize=9, rotation=90,
                            xytext=(0, 10), textcoords='offset points')

        plt.xticks(np.arange(0, 12), calendar.month_name[1:13])
        plt.ylim(0, 16)
        plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plt.ylabel('Total Rain (inches)')
        plt.xlabel('Month')
        plt.title(f"Portland, OR\nTotal Rain Per Month: {year-10}'s")

2011 - 2020

In [62]:
# Precipitation rows for every year after 2010 (2011 onward)
after_2010 = pdx_precip['year'] > 2010
pdx_precip_11_00 = pdx_precip[after_2010]
pdx_precip_11_00
Out[62]:
year month TYPE day v date dec
22646 2011 1 PRE 1 0.00 2011-01-01 2010's
22647 2011 1 PRE 2 0.00 2011-01-02 2010's
22648 2011 1 PRE 3 0.00 2011-01-03 2010's
22649 2011 1 PRE 4 0.00 2011-01-04 2010's
22650 2011 1 PRE 5 0.37 2011-01-05 2010's
... ... ... ... ... ... ... ...
25700 2020 6 PRE 12 0.04 2020-06-12 2020's
25701 2020 6 PRE 13 0.21 2020-06-13 2020's
25702 2020 6 PRE 14 0.01 2020-06-14 2020's
25703 2020 6 PRE 15 0.35 2020-06-15 2020's
25704 2020 6 PRE 16 0.19 2020-06-16 2020's

3059 rows × 7 columns

In [63]:
# Per-year distribution of daily precipitation from 2011 onward: a grey box
# plot with the individual observations overlaid as a swarm.
with sns.axes_style("darkgrid"):

    # x / y are passed by keyword: seaborn 0.12+ no longer accepts the data
    # vectors positionally.
    sns.boxplot(x=pdx_precip_11_00.date.dt.year, y=pdx_precip_11_00.v, color='lightgray')
    sns.swarmplot(x=pdx_precip_11_00.date.dt.year, y=pdx_precip_11_00.v, size=2.5)

    plt.annotate('Prepared By: Trenton McKinney', xy=(7, 2.51), xytext=(7, 2.51), fontsize=8)

    plt.ylabel('Precipitation (inches)')
    plt.xlabel('Year')
    plt.title(f"Portland, OR\n 2011 - 2020 Amount of Precipitation")