import pandas as pd
from pandas.plotting import register_matplotlib_converters
import matplotlib.pyplot as plt
import seaborn as sns
from pprint import pprint as pp
from pathlib import Path
import matplotlib.dates as mdates
# Register pandas' formatters/converters with matplotlib, then apply
# seaborn's default theme to all subsequent plots.
register_matplotlib_converters()
sns.set()
# %matplotlib inline
# pandas options
# Widen pandas' display limits so wide/long frames print in full.
# Fully-qualified option names are used on purpose: a bare 'max_columns' or
# 'max_rows' is treated as a pattern and raises OptionError on pandas
# versions where it also matches the 'styler.render.*' options.
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)
pd.set_option('display.expand_frame_repr', True)
# Data
# Read the '1. Aggregate' sheet of the homework workbook, which is expected
# to sit in the current working directory, and index the frame by daily
# period taken from its 'date' column.
data_dir_path = Path.cwd()
skillz_data = data_dir_path.joinpath('Skillz Analytics H2H Homework Data_v0.4.xlsx')
skillz_df = pd.read_excel(skillz_data, sheet_name='1. Aggregate')
skillz_df.head()  # notebook preview; the value is discarded when run as a script
skillz_df.info()  # prints a summary of the frame
skillz_df = skillz_df.set_index('date')
skillz_df.index = skillz_df.index.to_period('D')
# Smoothed views of every column after the first (the first is assumed to be
# `experiment_group` -- TODO confirm against the sheet layout), computed
# separately for the Control and Test groups:
#   * a centred 7-row rolling mean
#   * a weekly ('W') resampled mean
numeric_columns = skillz_df.columns[1:]

skillz_7d_control = (
    skillz_df.loc[skillz_df.experiment_group == 'Control', numeric_columns]
    .rolling(window=7, center=True)
    .mean()
)
skillz_7d_test = (
    skillz_df.loc[skillz_df.experiment_group == 'Test', numeric_columns]
    .rolling(window=7, center=True)
    .mean()
)
skillz_7d_control.head(10)
skillz_7d_test.head(10)

skillz_weekly_control = (
    skillz_df.loc[skillz_df.experiment_group == 'Control', numeric_columns]
    .resample('W')
    .mean()
)
skillz_weekly_test = (
    skillz_df.loc[skillz_df.experiment_group == 'Test', numeric_columns]
    .resample('W')
    .mean()
)
skillz_weekly_control
skillz_weekly_test
# One bar chart per metric column, with Control and Test drawn side by side
# (via `hue`) for each date on a shared x axis.
df_columns = skillz_df.columns[1:]
# The grid is fixed at 8 rows; zip() stops at the shorter of columns/axes.
fig, axes = plt.subplots(8, 1, figsize=(15, 22), sharex=True)
for name, ax in zip(df_columns, axes):
    sns.barplot(data=skillz_df, x=skillz_df.index, y=name, ax=ax, hue='experiment_group')
    # Rotate the date labels on the axes itself rather than relying on
    # pyplot's notion of the "current" axes.
    ax.tick_params(axis='x', labelrotation=90)
# For each metric column, overlay three series per experiment group on one
# subplot: the raw daily values, the weekly resampled mean and the 7-day
# rolling mean. Major x ticks are placed by a WeekdayLocator.
weeks = mdates.WeekdayLocator()
fig, axes = plt.subplots(8, 1, figsize=(15, 60), sharex=True)
# (group label, weekly-mean frame, rolling-mean frame); Control is drawn
# first so line order and legend order match the original layout.
groups = (
    ('Control', skillz_weekly_control, skillz_7d_control),
    ('Test', skillz_weekly_test, skillz_7d_test),
)
for name, ax in zip(df_columns, axes):
    for group, weekly, rolling in groups:
        daily = skillz_df.loc[skillz_df.experiment_group == group, name]
        ax.plot(daily, marker='.', linestyle='-', linewidth=0.5,
                label=group + ': Daily')
        ax.plot(weekly[name], marker='o', markersize=8, linestyle='-',
                label=group + ': Weekly Mean Resample')
        ax.plot(rolling[name], marker='.', markersize=5, linestyle='-',
                label=group + ': 7-Day Rolling Mean')
    # Rotate the tick labels on the axes itself rather than relying on
    # pyplot's notion of the "current" axes.
    ax.tick_params(axis='x', labelrotation=90)
    ax.set_ylabel(name)
    ax.xaxis.set_major_locator(weeks)
    ax.legend()
# Pairwise scatter plots of related metrics, coloured by experiment group.
# x/y are passed by keyword: current seaborn releases accept only `data`
# positionally, so the positional (x, y) form raises a TypeError there.
scatter_pairs = (
    ('cash_daily_active_users', 'entry_fees'),
    ('entry_fees', 'cash_games'),
    ('deposits', 'cash_games'),
    ('new_depositors', 'deposits'),
    ('z_daily_active_users', 'z_games'),
)
for x_col, y_col in scatter_pairs:
    plt.subplots(figsize=(7, 7))  # fresh 7x7 figure for each pair
    sns.scatterplot(data=skillz_df, x=x_col, y=y_col, hue='experiment_group')
    plt.show()
# The provided data is inadequate, and the associated questions aren't relevant for fraud analytics.