-
Notifications
You must be signed in to change notification settings - Fork 1
/
crossword_app.py
104 lines (81 loc) · 3.29 KB
/
crossword_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# PANDAS WORK
# Load the weekly results table.
df = pd.read_csv('data/crossword.csv')

# Keep the most recent week exactly as written in the CSV (captured before
# the column is parsed to datetimes) so it can be displayed as-is.
recent_week = df.tail(1)['Week'].item()
df['Week'] = pd.to_datetime(df['Week'])

# Indicator for solving at least one puzzle from Thursday onward.
# Every late-week column is compared to 1 explicitly so the flag is always
# 0/1; previously Saturday/Sunday were OR-ed in as raw values, which was
# inconsistent with Thursday/Friday and could leak non-0/1 values through.
late_days = ['Thursday', 'Friday', 'Saturday', 'Sunday']
df['late_week'] = (df[late_days] == 1).any(axis=1).astype(int)

# Table of total stats by day of week (plus the late-week indicator).
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'late_week']
df_totals = df[days].describe().T[['count', 'mean']]

# Sum each column directly. Rebuilding the total as count * mean can land a
# hair below the true integer in floating point (e.g. 49 * (1 / 49)), and
# astype(int) would then truncate it to one less than the real total.
df_totals['total correct'] = df[days].sum().astype(int)

# .copy() so the column added below is written to an independent frame
# rather than to a column-sliced selection of the describe() output.
df_totals = df_totals[['total correct', 'mean']].copy()
df_totals['percent correct'] = df_totals['mean']
# Cumulative totals per day, kept alongside the week identifier columns.
id_cols = ['WeekNum', 'Week']
df_cumul = pd.concat([df[id_cols], df[days].cumsum()], axis=1)

# Running average: cumulative total divided row-wise by the week number.
df_running_avg = pd.concat(
    [df_cumul[id_cols], df_cumul[days].div(df_cumul['WeekNum'], axis=0)],
    axis=1,
)

# Rolling averages over 20-week and 52-week windows.
df_20, df_52 = (df[days].rolling(window).mean() for window in (20, 52))

# Compare the latest 20-week rolling average with the one from 20 weeks
# earlier. After the transpose each day is a row and the two selected week
# rows become columns labelled by their original row labels.
latest_row = df_20.shape[0] - 1
baseline_row = latest_row - 20
df_L20 = df_20.iloc[[baseline_row, latest_row]].T
df_L20 = df_L20.assign(L20=df_L20[latest_row], Comp=df_L20[baseline_row])
# Relative change of the latest average against the comparison average.
df_L20['percent_diff'] = df_L20['L20'] / df_L20['Comp'] - 1
# get active streaks
def _active_streak(results):
    """Return the length of the run of 1s that ends at the last row.

    Args:
        results: pandas Series holding one day's weekly results
            (presumably 1 = solved, 0 = not solved; verify against the CSV).

    Returns:
        The number of consecutive trailing rows equal to 1, or 0 when the
        series is empty or its last value is anything other than 1.
    """
    if results.empty or results.iloc[-1] != 1:
        return 0
    # A new run starts wherever a value differs from the row before it;
    # the cumulative sum of those starts gives every run its own id.
    run_id = (results != results.shift(1)).cumsum()
    # The active streak is the size of the run that contains the last row.
    return int((run_id == run_id.iloc[-1]).sum())


# Build one entry for every name in `days`, which is exactly what the page
# looks up later. Previously an unexpected final value (e.g. a blank cell)
# only printed 'Error' and left the key out, so the streak lookup failed
# with a KeyError; such a column now simply reports no active streak.
streak_dict = {day: _active_streak(df[day]) for day in days}
# STREAMLIT WORK
# Page title and intro line.
st.title('Crossword Stats')
st.write('Check out my progress on the NY Times Crossword Puzzle!')

# Two side-by-side headline metrics: number of rows tracked and latest week.
weeks_col, recent_col = st.columns(2)
weeks_col.metric('Total Weeks Tracked', df.shape[0])
recent_col.metric('Most Recent Week', recent_week)

# Summary table with one column per day, values rounded to two decimals.
summary = df_totals[['total correct', 'percent correct']].T
st.table(summary.round(2))
st.header('20 Week Moving Averages')
# Whole-percent formatter shared by the metric value and its delta.
as_pct = "{:.0%}".format
# Two rows of four metrics: entries 0-3 of `days`, then entries 4-7.
for first in (0, 4):
    cols = st.columns(4)
    for offset, col in enumerate(cols):
        position = first + offset
        row = df_L20.iloc[position]
        with col:
            st.metric(days[position], as_pct(row['L20']), as_pct(row['percent_diff']))
st.header('Trends Over Time')
# Let the viewer pick which day column to chart.
day = st.selectbox('Pick Weekday', days)

fig, ax = plt.subplots()
# Plot the three averages for the selected day against the week dates.
for frame, label in (
    (df_running_avg, 'Running Average'),
    (df_20, 'L20 Average'),
    (df_52, 'L52 Average'),
):
    ax.plot(df.Week, frame[day], label=label)
ax.set_ylim(0, 1.1)
ax.legend(loc='best', prop={'size': 6})
# Rotate the date labels through the Axes object instead of plt.xticks, so
# the call does not depend on pyplot's global "current axes" state.
ax.tick_params(axis='x', labelrotation=90)
st.pyplot(fig)
# The script reruns on every interaction; without closing, pyplot keeps a
# reference to each figure it created and they pile up in memory.
plt.close(fig)
st.header('Active Streaks')
# Two rows of four metrics: entries 0-3 of `days`, then entries 4-7.
for first in (0, 4):
    cols = st.columns(4)
    for col, name in zip(cols, days[first:first + 4]):
        with col:
            st.metric(name, streak_dict[name])