import datetime as dt
today = dt.datetime.now() 
print('This notebook was last updated on', today.strftime('%A %B %d, %Y at %H:%M'))

This notebook was last updated on Thursday June 09, 2022 at 12:15


import sys
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
from matplotlib import rc
from matplotlib import ticker
from matplotlib import dates as mdates
from matplotlib.dates import DateFormatter

rc('text', usetex=True)
rc('text.latex', preamble=r'\usepackage{cmbright}')
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})

%matplotlib inline
%config InlineBackend.figure_formats = {'png', 'retina'}
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style("dark")

mpl.rcParams['xtick.labelsize'] = 16 
mpl.rcParams['ytick.labelsize'] = 16 
mpl.rcParams['legend.fontsize'] = 14

sys.path.append('./utils')

# see https://github.com/dangeles/dangeles.github.io/blob/master/jupyter/utils/covid_utils.py
import covid_utils as cv


# load into a dataframe:
pop = pd.read_excel('../data/nst-est2019-01.xlsx', comment='#', header=1)

# fetch NYT data:
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv'
df = pd.read_csv(url, usecols=[0, 1, 3, 4], parse_dates=['date'], squeeze=True)

pop.columns = np.append(np.array(['state']), pop.columns[1:].values)
pop.state = pop.state.str.strip('.')

# merge dfs:
df = df.merge(pop, left_on='state', right_on='state')

# calculate per population numbers:
df['normedPopCases'] = df.cases/ df[2019]
df['normedPopDeaths'] = df.deaths / df[2019]

# find max:
cases = df.groupby('state').cases.apply(max).sum()
death_toll = df.groupby('state').deaths.apply(max).sum()

print('Cases in the US at last update: {0:.2f}'.format(cases / 10 ** 6), 'million')
print('Death toll in the US at last update: {0:.0f} thousand'.format(death_toll / 10 ** 3))
print('Daily deaths: {0:.0f}'.format(df.groupby(['date']).deaths.apply(np.sum).diff().values[-1]))

# predicted deaths based on cases and a 1 in 55 CFR
print('Predicted deaths tomorrow: {0:.0f}'.format(
        df.groupby(['date']).cases.apply(np.sum).diff().values[-20:-8].mean() / 55))
print('Predicted deaths in 2 weeks: {0:.0f}'.format(
        df.groupby(['date']).cases.apply(np.sum).diff().values[-8].mean() / 55))

# calculate worst off states right now:
c_ = []
for n, g in df[df.cases > 10 ** 3].groupby('state'):
    x = (g.date - g.date.min()) / dt.timedelta(days=1)

    if len(g) < 15:
        continue
    y = g.cases.rolling(window=10, win_type='gaussian',
                          center=True).mean(std=2).round()
    y = g.normedPopDeaths.rolling(window=10, win_type='gaussian',
                                  center=True).mean(std=2).round()
#     y = y.diff()
    c_ += [[n, y.dropna().values[-1]]]

worst = pd.DataFrame(c_, columns=['state', 'new_cases'])
worst.sort_values('new_cases', inplace=True)
worst = worst.state.values[-4:]
print('Worst states right now:', worst)

Cases in the US at last update: 85.05 million
Death toll in the US at last update: 1009 thousand
Daily deaths: 1044
Predicted deaths tomorrow: 1744
Predicted deaths in 2 weeks: 3976
Worst states right now: ['Michigan' 'Minnesota' 'Wisconsin' 'Wyoming']


us  = df.groupby('date')[['cases', 'deaths']].sum().reset_index()
us = us[us.date >= us[us.deaths > 10].date.min()]
us['RefTime'] = (us.date - us.date.min()) / dt.timedelta(days=1)

fig, ax = plt.subplots(ncols=3, sharex=True, figsize=(16, 4))

ax[0].plot(us.RefTime, us.cases, color='black', label='cases')
ax[0].plot(us.RefTime, us.deaths, color='red', label='deaths')
ax[1].plot(us.RefTime, us.cases.diff().rolling(win_type='exponential',
                                               window=14, center=True).mean(tau=10) / 10 ** 3,
          color='black', label='cases per 1000')
ax[2].plot(us.RefTime, us.deaths.diff().rolling(win_type='exponential',
                                                window=14, center=True).mean(tau=10),
          color='red', label='deaths')

ax[1].scatter(us.RefTime, np.gradient(us.cases) / 10 ** 3,
              color='black', label='cases (raw)', alpha=0.1)
ax[2].scatter(us.RefTime, np.gradient(us.deaths),
              color='red', label='deaths (raw)', alpha=0.1)
ax[2].axhline(np.max(np.gradient(us.deaths)), ls='--', color='blue',
              label='Max Daily Deaths = {0:.0f}'.format(np.max(np.gradient(us.deaths))))
ax[2].axhline(np.gradient(us.deaths)[-1], ls='-.', color='red',
              label='Daily Deaths = {0:.0f}'.format(np.gradient(us.deaths)[-1]))

ax[0].set_yscale('log')
# ax[1].set_yscale('log')

ax[0].set_ylabel('Number')
ax[1].set_ylabel('Number Cases per 1000 / Day')
ax[2].set_ylabel('Number Deaths / Day')
fig.text(0.5, -0.04, 'Days since first 10 US deaths',
         ha='center', fontsize=18)
plt.legend(loc=(1, .4))
plt.tight_layout()


fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(10, 7))
ax[0, :] = cv.plot(ax[0], df, 'cases', 'normedPopCases', n1=1, alpha=0.2 )
ax[1, :] = cv.plot(ax[1], df, 'deaths', 'normedPopDeaths',
                   1, 10 ** -6, ylab='Death', alpha=0.2)

for ai in ax:
    for aij in ai:
        aij.set_yscale('log')
_ = ax[0, 1].legend(loc=(1, 0))

ax[1, 1].set_ylim(1, 5 * 10**3)
plt.tight_layout()


fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(12, 7), constrained_layout=True)

fig.suptitle('Rate of Change of COVID19', fontsize=20)
ax[0, :] = cv.plot(ax[0], df, 'cases', 'normedPopCases',  n1=1, gradient=True, window=8)
ax[1, :] = cv.plot(ax[1], df, 'deaths', 'normedPopDeaths',
                   1, 10 ** -8, ylab='Death', gradient=True, window=8)

ax[1,1].set_ylim(0, 60)
_ = ax[0, 1].legend(loc=(1, 0))


for n, g in df.groupby('state'):
    x = 1000 * g[g.cases > 50].normedPopCases.rolling(window=10, win_type='gaussian', center=True).mean(std=2)
    if n not in worst:
        plt.plot(x, np.gradient(x), color='gray', lw=1, alpha=0.3)
    else:
        plt.plot(x, np.gradient(x), label=n, lw=3, zorder=np.inf)

plt.xscale('log')
plt.yscale('log')
plt.ylim(10 ** -3, 10)
plt.legend(loc=(1, 0.5))
plt.xlabel('Total Cases Per 1000 People')
plt.ylabel('Cases Per 1000 People / Day')

Text(0, 0.5, 'Cases Per 1000 People / Day')


for n, g in df.groupby('state'):
    x = 1000 * g[g.deaths > 5].normedPopDeaths.rolling(window=10, win_type='gaussian', center=True).mean(std=2)
    if n not in worst:
        plt.plot(x, np.gradient(x), color='gray', lw=1, alpha=0.3)
    else:
        plt.plot(x, np.gradient(x), label=n, lw=3, zorder=np.inf)

plt.ylim(10**-4, .1)
plt.xscale('log')
plt.yscale('log')
plt.legend(loc=(1, 0.5))
plt.xlabel('Total Deaths Per 1000 People')
plt.ylabel('Deaths Per 1000 People / Day')

Text(0, 0.5, 'Deaths Per 1000 People / Day')


df['SmoothCases'] = df.groupby('state').normedPopCases.apply(lambda x:
                        x.rolling(window=3, win_type='gaussian', center=True).mean(std=2))
df['GradCases'] = df.groupby('state').SmoothCases.transform(np.gradient)
tmp = df.loc[df.groupby('state')['GradCases'].idxmax()].sort_values('date')

plt.plot(tmp.date, 10000 * tmp.GradCases, 'o', color='black')
date_form = DateFormatter("%b-%d")
plt.gca().xaxis.set_major_formatter(date_form)
plt.xticks(rotation=45)
_ = plt.ylabel('Cases per 10k People / Day')
_ = plt.xlabel('Date of Peak to date')


fig, ax = plt.subplots(figsize=(8,13))
sns.stripplot(x='date', y='state', data=tmp.sort_values('date'), size=10)
ax.xaxis.set_major_formatter(date_form)
_ = plt.xticks(rotation=45)

affected = tmp[tmp.date < dt.datetime.now() - dt.timedelta(days=8)]
print('Proportion of population in peaking states: {0:.2g}%'.format(affected[2019].sum() / tmp[2019].sum() * 100))
print('Number of states peaking: {0}'.format(len(affected)))

Proportion of population in peaking states: 1e+02%
Number of states peaking: 52


fig, ax = plt.subplots(ncols=3, figsize=(16, 4.5))

ax[0] = cv.plot_cases_vs_deaths(df, xcol='cases', ycol='deaths', ax=ax[0])
ax[1] = cv.plot_cases_vs_deaths(df, CFR=True, ax=ax[1])
ax[2] = cv.plot_cases_vs_deaths(df, xcol='normedPopCases', ycol='normedPopDeaths', CFR=True, ax=ax[2])
_ = ax[2].legend(loc=(1, .2))
ax[2].set_xlabel('Cases / State Population')
plt.tight_layout()


cfr = df[df.deaths > 10].groupby('state').apply(lambda x: x.deaths / x.cases, ).reset_index()
cfr.columns = ['state', 'level', 'CFR']
cfr_max_curr = cfr.groupby('state').CFR.agg([np.min, np.max, lambda x: x.tail(1)]).reset_index()
cfr_max_curr.columns = ['state', 'minCFR', 'maxCFR', 'currentCFR']
cfr_max_curr.sort_values('currentCFR', inplace=True)

fig, ax = plt.subplots(figsize=(20,4))
sns.stripplot(y='maxCFR', x='state', data=cfr_max_curr, color='red', s=8, jitter=False) 
sns.stripplot(y='minCFR', x='state', data=cfr_max_curr, color='blue', s=8, jitter=False) 
sns.stripplot(y='currentCFR', x='state', data=cfr_max_curr, color='black', s=8, jitter=False)
plt.errorbar(y=(cfr_max_curr.maxCFR + cfr_max_curr.currentCFR) / 2, x=cfr_max_curr.state,
             yerr=(cfr_max_curr.maxCFR - cfr_max_curr.currentCFR) / 2, fmt='none', color='red')
_ = plt.errorbar(y=(cfr_max_curr.minCFR + cfr_max_curr.currentCFR) / 2, x=cfr_max_curr.state,
                 yerr=(cfr_max_curr.minCFR - cfr_max_curr.currentCFR) / 2, fmt='none', color='blue')

_ =plt.xticks(rotation=45, ha='right')


fig, ax = plt.subplots(figsize=(8,13))
tmp = df.groupby('state').normedPopDeaths.apply(np.max)
tmp = tmp.sort_values() 
sns.stripplot(x=tmp.sort_values() * 10 ** 3, y=tmp.index, size=10)
plt.axvline(1, ls='--')
plt.xlim(10**-3, 10**2)
plt.xlabel('Deaths per Thousand People')
plt.xscale('log')


maxR = cv.plot_rt(worst, df, df.cases > 10)
_ = plt.figtext(.5, .045, 'Days since 10 cases registered',
                fontsize=21, ha='center')

./utils/covid_utils.py:289: RuntimeWarning: invalid value encountered in subtract
  p = np.exp(p - logsumexp(p)) * p_prev @ prior


rt = np.array([])
s = np.array([], dtype=object)
for n, g in df[df.date > dt.datetime.now() - dt.timedelta(days=40)].groupby('state'):
    gp = g.copy()
    gp['newCases'] = gp.cases.diff().rolling(window=8,
                                             win_type='gaussian',
                                             center=True).mean(std=2).round()
    gp['newDeaths'] = gp.deaths.diff().rolling(window=8,
                                               win_type='gaussian',
                                               center=True).mean(std=2).round()

    maxR = cv.r_calc(gp, gamma=1/10)
    seven_day_avg = np.mean(maxR[maxR > 0][-7:])
    s = np.append(n, s)
    rt = np.append(seven_day_avg, rt)


fig, ax = plt.subplots(figsize=(20, 5))
rdf = pd.DataFrame([s, rt]).transpose()
rdf.columns = ['state', 'r']
rdf['color'] = rdf.r < 1
sns.stripplot(x='state', y='r', hue='color', data=rdf.sort_values('r'), size=10)
plt.legend([])
plt.axhline(1, ls='-.', color='black')
_ = plt.ylabel(r'$R_t$ averaged for last 7 days')
_ = plt.xticks(rotation=45, ha='right')


df['newCases'] = df.groupby('state').cases.diff()
tmp = df.groupby('state').last().merge(rdf, left_on='state', right_on='state')

active = (df.groupby('state').cases.last() - df.groupby('state').cases.nth(-10)).reset_index()
active['active'] = active.cases
active['activeNormed'] = (df.groupby('state').normedPopCases.last() -
                          df.groupby('state').normedPopCases.nth(-10)).values

tmp = tmp.merge(active[['state', 'active', 'activeNormed']], left_on='state', right_on='state')
tmp.r = tmp.r.astype(np.float64)

plt.scatter(tmp.newCases, tmp.r, s=100 * np.sqrt(tmp.normedPopCases / tmp.normedPopCases.max()), alpha=0.5)

plt.xlim(1, 10 ** 5)
plt.axhline(1, ls='--', color='black')
plt.xscale('log')

plt.xlabel('New Cases (non-smoothed)')
plt.ylabel('Average $R_t$')

Text(0, 0.5, 'Average $R_t$')


plt.scatter(tmp.newCases, tmp.r, s=100 * np.sqrt(tmp.activeNormed / tmp.activeNormed.max()), alpha=0.5)

plt.xlim(1, 10 ** 5)
plt.axhline(1, ls='--', color='black')
plt.xscale('log')

plt.xlabel('New Cases (non-smoothed)')
plt.ylabel('Average $R_t$')

Text(0, 0.5, 'Average $R_t$')


fig, ax = plt.subplots(figsize=(20, 10), nrows=2, sharex=True)
sns.stripplot(y='active', x='state', data=tmp.sort_values('activeNormed'), ax=ax[0], size=10)
sns.stripplot(y='activeNormed', x='state', data=tmp.sort_values('activeNormed'), ax=ax[1], size=10)
ax[0].set_yscale('log')
ax[1].set_yscale('log')

print('{0:.2g} * E5'.format(tmp.active.sum() / 10 ** 5))
_ = plt.xticks(rotation=45, ha='right')

12 * E5


fig, ax = plt.subplots(figsize=(20, 10), nrows=2, sharex=True)
sns.stripplot(y='active', x='state', data=tmp.sort_values('activeNormed'), ax=ax[0], size=10)
sns.stripplot(y='activeNormed', x='state', data=tmp.sort_values('activeNormed'), ax=ax[1], size=10)
ax[1].set_ylim(10 ** -5, tmp.activeNormed.max() * 1.1)
print('{0:.2g} * E5'.format(tmp.active.sum() / 10 ** 5))
_ = plt.xticks(rotation=45, ha='right')

12 * E5

The COVID-19 pandemic¶

Loading the data¶

COVID in the total US¶

Epidemiological curves of COVID-19¶

Are the curves flattening?¶

When are the peaks happening?¶

Case Fatality Rates¶

Total CFR by state¶

What states have the highest and lowest # of deaths per capita?¶

The viral reproductive viral, $R_t$, through time¶

What are the states with the best or worst $R_t$?¶