import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt

sys.path.append('./utils')

# see https://github.com/dangeles/dangeles.github.io/blob/master/jupyter/utils/{covid_utils, cfr}.py
import covid_utils as cv
import cfr

# Pull the raw data set through the project-local `cfr` helper module.
df = cfr.fetch_data()

# Wavelet reconstruction of the signal, computed per state (level 3).
smooth = cfr.smooth_active(df, level=3)

# Cluster the states on their case curves: a 4-component PCA feeds the
# partitioning step (Leiden clustering, per the original author's note).
cases_col = 'CasesPerPeriod'
pca = cfr.pca_plot(smooth, cases_col, n_comps=4, plot=False)
groups = cfr.partition(pca, smooth)

# Draw the partitions; an identity function is handed over as the transform.
fig, ax = cfr.plot_partitions(
    smooth, cases_col, groups,
    pivot_kwargs={'transform': lambda x: x},
)
fig.suptitle('Weekly Recorded Cases', fontsize=20)
# NOTE(review): the title says "Weekly" while this label says "Daily" --
# confirm which period 'CasesPerPeriod' actually covers.
ax[0].set_ylabel('Daily Cases Per Million Inhabitants')
plt.tight_layout()

/Users/davidangeles/opt/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py:4468: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  downcast=downcast,


# Repeat the PCA -> partition -> plot pipeline, this time on the death counts.
deaths_col = 'DeathsPerPeriod'
pca = cfr.pca_plot(smooth, deaths_col, n_comps=4, plot=False)
groups = cfr.partition(pca, smooth)

# An identity function is handed over as the transform.
fig, ax = cfr.plot_partitions(
    smooth, deaths_col, groups,
    pivot_kwargs={'transform': lambda x: x},
)
fig.suptitle('Weekly Recorded Deaths', fontsize=20)
# NOTE(review): the title says "Weekly" while this label says "Daily" --
# confirm which period 'DeathsPerPeriod' actually covers.
ax[0].set_ylabel('Daily Deaths Per Million Inhabitants')
plt.tight_layout()


# Reff values need to be tamed before plotting: early in the pandemic the lack
# of randomized testing produced unrealistically high estimates.
#
# Steps:
#   1. +inf is turned into NaN so it is treated as missing rather than clamped.
#   2. The remaining values are clamped to [0.6, 5]; np.clip leaves NaN as NaN,
#      which matches what boolean-mask assignment would do.
reff = np.clip(smooth['Reff'].replace(np.inf, np.nan).values, 0.6, 5)
smooth['Reff'] = reff

# Palette keyed by the boolean 'Spreading' flag (True when Reff > 1).
color = {True: 'tab:red', False: 'tab:blue'}
smooth['Spreading'] = smooth['Reff'] > 1

# x and y are passed by keyword: seaborn warns about positional use and, from
# version 0.12 on, only `data` may be given positionally.
sns.scatterplot(x='date', y='Reff', hue='Spreading',
                data=smooth.reset_index(), units='state',
                palette=color, s=5, estimator=None, alpha=0.3)
_ = plt.xticks(rotation=45)

/Users/davidangeles/opt/anaconda3/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning


# only keep dates after the NYC / Boston waves:
# Only keep dates after the NYC / Boston waves. The boolean mask is built once;
# every call below still receives its own freshly sliced frame.
after_first_wave = smooth.index > dt.datetime(2020, 5, 1)

pca = cfr.pca_plot(smooth.loc[after_first_wave], 'Reff',
                   n_comps=8, plot=False)
groups = cfr.partition(pca, smooth.loc[after_first_wave])
fig, ax = cfr.plot_partitions(
    smooth.loc[after_first_wave], 'Reff', groups,
    pivot_kwargs={'transform': lambda x: x},
)
fig.suptitle('Weekly Recorded Reff', fontsize=30)

# Mark the Reff = 1 threshold on every panel and use a common y-range.
for panel in ax:
    panel.axhline(1, color='black', ls='--')
    panel.set_ylim(0, 3)

plt.tight_layout()


colors = {0: 'red', 1: 'blue', 2: 'black', 3: 'yellow', 4: 'orange'}

# Window boundaries (both exclusive) used to attribute a peak to a strain.
original_start = dt.datetime(2020, 7, 1)
delta_start = dt.datetime(2021, 5, 1)

# One row per (state, strain): the largest Reff seen inside that strain's
# window, tagged with the cluster label looked up in `groups`.
dp = []
for state_name, state_frame in smooth.groupby('state'):
    cluster = groups.loc[state_name]
    dates = state_frame.index

    # Delta peak: everything strictly after the Delta cutoff.
    in_delta = dates > delta_start
    reff_delta = state_frame.loc[in_delta, 'Reff'].max()

    # Original-strain peak: strictly between the two cutoffs.
    in_original = (dates > original_start) & (dates < delta_start)
    reff_cov19 = state_frame.loc[in_original, 'Reff'].max()

    dp.extend([
        (cluster, state_name, reff_delta, 'delta'),
        (cluster, state_name, reff_cov19, 'cov19'),
    ])

delta = pd.DataFrame(dp, columns=['group', 'state', 'Reff', 'strain'])
# Categorical cluster labels.
delta['group'] = delta['group'].astype('category')


# Box plot of the peak Reff per strain, with the individual rows of `delta`
# overlaid and coloured by cluster.
sns.boxplot(data=delta, x='Reff', y='strain', color='gray')
sns.stripplot(data=delta, x='Reff', y='strain', hue='group',
              dodge=True, jitter=False, s=10)
plt.legend(loc=(1, .2))
plt.xlabel('Maximal Recorded Reff')
plt.xlim(1, 5.1)

# Median peak Reff within each (strain, cluster), then the mean of those
# medians for each strain.
cluster_medians = delta.groupby(['strain', 'group']).Reff.median()
mean_Reff = cluster_medians.groupby('strain').apply(np.mean)
print(mean_Reff)

strain
cov19    1.667626
delta    1.901461
Name: Reff, dtype: float64


# Relative increase of the Delta figure over the original-strain figure, in %.
strain_ratio = mean_Reff.loc['delta'] / mean_Reff.loc['cov19']
worse = (strain_ratio - 1) * 100

# Message template; positional fields are: percent increase, Delta R, original R.
m = ('Delta is {0:.3g}% more infectious than the original COV-19: '
     'R for Delta is {1:.2g}, R for SARS-Cov-2 is: {2:.2g} ')
print(m.format(worse, mean_Reff['delta'], mean_Reff['cov19']))

Delta is 14% more infectious than the original COV-19: R for Delta is 1.9, R for SARS-Cov-2 is: 1.7


# Same comparison after dividing the Delta figure by a factor `f`.
# NOTE(review): the meaning of `f` is not stated here -- presumably a
# correction applied to the Delta estimate; confirm with the author.
# The loop ends with f == .6, the value in effect for any code that follows.
for f in (.5, .6):
    scaled_delta = mean_Reff.loc['delta'] / f
    worse = (scaled_delta / mean_Reff.loc['cov19'] - 1) * 100
    print(m.format(worse, scaled_delta, mean_Reff['cov19']))

Delta is 128% more infectious than the original COV-19: R for Delta is 3.8, R for SARS-Cov-2 is: 1.7 
Delta is 90% more infectious than the original COV-19: R for Delta is 3.2, R for SARS-Cov-2 is: 1.7


# Divide the Delta figure by the current `f`, then by (1 - 0.6).
# NOTE(review): the 0.6 looks like an assumed non-susceptible fraction of the
# population -- confirm; `f` is whatever value was assigned most recently.
remaining_fraction = 1 - .6
delta_R0 = mean_Reff.loc['delta'] / f / remaining_fraction
print('Delta R_0: {0:.2g}'.format(delta_R0))

Delta R_0: 7.9

Wavelets, Delta and COV19: an exercise in time series analysis

Smoothed case estimates

Smoothed death estimates

Rough $R_{eff}$ estimates

$R_{eff}$ estimates, clustered

Maximal $R_{eff}$ for Delta and COV19

How much worse is Delta than the 'original' COV-19?