Changes from all commits

42 commits
5cd1644
gk fork, adding preprocessing for own data, predict script
wasd12345 Jun 26, 2018
aeabb2a
simple imputation
wasd12345 Jun 26, 2018
21731ed
making features from our data
wasd12345 Jun 27, 2018
6b48e09
sampling_period daily weekly etc
wasd12345 Jun 28, 2018
2a67dd1
py36 fixes
wasd12345 Jun 28, 2018
a681e1e
incorporating my modified features
wasd12345 Jun 29, 2018
0e7b628
continue putting in my features
wasd12345 Jul 2, 2018
ad81613
working with a few of our example features
wasd12345 Jul 2, 2018
858ff2a
working w our data and features
wasd12345 Jul 3, 2018
983fb9c
finished weekly aggregation
wasd12345 Jul 5, 2018
1472457
removed dummy tensors, now only return exact needed
wasd12345 Jul 8, 2018
a395049
input pipe cleanup
wasd12345 Jul 9, 2018
a9066ae
input pipe features, arturius and full features all samplingperiods w…
wasd12345 Jul 9, 2018
06eacda
predict script add arguments needed; trainer minor updates
wasd12345 Jul 9, 2018
c1c6f37
--
wasd12345 Jul 10, 2018
c68ad1e
debugging, cleanup, adding other optimizers
wasd12345 Jul 12, 2018
e33979e
doing median of last 4 weeks imputation for daily sampled data
wasd12345 Jul 16, 2018
e555ee1
changed preprocessing to NANs instead of -1 which fixed the SMAPE 2 i…
wasd12345 Jul 16, 2018
ea234ba
future predictions on our data working
wasd12345 Jul 16, 2018
3b53e6a
multirun train-val bash script
wasd12345 Jul 19, 2018
9613294
added (year - 2010) / (2020 - 2010) as feature
wasd12345 Jul 19, 2018
a6ededd
added encoder-decoder context to every decoder timestep
wasd12345 Jul 19, 2018
d5277fd
misc. and starting on random series sizes
wasd12345 Jul 23, 2018
0a67384
starting on holidays
wasd12345 Jul 24, 2018
984a848
finished basic holiday encoding except Thanksgiving, Easter
wasd12345 Jul 24, 2018
1a23d99
finished K-step lookback - moderate SMAPE improvement
wasd12345 Jul 25, 2018
046ba10
starting on SMAPE heatmaps
wasd12345 Jul 26, 2018
d3d6757
completely different train test split data
wasd12345 Jul 27, 2018
54fab19
doing all (t,v,b) predictions
wasd12345 Jul 28, 2018
dd37b80
working predictions by t,v, backoffset
wasd12345 Jul 31, 2018
d63c3e4
saving out smapes, bias
wasd12345 Jul 31, 2018
697e98f
performance heatmaps done for one test/validation time
wasd12345 Aug 1, 2018
f476338
fixed kaggle's NAN issue from train_complete_threshold rounding
wasd12345 Aug 2, 2018
f387474
all working performance heatmaps
wasd12345 Aug 2, 2018
3e12a05
fixed problem of Kaggle features leaking future information backward
wasd12345 Aug 7, 2018
d62b9de
4 chunk backtesting
wasd12345 Aug 10, 2018
e3d6ea2
working MLP postprocessor, helps SMAPE by >4pct
wasd12345 Aug 13, 2018
6cb6854
fixed heatmap issue for backtest mode for history+horizon size
wasd12345 Aug 13, 2018
352bdb7
SMAPE + K*quantile_loss
wasd12345 Aug 15, 2018
fee641d
when doing quantiles, only use average pinball losses, ignore SMAPE
wasd12345 Aug 15, 2018
df1b288
working direct quantile forecaster
wasd12345 Aug 17, 2018
ccc7710
multistep encoder predicts differently
wasd12345 Aug 23, 2018
8 changes: 8 additions & 0 deletions .gitignore
@@ -11,3 +11,11 @@ data/*.zip
data/submission.csv.gz
!data/2017-08-15_2017-09-11.csv.zip

data/*
*/.DS_STORE
.DS_STORE
images/
ex_figs/

*.png
output/
92 changes: 92 additions & 0 deletions Adam_HD_optimizer.py
@@ -0,0 +1,92 @@
# Copy-pasted from https://github.com/zadaianchuk/HyperGradientDescent/blob/master/Adam_HD_optimizer.py
# Hypergradient Descent optimizer (Adam variant)

from __future__ import division

import tensorflow as tf


class AdamHDOptimizer(tf.train.GradientDescentOptimizer):

    def __init__(self, alpha_0, beta=10**(-7), name="HGD", mu=0.99, eps=10**(-8), type_of_learning_rate="global"):
        super(AdamHDOptimizer, self).__init__(beta, name=name)

        self._mu = mu
        self._alpha_0 = alpha_0
        self._beta = beta
        self._eps = eps
        self._type = type_of_learning_rate

    def minimize(self, loss, global_step):

        # Algorithm parameters as constant tensors
        mu = tf.convert_to_tensor(self._mu, dtype=tf.float32)
        alpha_0 = tf.convert_to_tensor(self._alpha_0, dtype=tf.float32)
        beta = tf.convert_to_tensor(self._beta, dtype=tf.float32)
        eps = tf.convert_to_tensor(self._eps, dtype=tf.float32)

        var_list = tf.trainable_variables()

        # Create and retrieve slot variables for:
        # direction of the previous step
        ds = [self._get_or_make_slot(var,
                                     tf.constant(0.0, tf.float32, var.get_shape()), "direction", "direction")
              for var in var_list]
        # current learning rate alpha (one global scalar, or one per variable)
        if self._type == "global":
            alpha = self._get_or_make_slot(alpha_0, alpha_0, "learning_rate", "learning_rate")
        else:
            alphas = [self._get_or_make_slot(var,
                                             tf.constant(self._alpha_0, tf.float32, var.get_shape()), "learning_rates", "learning_rates")
                      for var in var_list]
        # moving-average estimates of the first and second moments
        ms = [self._get_or_make_slot(var,
                                     tf.constant(0.0, tf.float32, var.get_shape()), "m", "m")
              for var in var_list]
        vs = [self._get_or_make_slot(var,
                                     tf.constant(0.0, tf.float32, var.get_shape()), "v", "v")
              for var in var_list]
        # power of mu for the bias-corrected first and second moment estimates
        mu_power = tf.get_variable("mu_power", shape=(), dtype=tf.float32, trainable=False,
                                   initializer=tf.constant_initializer(1.0))

        # Update moving averages of the first and second moments
        grads = tf.gradients(loss, var_list)
        grads_squared = [tf.square(g) for g in grads]
        m_updates = [m.assign(mu*m + (1.0-mu)*g) for m, g in zip(ms, grads)]  # new means
        v_updates = [v.assign(mu*v + (1.0-mu)*g2) for v, g2 in zip(vs, grads_squared)]
        mu_power_update = [tf.assign(mu_power, tf.multiply(mu_power, mu))]
        # Bias correction of the estimates
        with tf.control_dependencies(v_updates + m_updates + mu_power_update):
            ms_hat = [tf.divide(m, tf.constant(1.0) - mu_power) for m in ms]
            vs_hat = [tf.divide(v, tf.constant(1.0) - mu_power) for v in vs]

        # Update of the learning rate alpha: the main difference between Adam and Adam-HD
        if self._type == "global":
            hypergrad = sum([tf.reduce_sum(tf.multiply(d, g)) for d, g in zip(ds, grads)])
            alphas_update = [alpha.assign(alpha - beta*hypergrad)]
        else:
            hypergrads = [tf.multiply(d, g) for d, g in zip(ds, grads)]
            alphas_update = [alpha.assign(alpha - beta*hypergrad) for alpha, hypergrad in zip(alphas, hypergrads)]

        # Update step directions
        # (control dependency ensures alphas are computed from the previous step directions)
        with tf.control_dependencies(alphas_update):
            ds_updates = [d.assign(-tf.divide(m, tf.sqrt(v) + self._eps)) for (m, v, d) in zip(ms_hat, vs_hat, ds)]

        # Update the parameters of the model
        with tf.control_dependencies(ds_updates):
            if self._type == "global":
                dirs = [alpha*d for d in ds]
                alpha_norm = alpha
            else:
                dirs = [alpha*d for d, alpha in zip(ds, alphas)]
                alpha_norm = sum([tf.reduce_mean(alpha**2) for alpha in alphas])
            variable_updates = [v.assign_add(d) for v, d in zip(var_list, dirs)]
            # include the global_step increment in the returned group so it actually runs
            increment_step = global_step.assign_add(1)
            # add summaries (track changes in alpha)
            with tf.name_scope("summaries"):
                with tf.name_scope("per_iteration"):
                    alpha_norm_sum = tf.summary.scalar("alpha", alpha_norm,
                                                       collections=[tf.GraphKeys.SUMMARIES, "per_iteration"])
        return tf.group(*(variable_updates + [increment_step]))
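
For orientation: the alpha update above is the hypergradient-descent step, where alpha is nudged by beta times the dot product of the current gradient with the previous step direction. Below is a minimal usage sketch of the class above; the toy loss and step count are made up for illustration, and a TF1-style graph and session are assumed, as elsewhere in this repo.

import tensorflow as tf

# Toy problem: minimize (w - 3)^2 with the optimizer defined above.
w = tf.get_variable("w", shape=(), initializer=tf.zeros_initializer())
loss_op = tf.square(w - 3.0)

global_step = tf.train.get_or_create_global_step()
opt = AdamHDOptimizer(alpha_0=1e-2, beta=1e-7, mu=0.99)
train_op = opt.minimize(loss_op, global_step)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(500):
        sess.run(train_op)
    print(sess.run(w))  # should approach 3.0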
61 changes: 61 additions & 0 deletions MAKEFEATURES_TRAIN_ALL.sh
@@ -0,0 +1,61 @@
# When doing the chunked backtest approach, we need to train/retrain the model
# after each new chunk of training data comes in.

# For this setup, just retrain from scratch each time (i.e. start over
# completely rather than resuming from the last checkpoint of the previous
# training chunk).


# ==============================================================================
# PARAMETERS
# ==============================================================================
# For each of the N training sets: train a model.
# true/false: whether to remake the feature sets vs. skipping directly to training
MAKE_FEATURESETS=false
# Make some cached features for all the training/test sets
makefeats_names="TRAINset1 TRAINset2 TRAINset3 TRAINset4 TESTset1 TESTset2 TESTset3 TESTset4"
train_names="TRAINset1 TRAINset2 TRAINset3 TRAINset4"
# In training, the max number of epochs to do. By 25-50, things have usually plateaued.
MAX_EPOCH=50


if $MAKE_FEATURESETS; then

    echo 'Cleaning up, then remaking feature sets'
    # Clean up between feature sets
    cd data
    rm -R TRAIN*
    rm -R TEST*
    rm -R cpt/
    rm -R cpt_tmp/
    rm -R logs/
    rm *.pkl
    cd ..
    ls -l data/


    # =============================================================================
    # make_features.py
    # =============================================================================
    for v in $makefeats_names; do
        # Create the features for our data
        echo 'running make_features.py'
        echo $v
        python3 make_features.py data/$v ours daily full --add_days=0
    done
fi


# =============================================================================
# trainer.py
# =============================================================================
for v in $train_names; do
    echo 'running trainer.py'
    echo $v
    # By default, trainer.py already does a forward split.
    python3 trainer.py full daily --name=$v --hparam_set='encdec' --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10 --max_epoch=$MAX_EPOCH --patience=5 --verbose --save_epochs_performance
    # --side_split  # The side_split option gives unrealistic SMAPE values:
    # it reports training, side-split, and forward-step SMAPEs of only 3-8%,
    # which is clearly unrealistic. Unclear whether the original Kaggle author
    # computed things differently for the side-eval option; leave it off for
    # now and only do forward eval.
done
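
As a rough illustration of the chunked walk-forward setup this script retrains over, here is a hypothetical sketch: the start date, 90-day chunk length, and the roughly 3-month spacing are assumptions for illustration only; the real TRAINset/TESTset splits come from this repo's preprocessing, not this code.

import pandas as pd

# Hypothetical walk-forward chunking behind the TRAINset1..4 / TESTset1..4
# naming; dates and chunk length are made up for illustration.
start = pd.Timestamp('2017-01-01')
chunk = pd.Timedelta(days=90)  # e.g. ~3 months of new data per retrain

for i in range(1, 5):
    train_end = start + i * chunk   # training window grows with each chunk
    test_end = train_end + chunk    # test on the next, unseen chunk
    print(f'TRAINset{i}: {start.date()} .. {train_end.date()}   '
          f'TESTset{i}: {train_end.date()} .. {test_end.date()}')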
227 changes: 227 additions & 0 deletions PERFORMANCE_HEATMAPS.py
@@ -0,0 +1,227 @@
# Analyze the different performance metrics
# Make the performance heatmaps

# There will be 4 different TRAIN-TEST sets; each has a model trained on that
# train set and tested on that test set. To simulate a production environment
# where we would retrain the model every so often, we thus have e.g. 4 tests
# of the model, each with, say, 3 months more data appended. So we just do 4
# separate analyses.

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import os
import numpy as np
import pickle
from collections import defaultdict


# =============================================================================
# PARAMETERS
# =============================================================================
OUTDIR = 'output'
NAMES = ['TESTset1', 'TESTset2', 'TESTset3', 'TESTset4']


# =============================================================================
# MAIN
# =============================================================================

def load_dict(path):
    with open(path, 'rb') as gg:
        d = pickle.load(gg)
    return d


def aggregate__overall(data_dict, real_only, id_subsets, bad_ids):
    """
    For each (history, horizon) pair, marginalize over all id's and dates.

    Format is (history, horizon, backoffset, id) :
        {'SMAPE': smape, 'bias': bi}  # optionally also 'MAE', 'predict_start_date', 'predict_end_date'
    """
    agg_dict = defaultdict(lambda: [])
    for k, v in data_dict.items():
        series_id = k[3]
        # Only use the real series, ignore the synthetic ones
        # (synthetic series have names like {id}__... )
        if real_only:
            if '__' in series_id:
                continue
        # If we have a set of holdout id's:
        if id_subsets:
            if series_id not in id_subsets:
                continue
        # Regardless of mode, if this is one of the corrupted time series, ignore it:
        if series_id in bad_ids:
            continue

        history = k[0]
        horizon = k[1]
        smape = v['SMAPE']
        agg_dict[(history, horizon)] += [smape]

    # Now get summary statistics of the SMAPEs
    metrics_dict = {}
    for k, v in agg_dict.items():
        mean = np.nanmean(v)
        median = np.nanmedian(v)
        sd = np.nanstd(v)
        pctl_5 = np.percentile([i for i in v if np.isfinite(i)], 5)
        pctl_95 = np.percentile([i for i in v if np.isfinite(i)], 95)
        metrics_dict[k] = {'mean': mean, 'median': median, 'sd': sd, '5pctl': pctl_5, '95pctl': pctl_95}

    histories = list(np.unique([i[0] for i in metrics_dict.keys()]))
    horizons = list(np.unique([i[1] for i in metrics_dict.keys()]))

    metrics_arrays = {}
    for metric in ['mean', 'median', 'sd', '5pctl', '95pctl']:
        _array = np.nan * np.ones((len(histories), len(horizons)))
        for k, v in metrics_dict.items():
            i = histories.index(k[0])
            j = horizons.index(k[1])
            _array[i, j] = v[metric]
        metrics_arrays[metric] = _array
    print(metrics_arrays)
    return metrics_dict, histories, horizons, metrics_arrays


def make_heatmap(metrics_arrays, histories, horizons, outdir, name):
    """
    Visualize the SMAPE values.
    """
    # For scale, cap the heatmap at 200 (the worst possible SMAPE); otherwise,
    # to improve dynamic range, use the highest measured SMAPE value.
    for k, v in metrics_arrays.items():

        savename = k + '_' + name

        vmax = np.nanmin([200., np.nanmax(np.ceil(v))])

        plt.figure()
        plt.imshow(v, vmin=0., vmax=vmax)
        plt.title(savename, fontsize=15)
        plt.colorbar()
        plt.xlabel('Horizon', fontsize=15)
        plt.ylabel('History', fontsize=15)
        plt.xticks(np.arange(len(horizons)), horizons, fontsize=15)
        plt.yticks(np.arange(len(histories)), histories, fontsize=15)

        # Annotate each cell with its rounded value
        for x in range(len(horizons)):
            for y in range(len(histories)):
                s = np.round(v[y, x], 1)
                plt.text(x - .25, y, s)
        savepath = os.path.join(outdir, f'{savename}.png')
        plt.savefig(savepath)


if __name__ == '__main__':
    # parser = argparse.ArgumentParser()
    # parser.add_argument('--logdir', default='data/logs', help="Directory where numpy arrays of performance are")
    # parser.add_argument('--K_last', default=3, dest='K_last', help='Save out per-EPOCH metrics (NOT per step, only per EPOCH)')
    # args = parser.parse_args()
    # param_dict = dict(vars(args))
    # make_heatmaps(**param_dict)

    # For each of the 4 dicts:

    # Make a list of id's that were held out from training, to assess transfer ability
    HOLD_OUTS = [str(i) for i in range(500)]  # Not actually held out; just to get an idea of performance on earlier ids
    special_ids = [str(i) for i in [531, 1007, 143, 130, 197, 203, 209, 215, 342, 476, 328, 182, 200, 145, 242, 44, 94, 147, 1, 5, 6, 7, 8, 12, 387, 429, 1005, 943]]
    id_dict = {'allIDs': [],
               'special_ids': special_ids,
               'holdout_ids': HOLD_OUTS}

    # Some of the id's are just bad (multiple month-long gaps from corrupted data, etc.), so they could be ignored.
    # For now, use everything, to get a conservative estimate of performance.
    BAD_IDs = []  # ['44','46','581','582','583','584']


    # =============================================================================
    # Aggregated over all 4 test sets
    # =============================================================================
    all_data = {}
    for chunkname in NAMES:
        print('chunkname: ', chunkname)
        path = os.path.join(OUTDIR, f'hist_horiz__{chunkname}.pickle')
        data = load_dict(path)
        new_data = {k + (chunkname,): v for k, v in data.items()}
        all_data.update(new_data)

    for real_only in [True, False]:
        for k, id_subsets in id_dict.items():

            r = 'real' if real_only else 'realAndsynthetic'
            name = '4Ave' + '_' + r + '_' + k
            print(name)

            metrics_dict, histories, horizons, metrics_arrays = aggregate__overall(all_data, real_only, id_subsets, BAD_IDs)
            make_heatmap(metrics_arrays, histories, horizons, OUTDIR, name)

            # Save out the metrics dict
            dict_savename = os.path.join(OUTDIR, f"hist_horiz__{name}__allchunks__metrics.pickle")
            with open(dict_savename, "wb") as outp:
                pickle.dump(metrics_dict, outp)


    # =============================================================================
    # Individual test sets
    # =============================================================================
    # For the 4-chunk backtesting performance assessment
    for chunkname in NAMES:
        print('chunkname: ', chunkname)
        path = os.path.join(OUTDIR, f'hist_horiz__{chunkname}.pickle')
        data = load_dict(path)

        for real_only in [True, False]:
            for k, id_subsets in id_dict.items():

                r = 'real' if real_only else 'realAndsynthetic'
                name = chunkname + '_' + r + '_' + k
                print(name)

                metrics_dict, histories, horizons, metrics_arrays = aggregate__overall(data, real_only, id_subsets, BAD_IDs)
                make_heatmap(metrics_arrays, histories, horizons, OUTDIR, name)

                # Save out the metrics dict
                dict_savename = os.path.join(OUTDIR, f"hist_horiz__{name}__metrics.pickle")
                with open(dict_savename, "wb") as outp:
                    pickle.dump(metrics_dict, outp)
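
For reference, here is a minimal sketch of the pickle input this script consumes, following the (history, horizon, backoffset, id) key format documented in aggregate__overall. Every number below is a made-up placeholder, not a real result.

import os
import pickle

# Made-up example of the dict stored at output/hist_horiz__TESTset1.pickle;
# keys are (history, horizon, backoffset, id), values hold per-series metrics.
example = {
    (364, 28, 0, '143'): {'SMAPE': 31.2, 'bias': -0.8},
    (364, 28, 0, '143__synth1'): {'SMAPE': 40.5, 'bias': 2.1},  # synthetic: '__' in id
    (182, 14, 7, '531'): {'SMAPE': 27.9, 'bias': 0.3},
}
os.makedirs('output', exist_ok=True)
with open(os.path.join('output', 'hist_horiz__TESTset1.pickle'), 'wb') as f:
    pickle.dump(example, f)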









