From 5cd1644a8a257122d725de63e3f15419d37efa9f Mon Sep 17 00:00:00 2001
From: Greg Kocher
Date: Mon, 25 Jun 2018 17:51:18 -0700
Subject: [PATCH 01/42] gk fork, adding preprocessing for own data, predict script

---
 .gitignore               |   3 +
 PREDICT.py               | 176 +++++++++++++++++++++++++
 PREPROCESS.py            | 271 +++++++++++++++++++++++++++++++++++++++
 QUICKLOOK.py             |  34 +++++
 Readme.md                |  24 ++++
 ex_figs/quickcheck_0.png | Bin 0 -> 32041 bytes
 ex_figs/quickcheck_1.png | Bin 0 -> 37901 bytes
 ex_figs/quickcheck_2.png | Bin 0 -> 38176 bytes
 ex_figs/quickcheck_3.png | Bin 0 -> 41804 bytes
 ex_figs/quickcheck_4.png | Bin 0 -> 33743 bytes
 hparams.py               | 103 +++++++++++++++
 percent_dense.png        | Bin 0 -> 8162 bytes
 12 files changed, 611 insertions(+)
 create mode 100644 PREDICT.py
 create mode 100644 PREPROCESS.py
 create mode 100644 QUICKLOOK.py
 create mode 100644 ex_figs/quickcheck_0.png
 create mode 100644 ex_figs/quickcheck_1.png
 create mode 100644 ex_figs/quickcheck_2.png
 create mode 100644 ex_figs/quickcheck_3.png
 create mode 100644 ex_figs/quickcheck_4.png
 mode change 100644 => 100755 hparams.py
 create mode 100644 percent_dense.png

diff --git a/.gitignore b/.gitignore
index 5c09f44..4959db7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,6 @@
 data/*.zip
 data/submission.csv.gz
 !data/2017-08-15_2017-09-11.csv.zip
+data/*
+*/.DS_STORE
+.DS_STORE
diff --git a/PREDICT.py b/PREDICT.py
new file mode 100644
index 0000000..0617d65
--- /dev/null
+++ b/PREDICT.py
@@ -0,0 +1,176 @@
+"""
+Created on Mon Jun 18 14:03:35 2018
+
+@author: gk
+"""
+
+
+
+#After training, do the predictions [but here as a script instead of .ipynb]
+
+
+import tensorflow as tf
+
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+import os
+import pandas as pd
+import numpy as np
+from trainer import predict
+from hparams import build_hparams
+import hparams
+
+
+
+
+
+# =============================================================================
+# Performance Metrics
+# =============================================================================
+def smape(true, pred):
+    summ = np.abs(true) + np.abs(pred)
+    smape = np.where(summ == 0, 0, np.abs(true - pred) / summ)
+    #return np.mean(kaggle_smape) * 200
+    return smape * 200
+
+def mean_smape(true, pred):
+    raw_smape = smape(true, pred)
+    masked_smape = np.ma.array(raw_smape, mask=np.isnan(raw_smape))
+    return masked_smape.mean()
+
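To make the scale of these two helpers concrete: smape() returns the symmetric error on a 0 to 200 scale per element, and days where both values are zero count as 0 error rather than NaN. A tiny worked example with made-up numbers (not from the data):

import numpy as np

true = np.array([100., 0., 50.])
pred = np.array([110., 0., 40.])
smape(true, pred)       # approx [9.52, 0.00, 22.22]: 200*|t-p|/(|t|+|p|), 0 where both are 0
mean_smape(true, pred)  # approx 10.58, the NaN-masked mean of the per-element values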
+
+
+# =============================================================================
+#
+# =============================================================================
+#read_all function loads the (hardcoded) file "data/all.pkl", or otherwise train2.csv
+print('loading data...')
+from make_features import read_all
+df_all = read_all()
+print('df_all.columns')
+print(df_all.columns)
+
+
+# =============================================================================
+#
+# =============================================================================
+prev = df_all#.loc[:,:'2017-07-08']
+paths = [p for p in tf.train.get_checkpoint_state('data/cpt/s32').all_model_checkpoint_paths]
+
+#tf.reset_default_graph()
+#preds = predict(paths, default_hparams(), back_offset=0,
+#                n_models=3, target_model=0, seed=2, batch_size=2048, asgd=True)
+t_preds = []
+for tm in range(3):
+    tf.reset_default_graph()
+    t_preds.append(predict(paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63,
+                           n_models=3, target_model=tm, seed=2, batch_size=2048, asgd=True))
+
+
+# =============================================================================
+# average the 3 models' predictions
+# =============================================================================
+preds = sum(t_preds)/3.
+
+
+# =============================================================================
+# look at missing
+# =============================================================================
+missing_pages = prev.index.difference(preds.index)
+# Use zeros for missing pages
+rmdf = pd.DataFrame(index=missing_pages,
+                    data=np.tile(0, (len(preds.columns),len(missing_pages))).T, columns=preds.columns)
+f_preds = preds.append(rmdf).sort_index()
+
+# Use zero for negative predictions
+f_preds[f_preds < 0.5] = 0
+# Round predictions to nearest int
+f_preds = np.round(f_preds).astype(np.int64)
+
+
+
+# =============================================================================
+# save out predictions for all days (for our data all days are relevant; for his Kaggle submission maybe only one day is needed)
+# =============================================================================
+firstK = 1000 #for size issues, for now while dev, just a few to look at
+ggg = f_preds.iloc[:firstK]
+ggg.to_csv('data/all_days_submission.csv.gz', compression='gzip', index=False, header=True)
+
+
+
+
+
+# =============================================================================
+# visualize to do quick check
+# =============================================================================
+"""
+pages = ['(236984)_Astier_fr.wikipedia.org_all-access_all-agents', \
+         '龍抬頭_zh.wikipedia.org_mobile-web_all-agents',\
+         "'Tis_the_Season_(Vince_Gill_and_Olivia_Newton-John_album)_en.wikipedia.org_mobile-web_all-agents",\
+         'Peter_Townsend_(RAF_officer)_en.wikipedia.org_mobile-web_all-agents',\
+         "Heahmund_en.wikipedia.org_desktop_all-agents"]
+"""
+
+randomK = 1000
+print('Saving figs of {} time series as checks'.format(randomK))
+pagenames = list(f_preds.index)
+pages = np.random.choice(pagenames, size=randomK, replace=False)
+for jj, page in enumerate(pages):
+    plt.figure()
+    #prev.loc[page].fillna(0).plot(logy=True)
+    f_preds.loc[page].fillna(0).plot(logy=True)
+    #gt.loc[page].fillna(0).plot(logy=True)
+    f_preds.loc[page].plot(logy=True)
+    plt.title(page)
+    pathname = os.path.join('ex_figs', 'fig_{}.png'.format(jj))
+    plt.savefig(pathname)
+
+
+
+
+
+
+
+# =============================================================================
+# load, manipulate test data
+# =============================================================================
+def read_keys():
+    import os.path
+    key_file = 'data/keys2.pkl'
+    if os.path.exists(key_file):
+        return pd.read_pickle(key_file)
+    else:
+        print('Reading keys...')
+        raw_keys = pd.read_csv('data/key_2.csv.zip')
+        print('Processing keys...')
+        pagedate = raw_keys.Page.str.rsplit('_', expand=True, n=1).rename(columns={0:'page',1:'date_str'})
+        keys = raw_keys.drop('Page', axis=1).assign(page=pagedate.page, date=pd.to_datetime(pagedate.date_str))
+        del raw_keys, pagedate
+        print('Pivoting keys...')
+        pkeys = keys.pivot(index='page', columns='date', values='Id')
+        print('Storing keys...')
+        pkeys.to_pickle(key_file)
+        return pkeys
+keys = read_keys()
+
+# =============================================================================
+#
+# =============================================================================
+subm_preds = f_preds.loc[:, '2017-09-13':]
+assert np.all(subm_preds.index == keys.index)
+assert np.all(subm_preds.columns == keys.columns)
+answers = pd.DataFrame({'Id':keys.values.flatten(),
+                        'Visits':np.round(subm_preds).astype(np.int64).values.flatten()})
+answers.to_csv('data/submission.csv.gz', compression='gzip', index=False, header=True)
+
+
+
+print('f_preds')
+print(f_preds)
+
+print('missing')
+print(prev.loc[missing_pages, '2016-12-15':])
\ No newline at end of file
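The smape/mean_smape helpers at the top of PREDICT.py are defined but never called in the script. A minimal sketch of how they could be used to score the final forecast, assuming a held-out ground-truth frame `ground_truth` with the same page index and date columns as `f_preds` (that frame is hypothetical and is not produced anywhere in this patch):

def score_forecast(f_preds, ground_truth):
    # align on the pages and dates present in both frames, then apply the helpers above
    pages = f_preds.index.intersection(ground_truth.index)
    dates = f_preds.columns.intersection(ground_truth.columns)
    true = ground_truth.loc[pages, dates].values.astype(float)
    pred = f_preds.loc[pages, dates].values.astype(float)
    return mean_smape(true, pred)  # 0 is perfect; the Kaggle-style SMAPE is on a 0 to 200 scale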
diff --git a/PREPROCESS.py b/PREPROCESS.py
new file mode 100644
index 0000000..747d5fc
--- /dev/null
+++ b/PREPROCESS.py
@@ -0,0 +1,271 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jun 21 12:55:54 2018
+
+@author: gk
+"""
+
+#Do some basic preprocessing to get my data into the same format as the Kaggle code expects
+
+
+#import matplotlib
+#matplotlib.use('Agg')
+#import matplotlib.pyplot as plt
+
+import os
+import pandas as pd
+#import numpy as np
+
+#from statsmodels.tsa.seasonal import seasonal_decompose
+#stl = seasonal_decompose(x)
+
+
+
+
+
+def load_my_data(myDataDir):
+    """
+    Load my data
+    """
+    files = os.listdir(myDataDir)
+    files = [i for i in files if i.endswith('.csv')]
+    files = sorted(files, key=lambda x: int(x.split(".")[0]))
+    #Exclude certain cities
+    #ignore_list = [] #id's of the cities to ignore
+    #files = [i for i in files if i.split(".")[0] not in ignore_list]
+    dflist = []
+    for ii, ff in enumerate(files):
+        df = pd.read_csv(os.path.join(myDataDir,ff))
+        dflist += [df]
+    df = pd.concat(dflist,sort=False)
+    df = df[['id','date','y']]
+    df['id'] = df['id'].astype(int)
+    return df
+
+
+def remove_cities(df,remove_id_list):
+    """
+    Remove blacklisted id's [since some downloaded id's are no longer relevant,
+    or are suspected to not be useful, or to be corrupted]
+
+    Or just ignore those files when loading the data, in which case this is not needed
+    """
+    return df.loc[~df['id'].isin(remove_id_list)]
+
+
+def get_earliest_latest_dates(df):
+    """
+    Get first and last dates seen across any time series
+    """
+    earliest = min(df['date'])
+    latest = max(df['date'])
+    print('earliest date',earliest)
+    print('latest date',latest)
+    return earliest, latest
+
+
+
+
+#def __keep_btwn_dates(df,start_date,end_date):
+#    """
+#    Excerpt only the data between [inclusive] start and end date.
+#    Both dates are formatted as 'YYYY-mm-DD'
+#    """
+#    len1 = len(df)
+#    df = df.loc[(df['date']>=start_date) & (df['date']<=end_date)]
+#    df.reset_index(inplace=True,drop=True)
+#    len2 = len(df)
+#    rows_removed = len1 - len2
+#    print('rows_removed:',rows_removed,'of',len1)
+#    return df
+
+
+
+def remove_seasonal_blocks(df):
+    """
+    For places in the data where there are missing gaps of length > 1 seasonality,
+    remove those blocks [placeholder, not implemented yet]
+    """
+    return
+
+
+
+
+
+
+def do_imputation(df,imputation_method):
+    """
+    For places in the data where the missing gaps are small (<7 days),
+    just fill in those few missing days with a basic imputation method;
+    remove bigger gaps [> 1 seasonality] in blocks instead
+    """
+
+
+    def imputation_small_gaps(df,imputation_method):
+        """
+        Do missing data imputation using the given forecasting method
+        Only use this for short missing segments; do not use it for longer ones.
+        """
+        if imputation_method == 'STL':
+            #stl = seasonal_decompose(x)
+            df_filled = df
+            pass
+        else:
+            raise Exception('That method not implemented yet')
+        return df_filled
+
+
+    def imputation_big_gaps(df):
+        """
+        Do missing data imputation / removal
+        For big gaps [gaps bigger than 1 seasonality]
+        """
+        df_filled = df
+        return df_filled
+
+
+    def imputation__simple(df):
+        """
+        Just a placeholder for now:
+        fill all missing values with zeros,
+        or do mean or median imputation
+        """
+        df_filled = df
+        return df_filled
+
+
+
+    #First deal with small gaps (missing gaps fewer than e.g. 7 days):
+    df = imputation_small_gaps(df,imputation_method)
+
+    #Deal with longer gaps [e.g. by removing enough blocks of length S, where
+    #S is the seasonality, to completely get rid of gaps]
+    #...
+    #df = imputation_big_gaps(df)
+
+    #Trim start and end of each series to align them / get them in phase
+    #df =
+    #...
+
+    return df
+
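The three inner helpers above are placeholders so far. One possible shape for the small-gap case, shown here as a standalone sketch rather than as part of the commit: reindex a single id's series to daily frequency so missing days become NaN, then let pandas interpolate short runs and leave anything longer for the block-removal step. Column names ('date', 'y') follow load_my_data(); the 6-day cap is an assumption taken from the "<7 days" note above.

import pandas as pd

def fill_small_gaps_sketch(one_id_df, max_gap_days=6):
    # one_id_df: rows for a single id, with columns 'date' and 'y'
    d = one_id_df.sort_values('date').copy()
    d['date'] = pd.to_datetime(d['date'])
    d = d.set_index('date').asfreq('D')  # missing days become NaN rows
    # time-based interpolation, filling at most max_gap_days consecutive NaNs;
    # longer gaps are not fully filled here and still need the block-removal logic
    d['y'] = d['y'].interpolate(method='time', limit=max_gap_days, limit_area='inside')
    return d.reset_index()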
+
+
+
+
+
+
+def format_like_Kaggle(df, myDataDir, start_date=None, end_date=None):
+    """
+    Take my data and format it exactly as needed for use with the Kaggle seq2seq
+    model [requires making train_1.csv, train_2.csv, key_1.csv, key_2.csv]
+    [??? or does the seq2seq actually open the .zips directly ???]
+    """
+
+
+    def make_train_csv(df, save_path, start_date, end_date):
+        """
+        Make the train_1.csv
+        """
+        #Rename columns to be as in Kaggle data:
+        df.rename(columns={'id':'Page'},inplace=True)
+
+        #Get earliest and latest date across all series to align times [pad start/end]
+        earliest, latest = get_earliest_latest_dates(df)
+
+        #Excerpt only the relevant time interval, if manually specified
+        if start_date:
+            earliest = max(earliest,start_date)
+        if end_date:
+            latest = min(latest,end_date)
+
+        idx = pd.date_range(earliest,latest)
+        OUT_OF_RANGE_FILL_VALUE = -1 #np.NaN #0 #putting NaN casts to float, which then cannot be converted to int
+
+        #Reorganize data for each id (->"Page")
+        unique_ids = pd.unique(df['Page'])
+        df_list = []
+        for i, u in enumerate(unique_ids):
+            d = df.loc[df['Page']==u]
+            #NaN / zero pad start and end date range if needed {end missing}
+            dates = pd.Series(d['y'].values,index=d['date'])
+            dates.index = pd.DatetimeIndex(dates.index)
+            dates = dates.reindex(idx, fill_value=OUT_OF_RANGE_FILL_VALUE)
+            dates.index = pd.to_datetime(dates.index).strftime('%Y-%m-%d')
+            dd = pd.DataFrame(dates).T
+            dd['Page'] = u
+            df_list.append(dd)
+
+        df = pd.concat(df_list,axis=0)
+        cols = df.columns.tolist()
+        df = df[cols[-1:]+cols[:-1]]
+        df.reset_index(drop=True,inplace=True)
+        df.to_csv(save_path,index=False)
+        return df
+
+
+    def make_key_csv(df):
+        """
+        Make the key_1.csv, key_2.csv
+        May actually not need this???
+        """
+        #save out
+        return
+
+
+    #Make the train csv [for now just do 1, ignore the train 2 part ???]
+    save_path = os.path.join(os.path.split(myDataDir)[0],'train_1_my_data.csv')
+    df = make_train_csv(df, save_path, start_date, end_date)
+
+    #For the prediction phase, need the key ????
+# make_key_csv(df)
+
+    return
+
+
+
+
+
+
+
+
+
+
+
+if __name__ == '__main__':
+
+    # =============================================================================
+    # PARAMETERS
+    # =============================================================================
+    # TOTAL COMPLETED TRIPS:
+    myDataDir = r"/Users/......../Desktop/exData/totalCompletedTripsDaily"
+    imputation_method = 'STL'
+    START_DATE = '2015-01-01' #None
+    END_DATE = '2017-12-31' #None
+    REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful
+
+
+
+    # =============================================================================
+    # MAIN
+    # =============================================================================
+    print('START_DATE',START_DATE)
+    print('END_DATE',END_DATE)
+    print('REMOVE_ID_LIST',REMOVE_ID_LIST)
+    print('imputation_method',imputation_method)
+    print('myDataDir',myDataDir)
+
+    #Load
+    df = load_my_data(myDataDir)
+
+    #Remove any bad/irrelevant cities
+    df = remove_cities(df,REMOVE_ID_LIST)
+
+    #Put into same format as used by Kaggle, save out csv's
+    format_like_Kaggle(df, myDataDir, start_date=START_DATE, end_date=END_DATE)
+
+
+    #Imputation, dealing with missing seasonality blocks / out of phase
+    df = do_imputation(df,imputation_method)
\ No newline at end of file
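make_key_csv() above is still an empty stub. For what it's worth, PREDICT.py's read_keys() only needs a CSV with a 'Page' column of the form '<page>_<YYYY-MM-DD>' and a unique 'Id' per row (Kaggle's real key files use hashes for Id). A rough sketch of generating such a file from the wide frame returned by make_train_csv(); the function name and the Id scheme are mine, not part of this patch:

import pandas as pd

def make_key_csv_sketch(train_df, save_path):
    # train_df: a 'Page' column plus one column per date string, as written by make_train_csv
    date_cols = [c for c in train_df.columns if c != 'Page']
    rows = [{'Page': '{}_{}'.format(page, d), 'Id': '{}_{}'.format(page, d)}
            for page in train_df['Page'] for d in date_cols]
    # read_keys() splits Page on the last '_' and pivots Id by page/date,
    # so any unique Id string works for our own data
    pd.DataFrame(rows, columns=['Page', 'Id']).to_csv(save_path, index=False)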
diff --git a/QUICKLOOK.py b/QUICKLOOK.py
new file mode 100644
index 0000000..8f22dbe
--- /dev/null
+++ b/QUICKLOOK.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Jun 18 14:03:35 2018
+
+@author: gk
+"""
+
+#For the KAGGLE data, it looks like most series (~2/3) are dense [no sparsity].
+#This matters because Arturius's script has a threshold on the number of 0's allowed;
+#the default he seems to use is to not allow any 0's,
+#so then he is using only ~2/3 of the time series ???
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+
+
+
+filepath = r"/......./kaggle-web-traffic-master/data/train_1.csv"
+
+df = pd.read_csv(filepath)
+
+rows = df.values
+
+x = [(i>0.).sum() -1 for i in rows]
+ndays = max(x)
+x = [float(i) / float(ndays) for i in x]
+
+x.sort()
+
+#Sorted plot of percent dense [so about 2/3 of the 145K Kaggle series are dense]
+plt.figure()
+plt.plot(x)
+plt.show()
\ No newline at end of file
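For the Readme workflow below, step 4 runs PREPROCESS.py, whose load_my_data() appears to expect a directory of per-city files named '<id>.csv', each containing at least the columns id, date and y. A hypothetical example of writing one such file (the city id, path and values are made up):

import pandas as pd

example = pd.DataFrame({
    'id':   [3, 3, 3],                                   # city/location id, matching the filename
    'date': ['2017-01-01', '2017-01-02', '2017-01-03'],  # one row per day
    'y':    [120, 95, 143],                              # the daily count being forecast
})
example.to_csv('exData/totalCompletedTripsDaily/3.csv', index=False)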
diff --git a/Readme.md b/Readme.md
index f7ae3e9..d651938 100644
--- a/Readme.md
+++ b/Readme.md
@@ -35,3 +35,27 @@ load and evaluate 30 different model weights.
 At the end, you'll get `submission.csv.gz` file in `data` directory.
 
 See also [detailed model description](how_it_works.md)
+
+
+
+
+-----------------------------------
+
+GK modifications for own data:
+1. PREPROCESS.py - Maximize reuse of the existing architecture: just put my data in exactly the same format as the Kaggle competition csv's
+2. $source activate gktf
+3. $cd ..../kaggle-web-traffic
+4. $python3 PREPROCESS.py
+5. $python3 make_features.py data/vars --add_days=63
+6. $python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000
+7. $python3 PREDICT.py
+
+- confirmed it runs with 2 layers stacked, or with the attention mechanism. Performance is worse in both cases, at least initially.
+
+
+To do:
+1. finish PREPROCESS.py to do better imputation using a basic forecasting method [just use STL or Theta to fill in small gaps; otherwise remove blocks]
+2. modify make_features / InputPipeline / VarFeeder etc. to NOT do the lagged autocorrelations [if the ts is too short], to NOT use lagged_x, and to NOT use Wikipedia-specific features.
+Use only features relevant to this data. Still use the (tiled) median series value (before standard scaling), or a few other quantiles, too. Keep day of week; add one-hot encoded continent, or use country like he has it.
+3. Prediction intervals
+4. Architecture improvements
\ No newline at end of file
diff --git a/ex_figs/quickcheck_0.png b/ex_figs/quickcheck_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..794e34bc6bb121fd4008f0b43b9a084fde2a6ca8
GIT binary patch
literal 32041
[base85 binary image data for ex_figs/quickcheck_0.png omitted]

diff --git a/ex_figs/quickcheck_1.png b/ex_figs/quickcheck_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..7e089e8ce3da430be79dfc6f52d2300078359e8f
GIT binary patch
literal 37901
[base85 binary image data for ex_figs/quickcheck_1.png omitted]
zmCDbJMS4yO-$t09{kDXqobBykkf7mvn)a!8BQ>f=FDkx#4GO^j8wiPZaH}%4@ih*N zb`TikNm;60AL!-Yon139`_egx*`(5Ll1MnThhO<6i_wrAbn3Ctf6ZurR`e zz_7bfu;IHezC|Fi$4@%K$tJ*wk}e49M*(%7qEGSifZ`H&tuxp5xCCAbk)Wne_1wqA z=4K>B^03Gm`#GkbTra=ef!=ao0DQoH^)F8{!FZdPf-d>_e`H+G4Z?XcIX$nCp-eS= z877_KUkwOk)>MY-in!fk$1s9rfZehGM*7FLM-XNaP`#vr0gX<7}?S zU&wbgV3P7>(0r*t=YpE*0gN1wh=zEanIJI9Hgg2#41J^_LK*iCBa9> zm9KiY9HVLja@wyV9$l*jIe72c z?SqsSu7Q3PURX`8_ezHWqJrbCTpHmseQil^3z5qV>xaY*(pJU1ep6JR8}kjR5<1)p zhQT)inlaICeYRZrphL~ekD4hWonqas%$|xPt6(Sak*fxRReHdFba;McV3eCJOEcU2 z>$u#ZFEfnIOhjVCIYx7e$Wxf_BQG6SlF9oAm>ii>0O-rLL)`2>;{xE$l2fqOij&PJ zKe5YMt9>yujqTO(s0*xHI4W5CCVlkX7G<<&AvJy<6)-9-$}>vm-KN4wK%+2u^-Tdr zIj10f>oMxUmY1;P^v#?6Y$)o%3`lCOFNb@ddyd~lYRbM^L zR!`g8H5oYZJltdSbv(NC0!>#a@EpN~{P{Z3WE&2X+t-XKQLvBN{dhijaNjL0_s<}p zq~K!Q_I2;k+VZ30oQ^ZAfd<=MZ!|bO+RWopXWe#rz<-E1KqmT6oyER%)SfDga^XC( z&Zrw0v@dSI6f&zFQ8Xpb0~#JX4Qkany!@b}vwCq;h8^~3d}o3!gLE%fgsbd(jDYi; zUafnt&OQbU$taU;S;vb7y1}LcJwW8(x6kdtnd*z=z1o^kamgA`=f&3LL*;p`S1`NT zI!qcfk9Z)&BHM|uJ?Y0~&6q)RZ?0}X;%a@HP2O5($w|_BVQuz%1MG#+)W156)EKFi8k)5j=aAKc3STVBQkWuH7SCTFa$+z*5IjA7 z&<^T6B{zQED-JzE{9!IsQtR)y3{foE%u zc*8D#7An3~{xS=d*W>`NZ}I{O{3R)C#cYCCVL`SG<;QhmFV3JC@h@SN>7eWgX>+p& zJw2|dSsfDP^`$6$XX2K?cCt{ypG&$nE^GL#%TsduUm?_WL-=gZ&q+&O#7J%` zV~TKtM|eO2t&F8^gqnTrQs@LFR=`8zvu(l3o7tjJFzLzMH23K{MR?hWTPEea={N`d zZxm3=L9QKl;zJp{Vf%zHF>YC}{_-+Lc|x&2fkq8j)K{G-AR(?yi_yoGYPGIevOwEi zujEgvV6@YHXW?8&al?4n*QPSdSc+xbnmRpVcuxI&pY+gdTeHZ z7z!I{&`{WEC`KDbfS&O0e;?Xz_h0HmqCP_y*tZ3$^L7RW9W#-*oqk*Rb7&_8KZ*Ne zn~vmmv|DCNJmzUCQ(Ea+&!{yPLA=Jevc)}F=d{+Jr7yV-Gqky86-rb;#Y-Uhl4^-M zish~B^;f(B44{Ox6;o)a>IwMJ7cfMa<8Rmck1IkVEB4m;oMj>oNWM#2=X3*KzS|5J zPfyb|k6=9=-N&y)WriwQZoLso8FX7>BfJ43(D0aEf)SfCCU-^1z}${byfhEw!&6=k zqn)%2dLnFuO<))gsI{Qm7W|8xM8m-$5mOjbggjNF1jF7vwO)bhZ`FL#f9I+LjSnLo z-C9#}ka=|=LlJV)f?(1w-w&UVC~*RlweZ&EKv`kIlGw}l?%cq|*?6MFvize7+n-Zk=Xcy#B3?*o1i`if@AmFb2g6U{k2fTv)~Ogc z-pYaN)k-xvS#CfJW07JikSuv%DO^5OV@29C&m*5#{OssQQ*t5Eu=OSvZngp&z{yjF)uBKv87p-8u_;Fun+t-s8kaH&~swpuKolNa22NJE2|7ejS`_ z34So%7cW+}6HxzDW+ElghO4%yLF1HxbbVpgbTScPFUzb-zKsnmE}?WW!W5Z@3l#F=dElU62pu+zCe3L zFI7L((j7LVd?lgW=Jva-tM9#=GlHdx=|TL~A#F;6{qifN%99@q#L}h8;V|4>x-ble z*iucF&he_F#&Tt-ZN3r=vyG(+5X9nt%^!xZm&;%{behV7#a30kPDLS7!XM6xvdMyN z{O$zcyS(eWm!(K2aS1n!I-~GOmZZl1 zO&BQBdaAY=ugk4rI2z$dzsrF)4tzT*kpBU(^j{XgrotU|NJEu$s}ug;ZHr5}p{5lY zZ0dCZ06ab!aZxqc3x8Oy3%L)Qh_v!rF+g%Pv4t<<2GbgaMdojLQT4I=adjtlTj}dO zH)^gLgccgWhGDMX2wx|MbVr;j1AR`?gfs2mGWet=DdJ;5h3``^X65Bi=?*D`%a8ZmC7tUoBd#wSnsh|<4R>Yg~rqn z8?S6)<^rZ0bGeJJx!r{!o%TNcv4V2MF`E=*)65+;i>YHfx43<+g)3f}>VK!koXexO?s?@nb463u#<$?5QyB} zYa2gUMKRL`xZB~|;>KuzW&Kj+$Z*(=wFvK=A~EvM-B?LzLSV+|JWx(^ZE) z11L(t@%^^U*1yeh!tkhM+rKGu*5P%+@!L+@|K(E7XGlv`|0F~{1OehkAhm64v5_Yt zGSYGQ?dA$tvn}mfg{uJW1a{hXgxE$h?LY!6b{4J}KHEpr{)D^kl*QLybK#!}=N}yQ^AkDq*Pm+#951ISQdaQ8wW7BHyb6ZXx9w-j8(CkV{HiP3a#Dy5; z1;($}*jcIBhHkN`dABqP;>DSBjD|9Gi|9O}lu62M);M94X`^q9@ykee6y-gEaWoYm zJX1vS1{en6ksz0v`}Q8}2GI!uE~9PA>rnv*5R?4KGiw7WYf+B{#vn9giox%Z4GR)U zF-NAy#sXquVtO~HrlxooBy2s*o~fTD570mm3165zt{9B1JJ(^jz^Fh< z5co(77(U`fKsGzr82l61b9?Q_w0@_yMTVKj@Z;3{T7{1*va?@iU$n8iihOfuluB|~ z+T!b6r#pQWc`KL$GE*Ide)>lXkEo}jY7kfnVQ;DD&G#@56teVB+LBKH9Q=Xg;dF7p zR`tfm;G}z+bf3RATkd4mV8x{W+v~PMuhzzQW^8v%F!@#lNAYvOp?j2R{LU-+`Bv>@ z;$NxzOukC0;|tKgHRQJ$Q#wh;U)Z4KL3UO@LRR3vFNP7LoI33*XQr%Rs-rC$Gqzv* zPrbBknb38*?+xa+v_1PBO~moxM}BY9=#usD%W;Fzsnj>$^Siw01EtxC%Yd{h{|7(h zAW=0endtIA5)9P8-= zUBv(qq9D}Y{L}H;=zV)c)z^ch{fN{qCJ2X1+FY3z3W^{)(wKbcG^y~3E>swEc^SRGT>6;Ri zd5`JpgT!Q9yTcTEp}G}l)H`=udTFurgmUlfF51>XXK=@?JXaxWU+1Rxhc{W8@%9ec z)f@fc9v1^!&S>yI%YrE0G3jk_X~w0OBjbk?H}gj~?mxv_3{HYlwd$`#@|H&IizN=f 
z!uny(qe|sGx)PvgK8#+&9?$P$V)C=Y`Q2E9`^~NqZ;?MrKIR10935xU`q35M*v}lm zJ2qWJJ6mx5eW0eCX0}=4NE&ZQRHKKfm?^UklYR3?3!88{7C9=7dS;y= zJVh)m?ug#;M$ipbS6z)u9!sBdUiAI@_boNiKuYvL$}roU9uoN5w{MAr{RBW}U-@i{ z&y|y01|L_xvoJ`RCj!C5bPDN2KVj>u9sE-Mjk;|*cKl$HW6eLOE z9Oi?Sa;DyoVE7PHduf&M|IHI@O}hh?21fvY!d^m0QSg!9J^m$5CkAGYJU0ITk(h=c zgtCYg_V^l{V3S^e=D+v$vj6}8<^n+p`0oSb`{E#5NHC zVt!0x=t^DT`_92MYdhW^x~q%C@bcXIb<%>$5y7Pq0qsgx=B=k)f5>BgP!LQ_o7hP? zexJ;}D*y3=Kp%UuS#8XdrFrMDY8c+qg6yoKm9r~eMyL@fb81RM`}IIbvetGs5Cp5| zH4y!`7xm)1c*30bKLs1?%#mC*ltew^uC@RuRS&WR&WSR zOw5@o6SVj5-zWIpsZk*M%nH^y8uu-nnibE^{&`sRP`q_Q1`%oDr|Yxm#PBzi1FKf# zt;?@%Sp>;rcO*H-V`IqhcqJ{vbk(osWA=cwOx z@iCvilPkX{Y7rv)_;Belc_tU)1DIri)Annn0P*7Co*UA6*r&STwx70z;$JE0(W9`L z3y@=*52ipWlFi4xasR(-rrzMixW_GyPBqZ0e<1bW77Jtf1e$bW2{cWI9Z!o`QAKG8 ziZ8^u-<@Sj(}4L{DIhs*z2oU?IoCJnR7Fb7^wrP6%Y34!l>hB1@gy+rKr%Kn69zJ# z4QlK_Zb+8M-sQX8!_&ZR-{Y(Xgb?^170@O8->t#XMkR)eYn*^EUX!`+R*zi~z#m%CMt*NKhR|zwP>e>r|4atLITTN>K$TJggy@0nb9jD!e(yQhz-WEJc?1Qx zz3dV04tqxZ2$Bc8BXAb|*$Y={R>V}hP<=Y-O*I*Zlp^^^qyy=9;Xd|ucE3QRgsN6f z<_JidbG-VKoa_H^59-09AUX&DhK7dH?m&1q!Bt~z?Q>xC_wR(Jz;naGr2^FAo1#zK zg7Ma1E(*1R@STgm*@UVG$m_23yYqUaKp%Pt6tQZ={ss3@CrCy=3>ChGQ21TG6uLbW z14Jw=3e1HOBZOBv2{gc{eB!hHEH4B};eNq$@P-vU7#{tK?x&jazOF%&2u@Nt&q@xr0_piCnf%mekYgIK; z@dfzh{H(LQ6!Y*vgMsQ1k@SdHe?*F#52vSYubC2TGw3FnvFJ76+f0>;s;c6(+$hXTY%jzI@Y|q1ZohXh!w3)+3;}7=pL9Mp6AXJAGy(=w@oHni>dehUY;;lbFajn}1e`-27Ic6m$fU%jju_WixglXg zV4@z-!6D}%au}Kv3W(^TUC715}Idf6c%8qkr3fzuVouzu)KS`}u79 zzF()!`uM|{ne_*lX(=huzWF*hY#SW*92~~dGUaD9{3+;hk**S6ItI;-!SbZ=;ZQ^f zosFKj`HRS@@XKM(F8`^EX#Ch)IlfPJQ3wsfzIsn~xIy3Kch5Xq#BrP{UEH!f+yQ z!%HZ+6gFK~D(TWkFLZQtY)44k4wlJe`*L!6`YU_15t{@yuQ2S9_}$&BTGXluQzhBLtnib*f;2@dfVdz zb8;0wSqTM~y2i#=8+X7OlBXerzoP@=*-J`p(aN+p_{p`i%eb){MhlUC(Mk!@bIEG* zmp$G+HWCP1FfiO7wanhW?00>|gBrM!_rS}nbvQ&TA5IfO^5^%Z3xO0|~U``@jT zN~NPe9QFgQvJ9kW2kI*~?=pCb(6FcbzP_P!e4Y%az5qR=>3Y+0_w|7D93i(jYp9^| z7Q4Wuy}kWZP{C&h^C~#%(Wt%j9YXGUs;7v^@?_NlhRTD)TYW-WTFZ>l$jhI-8;?dV zz!->{`=wFeA={r-i(&pXfXl@tB_$OQjoAz4cRwm$`Jr<(#w*p6GczGVNV3a{tV(8-O?KHkd;3@+Wkq&Gh|Gk@455tdWUtK3WOH8E zeSg2_oZlbkJbvfz+|_s@9`SX=kxV?i@2{Ue}?!xF@hjx6cuFE5d<59AXrm` z_;5wIcWe&+!gaZ&s7VN4zJzAs@H3Hm6NJ4Mz)C4^wAz z#NO1^$!nq`6!y<$GS7p!M5z$-NKE&xZB@=8U{r0SLVaHd(b1_XUZNGCv zY@v4Hc*OMoq#XX74CXtbaufMHGOn7*>!ZlJP0{&)i{{QKZgW^c7(=_%UZ^o=XO_jBiZ+zMaQ&qXI>-f0(2 z{r-I>xC!&+Im4rtaLON_JorN>mg{YY3IkR1nsbyB2kiOl{!pD4btn|GnEmp6vrn0S zWZ}$@#l9Tn?@O+_+2RE6>{Qg-%$;1@pwlZWbl^18!*0*3; zw6Wn75*9XkGG*9vMx zI$B=3Y#7z8*NFWYd6UcR~ZX?{_<2hj6Y@e-@7?3;xKmI_h2bcx59=-YJZNpd{PhoB_Sb!4hxZiO;hfon^5L6 z_=ur3^28#S*g&$fvxRBn*Tz3R8vFS8N0lS9&2Vv%Ly1Nn+44aCZ1{}0rjioz6HBhsLiLW6w2yDT=S;-qQvYM`yQv~) zevej_Yz?zH)q}q9&pTknSx2M zc(c9GlO1q+f^PkJ5xa2FN#Fj}V>3(3M{8r%R)YmJr1&^T$H%?d^5l$CzPjV`gn^fo zU!se&@#l{$xXWgzV9(S{Yotisk)Z?YTOFLh#TXr)px{Aa9D*a#XNgs#6=$qQw2w=4dNl2QbF5D2B*N|GN z^Vyvl)G@vzBZI&)T{UZ`U}tBKU)fgsTH`c*>x110m3JKzgp*cSV5gmBt;oZ+mX=KY zDu<1&ty}%MDrGP|y618`Uh^qsrp@@BDZ(U8O--nkc<-(~mp<8R&V4`r@iEQ|4wYLg zLq%%Ya%TpL^l8DFTbhCiiln}PNnW++q}$uyZyFtCz#$+L`cxS|FrZl*#kFpKe6ZsQ z{-#!9BsuU_4F~SF?5qsOzI^$Fh?;|ig$2RF!lDzjAnoh#7jFBrvC&v++Fa_p@0_~g zrm9Nzt1C6{qzK$5`0ArCQxIk?)-54Q>_LeBUiL`d)7v}C{M$?LO|9gT-f9<1iT$}3 zDfafGmFS}0z5A@@cdBDO8jDLBPKX;1Y?ojYNcj2rpD{}t)e+E~9wqd6fdwUryKw~t z1x+n2MS$DRwnR`{v^{(NyjA0#B%ZXiG#x3v#NJf!j}O)_M*QWYnOfPRC9omvPfeM0 zCS7fw*(u^GRvE2wlpiiOoSvV@V`gTCJtvTlV(eYrhR`gN*FT+T?B68VBN6R%&0^jmb46fJ5nXjTrK5SG^C-@QUP>gk7vK=)_DsfCT&p{7Jrnn)z;R2 zue8@!p;9#M5>__t60`b!sGy?K`oX&Y`x`}SPfyQG|Ko!s39t7feuHsU6W%2C_4VyN zJvvo&CkHE#Zua(OqX+FfIy%7mnxs$n?|roU`GIKCcl!(kqo85qfC~`uUf0Y*-rAUZ 
zaqn|;GY&Y)XL9Ky3?9k3M%Y_1F);*FSjh8du@}eKZ-*vVHT@~XcoWEdx*we`!pA(+ zZifp~+da~4W7RHY=HJeNS(t82HHi?&-f&-Sd=xEdF%%*SL6!Z|rR3wXp%UY0_`qij zqQ#w$7JmJj$~G_=Edy+e69mZ=yXWOFn{l>~3wL$RgU!8g9TTOTM zA@11txbo34M1tuA9iyAp3u(N*zP=l1%4%Z{Xn7^+qclF8;tjyTk?>-v&#uGHE@Y2s zu*0`+Io+xZr*uDcNH5o>jwX0fk$6R3k^RFt$q}=>~5L=mRR<%m6*r4 zo^hc&y}2iF<$ z{FNV-_6*>CVKA0y7;#R$D(vy&sYWbefg1l~asR{LqUGXD7cM++Y?S$6KRUF$yD?&! zF%93Xoymged_PtvOf-tP{JLvU>q*J2UD)`PhC3!E1_Mv)s&j%*^PG#7kBN&5f|z6q zS#I(BYn&%%7;=|Ne1CP)|5+X+ff)nnJM)rP{|WVFRg;NYFYSan*5O=qgT80pM1wBu zAqG(ag~h2VcWC-3@o1U)|}E^nw=XO8$6nPGU3gHW1eO!S<#8AzN+p z^x)*=XYYLPmggm0pcMOqbyn z;h(i}28vtZM@PP~sj1xX6mG~tp}kgTZxN!9PadJTeDBrSOL~=d$q0*x{m(ZeC%y8_ zM94$%t7Q>{74ju~Xu&8IHFaoYWF)wne?*De7_0{#f+nW&g}%N%+2`klTVND9BTVPd za{)%++}hqAwD;Lvqd{80NeAt-<)bV2{10~5`5%0+dM$Ob{H_ES*=(gXj@RvGlaC@q zW+2fH+C@i4)4qE3>LILxcAfFX5CJ+;6zz+Q79ahNXYSvv^4_t*l4)sewVp6mPze7; zck$xIhY;+v6LYR-_i{SBxU`-eAL1d8mj|wNt>|rg@fp@=A{4OI{j0lM-}p(8hdaxI z+KG8!0IZyxZwLJ1hetm?&W08G`0;xAaM_=Ix78nNA)%qxQ59w#R9G@cVAji(kaQy9 zJ8|#eQo0LZd>U$MHh{Kwz=7`MsiqG+a$TP=l-Ov*Sw5*8XvRNtJ{;l<@x_q#?(TCF z6B9GPt0vDOs8@zk+&Vg551?IJTVs`!G$g#(@|-~w7r6>fS^i$?Xrnm`LKQ17ugdt# z#Kg-0@!8G`S!KS|^STGwz3=Gcb0`VS4l?33=|lirB#w@bJDVfrmb^^cKgu3qW0J&N za%+lU;juB--B*(=CIKTxLL@WrD>M*<nN^jJ*lj zApStSx3?F@;xvVU4XCgkCWhE})w+)zg_3R!hwBZ-5H<|_?3Ny6rdjUq@6S{W8&NYc zF+GF@uqe?x+SZX;0F-6ZeFy=Z$iu_qs{fIvoBp?#ysg3HQm8oyw=oKsVBYGKyz;JN z7O_hLaCsISa9PUB%L_#&=e`;04d;S)P%<*|{H(NpSH1ftCnuuDZN;WIq1>_ub`6H% z=#J)=-W+AxuD{o$~6{t0~9v*XYCi5A9DE)e)cW0+nwC~5g5|dO9Sc?hjWK9Uh}5eAyoW-79*ON| z7N6p_M7lT-nP?`-soB|VDKb)0+3^}T-Y5&62&$stBK_gjYM>HR)eEnhAvFzlet~rJ z1Ty4oPNET4MSsJsy#8eKXoy~US7sqg8w0|EY+uw9{<3s413om*R50~xR`9~>v^ z)#X&k=&#{|Sugi6IM%97KYB0=iG)td_d4LVfrI`!9}fiYSWpFg5*&$EN=nLeIX_S6 z@C>t5tRs`fY+}3?v>_NJA$Jw0tQHC!`@vT35F<={lTSM8=bSLBh$l2 zk9b8$uqp?e0drpjvWqzFZJ3%_SUdzQ3L4S zq!H=uj&!gTP=j?!FJp>9qL$VoOVl z$xxvll9@Z2Ds1~?anTATIFyu>RzrpKDR3p=XqMUL^@|rTD9@fXb7unygl}wYECxJf z#KGDu$*4qBc{5RNWY|PN*Vi9NfBUy@8uIeR4=RX(C_no0tZaD?NSj65(8>NqI{ccy^dU?JrVIl>v1&d%>!PrPs5yeX1c0vzq*<5{`?(ZmVN-^ zVEyA=_w&dZpa{G*Ua!jkWZ&}7?@?Nro5qQ8-PK#3C;JP%V0OR&zdg)g)?YS70n+?e z8ZZSiSTRo!lLqUp;a?wpcKOJdrR)Z~HUXwk+|p$f%Qp!5RwK2zG6Gim^Q^Zu*#bKvIFC{In~Gb{yOz$oc`7J?P3Z4^pHuwS^W`4_o&gS?nw~b*mZzqs?kY~0uZu=m*;YH-vWoNW+Bhmz0XB@y z$cTnSQ(){`m_MtisJOS;#@t(Bs{@9rcj%54e_W+OMF$y5@bE5&N(wHrypS~(R^;`3 zhRfL=tVrmmehH6sYfkikT0Ea|qv3p8Vm>(iuc6h#SX48l?EhB%-P{bM zxOJ=_ec^`ocI~wlg8TO^As{}5m@5mh!LsKSh$`|{R-Az7DQRiL00C4ud`gxZ2kI@% zty$$T{=GCuApRS*+U{c(RGBdno88Zr<5#Sjj{yGly8jEtDDv9awA zC<FaX-~pu#3}@SV=AW56N9P_bbo@PI7E7aR%NPsH7prvTjr^;+>35+zkw|vAV6gj z?~WIKAj)Gwmip{5=@$mbZ(`N!B5tMQBpKw`t}0pEfnq}jP_8}$qX>mKVhTYOFqJ3Z zcctSx2<($i9v*EF8%l_t@TR*|_5lf9%#QvJBLMC&w(6z7{Mz|yL{yZVre?f&djZ4? 
zPf+?$P!9T!zMBhB4iN(b1E=3Vh07P>dFqRX!$3B$x~7?vep@L9y)0z)y>WHt-ONw3 zNXYQ4z$zeD58B;RRxUrFd)Xb`(jq$sgI|6G+N#JDF()BY!HnSj;%}9V`t*Mu<=>V< zRNkrGfpL?dVj=1h5N>Gv4*mp2N0W>w(g<9!eUtNExs=Hic9 zFBeGnGfD@eS9Y}bk1RTqOv|Jtkq=hA*fKW_`IkJ+^V=&r{xEX$LD=Kt<4Z}tf^2Kk z&eOd-Xh~c+Fs7WZ{wII4fe-?~^b(ncy3sCRi$>d$x?BfQNm0;BLs5B>fQ+#b5*rHS z+zyKkKMl|+RCm9N{Wku|Lpl9k=infXVU6oeGc$H%3X}#l^7vP;9JKZ750<&RNVIYx zRD+^01L*G3nxOltE~?|Al)+)_qneF|y%$QX_4M>|RPRebMj^bo0omiVMxLsLbfv4n zt83=OpgmAfQkwRk$>QR??6jG>-f*&0;>=tQ@k&8ixfz0h@F;{b$L%tdcL2P3_~jY> zxm5mhdQ`!3$;->X>v}!3JX*yMX-S^(x)TELE469iKU@+ukg^wT4swG;z1DGnRe%8kcu|O#)V3ov72NmYrW%&}aN`-{4NM z3h#ptG9av>x>1);M|*oCz~O<|Ci4qu5G^G62K-S-(B{W&uVTHSwBR8+0TToSY6RRm zGQigSEHL2b$`YbX-isFp3iYTXY5CZI_`i3Z>fUrbAHwr%2&A$&{a;|KsChHsC>Fp#M%6RZVc|=pFw3 z9SJPWr0SaOD`5ILBxs4uz~Fm{u{63>>vg+KsN{Y{ zBUf|TcGL!yRyUf-(I^9q^FqMUCIKoOK&nhqd88Hh+-;jY z`70{%=lg9mLBPJ3d8`|eo);p7Iw#eFY{1FKzn)S#8WG8l%yakccQSGLi(1a*{GN5FZS`Eiv%~piJrMP@QkIInX%o{%*MVo?V zWi?!U;h7v8D4ao1h@9>cos=w z3bUd_*GfK}6M^C|VRgJH4D8XYt_xEcE*$&-(c3xOXP^seqEuR4e z$n!N;y;-3sfA+33f>DNBX838NmAVv4eJwHKJ>VQ!kTO1h`=$)uDvt{IN1(V^uvy#h zOMp9YJt4cm1+vyf&{tbPI&6jTZO?>P_|bX(DNKR_Vj2xN#^>Q-9f`j`9{oe{*+vU5 z;1!~p2Bd@c2By|;#tXFG$%3cIf>JAdS00O2)OXJbOGc3R$DbXj+<@y*BeLLXLZb>7+D256S28eG+cy#pes*dm6hpfH*tawXMNm7n^1~c9RB#N}|&y4_! zB?)d~ff^nsDBU)jjCY8#F&@8vUXqJqyaHkBl7yt>ozs)!)@qjpp$1!_^Q~a2sFoDk zWI~ZH?ndSFclFBSvre14+I^F9VquPBA777502VEeZ5cx4nhf8q2&m`P0hG&v8aOIs z%z%8|Tbl$Ru|M>A^I7r5}fBEgtmgZ0T zAF4npFan@#L58woHl_gzh9ECGh66-J?GWB-vH8jgIM(L{o{c@4;{R{Yfuyl ze1&(wM;rFMHU5f*5L>PCc67keZG?iq;pn}E4_E;%qH>hWeS{RG14$OL86brGkgG*K zO%>~63*}ItTdnnP%$UKz8;^G$ess8SSt8tEVNB-TPM0y)*8;Hf>B7fT{1`(m=jv>vkua zL5*5lWEJz1pL|P{QJyRb2%DcaN zeEBC+3wYn1PxvjnRIc_jI%g2SKrn~-rnU6YpMU}$5M6U#=6s-^dzaGZk^3QI>kmz> z4yT`)(qUOn35(d5I(O9@anFWb3(kKfX1;X%qvj#u_#;2W#o4(tmz$IC`gL8w7anej zX=%^E8r65L0V3vf2WzDm2OJO1q?v*Wn4y#&*L2R>+S**zuor4)kuO@b35bY@QiQMY zS`Qbei+f!ePhG2U`59|go2xO10urTkY?YE>oza&v^3{3RGH5$Zf%mHCO=zZJr^m;j zO*L`TF`tPA&OcXBP{4rF-2Z%$jg{3|`xYTceiva>Kh+jV5pEDbnhNlmq3vdIf*M*s z04H3pp(ATTN*Mt}QPR`rcRSKBGdm^^p=|5z9Ki97?O?QW)T*qfx6yD zmjyB$7)6#%(5@pH_C>V2=bsC#tQ`UfJNhFjdjEg_W21BUvuCn^vC%rJJZ&fh&2|d| z;$rNN@uZZLYCFw`qJsZSv(FS!kY&&Rp1`Emnib%FfaC@Z7*g zHcDQ%eUwkV{!vKQ>|Q5AMX9*<;H#lLZD9E7Gb|R$*pRzzr-Oh0I+-Yjw^P!g_cG5V|B^tlmqjs9%C?sCWE;~nvb}q5IE;tk) zocewVko|X57yGMEld#z1$kDkICF5hf+7BynZNd-0v__@$qbY`(4xU_Emv07u&-Z* zt+Bf;5e4>Cn+u4AnOP>_mc66x9$u*i5Nz(glAs6N^Erb0GO7)rv`nYiKyA0$b%`p9 zSvm@>D*-x%s&9qPW<)q6$Bk%u8P;U26{OU-!5< z8^Eb_S{AVZWv=L!a`HXC%;l;0?> zf>eryR5?z{xw|(xD*#Dvg_7NOs5?_dC#R=BhlzHPQ>wYex87utRd@NR)+p*YA?xoi zg^hzFJc@&Z6PuJoh@b$=$ZMVkd=jmPGoS7e9334gN&Gt6-@aw$w>owP9-J3~SV#m@%5LEbh_wHTFZw<86Y>royGraP8c!fLL(=~c;O%prUK^%`c zn8E*e>ajd+Si8eVDnfW)C3$987{RUEw<&38pxy+v#M07I;tMyfU891XmIGOx+m+F| zcJhR4u`T&!LSFSnDzQ&?;DK=uD!P%}yu4YM&+;sYJ!ukNRM2h%Ix8hwCx@DC7EtQN zpB3CM`HiEvb&5@Z>(F?Q1Hyvf@gKjY@ne{i*(Dgdf6DB#bgpLHxGgZ};`?vM}eIgo+8{o z%H$ZtPTl{8c||JLmkx;=cqV6YQ$ZD?K*w_|6EWD~1yk?IN79^eL=aG%A0fJAt2m1i zcO8Aa7>N|AdI$U<2B7Y&PHl(JrkE^(#gKf=fkwBulvoYZm^aT-C^rfZ(J=AvJAaTr zHOnbFOv8>)5Ftgr1^OZBVX;^&icgLG!{@#rF>k))3n*yAJK$Lgn&F~1|NCGC^;y~P z?A#Ve1iY-2@j79PmlLa191RS7j1GB;LlAhK-1OQ72dT?J9QjFiahhxHv0wOSjy!}5 zx7d=Ma3#NDW@qpC`eChnnYtqkhEcSqpwM)eP%$VQ^PlGs z|L{87u)Zm!@cz36qPH1g){H(Kt|moc<0lba^h&b$^t^EQ-$melwP%7Tycc*+&JsI- zKJ2Nb)OD&PGt)UjPu6_&c{B~bA78VI3$<2Rn7Yim58*$bX8ox9E@8(lr?Ue8c~hH@ zmt6KE;#+2x21FqW4y1^pO>1hsCmXw7(vTlVeZ<^9V<6Am=44uj57o1}hIxiWy!r1{ zffbx~B!>OI#CBIiCi>!-Hx?qqNie-k&)T-)64@v*ddrQ)b`)iS+8+6Eojntc~izmg@wp)&C5id~eH>mYqnK z7#TTDP~d~nI;0WAu$x`!?^+d&H9(i)Iyp;6IOLLlR-8#DE;i>uH|!5Bx<7n)^4F-& 
zarj#dUXCNdX>^`?(AXWU(#XyhsnQ5`ppIIO0m+XD0UQOY_nO~E2x9(uhD{p&6Em|1 z>uxnc0oBB0|32p_0j4u3hBV&0xF3rzl#=D0)PJr|{(($$<*c>~Z8}+TC>F~nX^IZ` zOR>xMY-#9ptgPynxA+(il}1QZZ$86%adgUI-FV9)>;6Ur>o$Q7tq2x1Iste+tCbvP z?kL{d@h3q{uuS+I`f3X)^2}h^F8Qx%0es>Yh^aA*$Q=}EunR+=roU45N|m63D2e(e z^}i3TC1-IAnmfY1D~N$Xz#ujMS)?jv6m$OfsT5d`>B-UlU#@H8kK%h{9ExE_B;vhGW+fm}6bxaR$jRaV zta6O*kv<)p*#=p#Wv=4|s!ya{e~b+kqubEI;sk{Zq_NU|6vf&=1jb&q=K6$Ftyfo{ zwLs{RINbOpKEvz8NeT}fqWEO)pt%<$HtzqCOc9gKnO3x>PzF7Yof*>15Q3JI`NW_NI!(9%NqGt(?e#T321`#6*0R+2pqCeg?{zw^=3j`!lu!B?*H<2cW3 z@M9ouhAr(fno|(UH&S=BS_@*Jb5xe$_?}~RK51sa%TMGx^p*io3V-^v&o27u(_6Z_x}i}QC@BM>yNHE@ zBllL@#Wpb1D=Lk0VDW)jUWm%&Aa|3%@be3o9(%xyBVZoX#fbbBg;>=16RA5N@S<(no%rfyY;*D)nWQfAI zuWlw0;q4P>l!$i%7#SC_r0yj3B&ZByt>+cRLq$14k7nF5U1piT!L0P0JM}C$Z*ehb z8V`XqUJ9w={vlGJ-1Jxs*V#We8xxs=Nd^c>1o0M@dEqrKX3c>S@9vg>A(3ncl6wtM zHZhtPdRZvCaTK=hQ;(;RvZP`n%+PR`Yk^ds6Ht8fmWYlK_ZIsVG|pYZdF#P{oyK|V z2SH3)#uDdLiV~941?cYXf0Oei>a4p@QoA5AW&Xh$!1~WS=NoUbbl- zMn4%f>e9q8ZUYkergHiu1tv6c+AzzILlE{#D#k19U%G$0XHX^_ayZQ5kC%E1X8Reo z*FGtBM|E;BF1FJ(D^~ieun|$%?o3~wf1mnQ;PB{NOj^2t9tqu)g314RQ|hY(2EmbW z_P3!H_&_g%W`|%jJGhFIMt>fZdB1&~A|x$UobZhR&fB~A>E4Ne{PoaenJf~zL?VxE zIe!gDp}T0$HNby@3Bl;okC1Vmp%lYLH?hOj%0^H?5Hq*>R1S;w4T58{h=qvuyD)08 zv%VLEB@unW@OnZ;?@S22jI%?ou`kkSdx6AyiR4ynnx%fTF%b$rh{$vh9@m@qrhTu+W0yt3|gN(oog6+)Nq1O{Kq7- zpcYTmoA2^E$I^w16rN{`g>#3#aWq-XfGA9X3SHq`+bph?*I+W1mJimkGAk1a3|#R> z9nG(iD6r$8wh@@cF_kq)0oLmxc+M8v-3>kuYvL}r%D?yR@ncqA$1y$?#lpcfc-$0p zx&{-7gT-JyYj*1+6IVx;C$#{Ay9cpfj4DZi_@9SzKP)2xU?sk%_!=(Lvc#T+&mW7w zOW3H^$R7QW`XQpdKsfYx}QkTbLX{KOYww^1POY5@cwSl#2*9!WoNpr&8 zZS`xud>oXSP2*aI3 zX9l|mZ4Ai9VTBMue0aMQPZ1ue0FEj8UHqp?iy226?NshsSy81;iCg?IN5;Xw^5FY78VKBYOh<1CZCd z{^~(lDMiQ{?R|&Z6{Cdb{cT^+5>e=dmb;*>5_+Rbbv8Si?j9m36ND@K=kPHe?^g}k zSwC@FU!p)ZcECBnUFHUWgC@ns20{U17K(dSdq+@YMH?W{;|G^j(`3;02((I7?R;lE z4?3K#yDgucnVCV4!5B1j$2y|aDbgY2rZ5S1qisGZ(xI3Thc(CP$tng3_9exZQt&0`XfizLdqgmokf~i{lEwhVoOJ-c_JO_Wpriw=H}+6 zKtx)Evp5z$a4HO~aUhM*v^vPb2l^ZcL^2>A|Eq04Me5y&5ESB;ClIabI0t)3Vt|Rn z^0TZ>I&U}r3{`0q+n0L-p^%-`a{dF>6v)D88w+$Hcm0Jwpd$(!DK@HmiWWSfTe~eg zTIwl!UJRNPxi^!bP}7)x!($)=4zF#)S*`AbH_$qm&rK;gb1)J9tuSs1Z+A!&!J(s|K%n_ecr;Hz zN$J`*;-6gqQ#L7#!w+0^t;q^x@mTx$Nmo{@_=8#_^8H7{lyXn?8`m2NAiQ;1pc)`Vnv$!<r}Sz6kEmu>DUkNvk2mwTYIo!Rv-J~CbbwF8c_RJA8*Pk96!MGLrNy< zN!T1Qb7o)9F%>KHbT?-xTL0yx zQ={RwJu85GtUfo!w6(R57vBC4_yH7pG-hZ>abBGPaax_>B}jbo);U3#m<|~O?B&&o zp%%LYLnwMbRK4EULXK>>Fai|f8!|%-2N4L5vfU_CR_+RlccS)v8zGHM(ZPN7oExmi{ zDaGJ2JFl%n(1qJJaA4#l)+`LLy}a5ue-U}+-|7#AwFUnL`$pD&h)*S9NNOZmfYH6S zR)!^a9vZd5G7BXBJykd>Z{#>jcp4FoOqTkdcXhUHdC;Y4|D#E_e>U^S`*Wp1kUADn zG;iLeLV+B7Aj3sgPhWI3&M`jK#=c_DJ2ivF0!XJD$3n(>hLn z(jd|@;L0kn>`G*=no(&SK~p`TiGS@)_`)<+W?2KR%_7Om5Ffv0BF4RKIW5}fZU9CKrXB_YlO9X~OOO)MLa?%n?z5m3@t<<_vvf zn#3a_A1XCBbc@ZE+eLEjxpd9idaPk1ulCw*K0Ca2ZdBFTiBOAKTLst9?yQqLlMePp zXATr?43S}DVG=(HH~oq*+%)^-jbLm&h*L0*=5YQztH(J?ra-E2>2%r1?#f3yK-g(0 zUD6%tG9eb#p>=gw4dXbOKrpRp3qoD>c1Z5BtZpyv>Sr*wynob7@=TXfQ_}DPMVXsO z(hJ9y^$2T@wugiYgJBhHQz}=%v_Us}iL-#^O17->J>{&)#T+6!WMabk!ErM6csgu!+$&x^ir`P&Ow}bO_EY^j>u*P&cOvS-tnkv(0PpLSwLfVP zbVr}S+P9@0nS49N!Hafbv4^OXQD(_(Y&jDc&s})n3!;?u<>J!@0S4UsEjl{@V*6&)bt_ zZR*YcGx|b<(Yt3elv?zrv*O`j6{dJllfXwKg1S@p>)K!$I0e*x|C!gDtN4{9Pg38J z_^Jdo_GFzDzM-PhI-@^~U95F|cfRY{D%TZq$`NLCVM?7PHI)yfC_+YXm@%ogO;e~n5DPk+Tf_@@zqX>gb%XLvSKl!xXa7(2U&77ZK0BYq^-pDXr*~>U?TC(5r$&0+=+-_84d;rj zaBEP+h$2&+6$4y6XDCOg(V6@t)h5C+`Gj9l>Zwam{+s!D<3?gnmXB&Li^&RCKputk zqHAL-1HFRMw}Q`cl(BWN>DD9O)I7>6FJc>Ww?9Lvuiqls1ut? 
z45gN3Uv07RF&1t%Q$=n1N%o@lA~)P)--X7^(v!UCt9*uW)V)r%_kb9+dY~+dC2=#- zh>UFX;YPdP$t5Z2!XJw!tn!>aj7d!u#P~t%Z=Bc#Vg-TQ^%WG*jN15kX?6uha4C9e zHvz`@kImx;YFdy{Y;5yMBAwme+Ey4;O~%W=)D`S2|`$`Rf_-D7=+|BV6bs7 zpMQJ(;CGW;g_7UNEb~dDM;?{H%IGB5enLc{W+3TQ#SJLN%#pq!LuO}Z(cdM2YI4v% z1l_9TDDz-1;KW`X&Sg4Jn@atC;AzGT*DhX!oo1{QW_WFE{;PSnu0S7=;=&oKDZ6PivbGN7Kc1~@6jH) zz$O#4C6A0r!fI2utgH-b@K&JTY0;*xT}@yZ8) zYdPiu8V6UFgXmhhJE*6V}x_sPFab*1NZI{+FW3>Qhr4 ztDIvVjVo{;ur7uFI^D8Blpn08gcmU9ok1KYY9rwE6LhuPR1)ChvtGSQiJ&Jg>|lAp z6z)Km4z%|Wpr_z2F}c9hpsyk70NNsqbJhb3w7ukRy&PU>W^P$5zUX=KI<7%ztjiSP zBVLZ#VDC4r*68U{}g*a*$mNsre06 zcsP}KjCZ$W_%^h0*zC7IUL7Q$K=411e8IRkp#b(o+F?pJ&1(``5VpSKVs2@E4fX7L zt$f{oUnI)7{`uy3%F7zZ-m^)SeutY^t&~%|MQKljJyqBXqW<5RPufBky=JSBz@_}x z9?f5LY99DTsWfg0oT;_jBYRNrXI0|eu2#d*58a(JYsD#TS3F$oZy=Q8f>6PR+MePvu#dkNKIw0j#)PM{L|P@TxOl10>xsFO3(eu5^Yy_k|YZ_tal4kYGn z4O$ohrCjxuLJHUjkde?@}A3my=)oS#%B4sNIlv(VS0Z+x+NMPW|7Q1vy}0rK;` zgub~-kpF^%YN&rkIAh*}T`V{&e+P~}475Rk8xMKLBzYb^Hwq{D;fTyL=t7%;=1r^q z+%xWLV-av*mblIVI@O`uB{rjnk`bcJI>Wi7nW4_{^0$Z^Vkoq92E}opj$K@wWpV&r6X)GBfFe!9uU3I$ zkzb&uvI$L;T6=CIxT}lHIXHenFg z?!)wTn*A;6-t6RqQ+{&eTkcE^gw$H{@H=AI3u2t3LJE1#9M_h*8_>G>X(34`8U0DS z$Mk7W@Aa;xTE<;E|F?teT;9BDq(rYS46MfDHc zN#Rq9F+muA#ioPp_2&fnkZ-I}Zr^(EG2K}&LYIac!P|<~3SD0Z&5r%6maY>?(%N*5$B zIbwc0QLVqlSps5W)5C^E(;7!l#eT<_{|mDbjCvU)eA}yO&|_*uS2xqR@JwyIGqzS>#ZAh=oF9B(y2BmviG!+-*;b z5lR)OTr(E#zSF^SC-p_<)5P#~)k`Li>?9n@rKq$R?65hk#hjUBGxm^wn2TZz>xHs^H$kPKt%Yb1-lDfO7){?5zx#qEjA z_sF;L-|Xopd%A32@%7-s>fxJpy(7DO4W->aMW1TkMUuVnnT%ezA*v}DkV~IZ4}!!} zLNVPBsUI`zr>~Np%oFfE98O;On3@_bDw0;JT;1UgUSsf+F{szFgpKVJC-RKD^B~zq zcl6y@iG2!|opzpy&_Tl7UB1)*AAD zn_%s{{!3TjLwf7A>a!00pY){c1FC%)hItEVctRR~KVvkgjg7KSy*Dy89K)4S20E+D zH@aJC$uIIwr1|t1kfF_9wo;2LF0JuRLdfda4|2rmL{8j!J96S)RjAKZoB1A#27Q|P z8|j#UwwBqhuDsPPZ`r{M1c5OvTvD_tlKbaGC_jB%oLbHj{KlAG?@geBV`Ejx`*VlD z`8l@ouHpSE=sx?Ba+O@mFEC&Bi;@kt$eBtehvhJc-$#FS3Tk-!&7$g+MJupOzOiHw z|Ha~|@e6q}@TK9=%;Tdi`|{(cvlh6|<**W;J`=xk+gbduk!mD}{$F-~zju&gb-p=o zG*jg`#WQ>Ocv(dG=56;^l(ddElhbrRj#0B+L15neHwnYbw>TZnwi>2tRHnVD+OU-hzw>mxuY{}lKF+;cADTqXPRDI za>rQ4%FM%GtAg8eY3nW}c!nBh9c`_%3-8`y62TXsquq!g&QngH#mqdM8a`jxX=|ED z7Rbo^IU+>*h}^-|SqAc85}UbxBNEgoH=^9AznFQ)RGY!u7H)5w+01 zvy5&DH(L76<=U**flcSG+eQ1;sIlWQbemeF+45*A#N*Axa$g=u5vt5W{tv3&I;^TE z>igb@F6jp8k_Ks{5kWvgq)S>FrCX5hMrjZM>F#b(8V=o}(jg)F%=z8#eZ9}~7njIB zXYbiFYu2pzexlDi&4X<06d^)B*L!**MV{-a6uT!W{5-{|C}g8?KfXkvyji5-(JYqjH_! 
zj?u-$f-E2*qs!0IUF@dkTgC+S#&)6Z73u0#ZL^JP3wk=7v9caij0jyXu@JZYi=s~) zS!74!O?Gq(vZ;)Rn8MwWsF2fH*c%Bd;Vm?m+8M|?JLQ)-YH@4M^YudQ3O$&1d5MG^ z;P(M{`Q!!xZClkZGXHRdWHz<>I5+Ip(+`OSnmO*Iph3U0Y2s)eet7uC-^0fQk}jx^ zL7~%wFnd^;posMT6coare1T8)7~`C%29t(#p$vO>&YLTgl|?Vom&U8ybE8%srrVtt zrt5H4$kU_H#ljL3BP_T#6A!OtML8nnXX~ihaRcEKwuxu(&!=2VTYW~Av5v1vZL!zx zL8>;y+{bMG?|P8=SZh6=7>k%3;U|j?HJ(|4*E$@9KiMyh)y=`&0>;J}LF;a(L zGr|<{CFx;`eVi5iyc)&#+1cah(9x3Gm2w5#?gJ8CcdlDR&^sN<>b=(53OAuvWS!lt zvkf7d&JUGw+Ggu;sg0;!GTx*9v&-$@PYp{Nv1po71Xan;J59{HKNZm58di51Yjhfx zhz0(t5cqa5mgajLsuhuDvbrQUuF5iPX)4A-dAzuImG{n;+LiKxHswWWXm#OsZYJu{ z57nZo(k(12?nRE~jy+q(yVg#e`?~|qPy4t}&Q~O(1Kb{S@&_ey2BNG}?V!l^aZcEG zu~RQ%U*HTAO#7Owd9g)R6lh9xtSo%0V8i&?O&x3_I5m_O*kBeUh@UEJHDGc&s)%xRqr9L=A7TlRG9 zV6Eyr3=8VYPAny#7hm4IVFGhP*aF66M(Xjf>o*ZMdkR)oqG}CX(oL6)iF}EHcO^Ha z{xY*=Ymc_gSEod$aUpW~mto~D1;_NmN{8NGVceH)x7E>?tVwTceeBA$OZ$+9Fr`P^kZl9 z{%^;T`Zso`kL=@>LZK%{UH?R_$}$CIt5n#a<;s}o#YRCQ=Dykf?@R?-?$m>MCG)XA zTb~<^eEvpUt@ADNsWw|}&Y%@5SREa?g5A0O zIfpGTs`g4*pS_-We|q_|m}dOahD67@+9xJz3tKTRz^!&qC})qOx0XNR(Zs-^r0561 zRUK2UL)SLp&8zW;x_d}g9e)=|9gpWSH$~D)b2dfrl=rP`pLy@~>fh?ZhC*@X^sAZa zGEjP8LyG*eMH+c}Oy6y}gqE&$tYr571YD%8hhYo;&b006YvZ(;W`D0}g-R#>WgoW& zBK#l)44YZ%`8HB&etz}qio-o~wOOx}KbeX3&G5h&8izIB+s5iaip}JGOPdM8n&5!!; z-&Wk?P?Q?7nRY&l-OSY}jVE=Iei~CHe0(S=8673qlioRoYush+Jdk|13lnr48zF8A z(Nh)_Uz$5is7DhLT&&(79BIn4{zS)}N@w=)%8t$qt@OwHf}eszBSPX_=o*D2$9W=?E2!n)U8Zp_xy1y(ssWto}I< z_c;GOxh(Rw18q!Y?-$`|$M2zJ9|Aav??u9E5q*td0*NVNV;7qtB|a|p!gU?8b# zZjW0>-+oAn^@4P3?N@IrcR+rH6N6Xh#nn zbW2%xk+wUP!H$rUs-HUg)A4na>mXu>2y3v)(eLC9H?A*-uPWPjzPdfPYsDCO^$vUHF^^{+}oPe1V{Wvsj7QyKsLktm7T$w8~! zQ`y5~67-{$7alIBil0pJI?W$m(^ok(`#$XWXJ1U=NX zI{R>NAGM9b1n}zLww~?kzW`0wep~3jt<)uovU~hG#KGe0nDLax6 z-pZdiyhoDfJte{8QXH7dGzso+PRikdN7&M&0p>@ZFg8V`*NLW{Be^0RS^C`sADkF| zjY{J?e9%_y%^pmn8n>T(y_`7#1!B*rdM5sPU7Bo8Qa^XH$$aCs?^3zgUR#cWJxiSu zop9Y-w7S`u@A)*9c~RN9rUrM!BlVl(W>iBW&c>cWYwyST>xi^5R;%a!tiESkY|*=E z^v#k<`}jmGL>CX?mffGH)(+_BD=hV;qSTxujU!m^-a$WS4(4RuW6zB$g;K<$#?$%A zDbdNRRQ>$0h{fpcGf;y)mxg|+Q{aQasaT4{ZbGC+= zr2a0OAZ?R$;Qh3pNr$cXC+CxPdC-sUb#*;K+>}@5`ve{}iUmWcoHG<5JHUE44H3)M ziHpP%^_8Y*3$Eocw;CNvpqOSD>1}oUp9}l*^_kyRKD!9iC_2_X;OVH5m|s=q4E;>_ zXsfswH*SuA@s)Yprici2W|-SoFxg#{J#d(rDE-D3^-5QtYHT|g7m=pKko=zAj-@18mFA<#=u(IaOYgp_^T*iu>OQp@W^ohfj^^+^5 zjo^-U6nzrMzRQkgmZ`W2*PbrBEv26mlN=i}GKa2b?*_+i9OGe#8%<7S{ufL#yOgLp zl;F`Zy9jE&X)tl_A3xH0yWB_^(p2|u3sjHpjp z^nOKt(O4kjl4rcdPe`^M!#t8Q zxwfH*D(~NRZvG5B!X%ClN{<9pg#`5XdH)F7)GfRo|LQj|a(r}E*-}kmmDH9}4t^>r zl*124&4@_ZBn`LS?TY&TxEw?-YI^1V{bZv3q7dn5RA#>EoqEIJgI?)^ncG`p)pwyZ zH=Db(Mk8%{P^A#>+1*~|TB}u>_&r(0%acU{`7&uz>3k!NZVT*_ijfwUJoN-`(?0f< z?BGwS_HRn`-@ACL5!5*E&_(a1aO=(%$hGYqa|wx)I!VObq|I&?v>)K;x;!F#$QY|T zLb;1&A*!brh7G6W+xY3k`)=NN{uEhOSYXiQ^7?9XD8QxNeqxRBS82lihWC?;KLnM7 zmaGh<84p!eHe{2Ea~oz3j!KsegM!Gr6rX)97U2k*YS)F}k2_Usm+AF#67P-58DQz) zTF%Rz^i{Yr{yqViq0=YeMPGjMA_pVqxqeE!bZ9`s-@*B++8zf#;P1V#;Od!vkxQlD zQD{Zb{^t#(DZ;<$iHDAbsW#(g!@a%ZJdZU_maHoU*2YX|P@x;U+E&xLu#i-*5xe${JPlsdCJQew9%qi=@!kxOhZHa+K za_(pQ5{~$A)f_dF%v^D9K_q!Yqf;CG<>yRi#4HECz9!+0Knek`gtYFv_L z$UU$_4=o?+T@-1){YcZL$^Fkp?yLIa-G%)OdWkpq<{r-e;CL0}Omy3U!m#D*z4j#a zP%J9n1c$(tIGkxNtiPG#!SAeua0dqAh5HK8$eA#nwXjq|sz=A((*_c?^476CD=lPH z-HH*bYX0vSg@{f1{PI8fuqgahY7RNmug@KFr)F#_WIF$6H%ZZ2PfA6NJ4(_zDA}vb z-TK(3@LK@%f~>TwX3(WVEFS%NNKe&A61->@f;578-wi^?gDDyOR@T9s4z>aD#pi~H zTt9&4LfjbovL|ql>4~!(8Pnh&KPT$KWd2o$wjPl|c*j2t{?D5^t9Rk8Xx7-?7}sWl z!y}&HJ=`2F!}FvVn=td+Wl5!bf<3N|-DI*WIj(Z-d6|}}O)>jp z;9XPM!WpmivJIz%I19?V5{(~!6J8vPSC~A5*KpQi;w{hpU>qIm!w@S^hNg>ADOy>q z0s@xys99B(UOyI_Poz!Z|5&T~!(B#+Eb0e6ekEs_v}yTMGwhjOm4`})@KMY@T1eMo 
zWBP`E{`26zmtIreGkx{J2n!mDb)3iP>1icRf8!0GIL*%}#Chj-q&Ikyou?W1eh(NK zjQO^l)V5rD!<; zLUnduNba~x9e$g_z?I_HD3aGteBFY5 zG#OYSs*L@hMD7^vR(ow84*WTgAr9F%kj_X9CbQSSbNW``{>o#V$Q^~*@Xy?$GZv3LrBBy9xYZPy}FlYZP zao8coVpq0CTgqfJZ2uXyrxiz3+~B*CS%x=R=V6XmNeuZRjkJ`Je^HQEa$MR?y}rzg zk`~&kU5$7YmEPuyc;tE z-I{h3H&QB6J|&F@+6L0HsdT;si)xZR94tnQq2b)CZlwQsDh8caJXRI)QjFy8qIUTX zH|1)s1sAs--|5zTPoNkhk8yp$=f(N>w>-D$Y~*GU=Ft*KW6D#*p^RrQv;_6`ZSXOr z8{BC!p7coS`Z42#*CwHNRMCWueBcYwG(Q4SpJRCaU^~qs8S1iT(1O$?!UgxqZbfDE zDN?S$dv!rE`!biR zAxE~kcc1<`h(GZhS-DW*nF}!yH}dDW$O)U-j(1fy*dMj*ShA;Jf`Y#|+pFRHNsP{U zu)9}1nmgg?0=q7Plqq0raQG|sCy=u~-aO@dur#`T8ictypnfE^Xf3tnMtSC^vj2n^ zxwvqRrZ$fHnvC}uPIaOi&AwkjWhWF$Xfh41xkvD`4O{C+bPe(Z(7flQdXIG;ZKp0a z`Y-CK4Q>$>0?uSq>byM>H;TBnR`gTW^WFmCdezh&M8M|p7u8n=OTT@)C2y{8);We9 z3`wPFNi$U<-rD=DRirrnI2Eqny45L)`XSWPdkT}Pl;6dUHa7ur+ZKED5vCs^ELO9DHqE? z8)RX}sJ|cUE=oYxgRTcLCQJExqku>lPX)JQ?YldVfRA+76HF8PdF2T{m$hs$arCgc zmhh1-CjN&U$&#c0C_j`5zTifys@9u7yvtesg>pE5DO;-A{Hictzhw^x+bKr-+h3ti zOZ`Y~i9gH(DktpE%RJlOiT1Nv$!iiGbvPPZYmG zB6EJ5>4U?eilh|banL((F|3O8$pRRUxtDKIrLN%~mo1n1CLuv1mOHqPqAr+Hw{ToU z$l)es71{Z`M&k73;idb^kD5WM6)uwA*2c=Y4#l;*-PX$`tA7=>m;D=AiAF(})!%UE z(V~3HDC+Hbc91K6w&1fQ9g|86iE?DVq6j}%`i|X&(2xQ8M%WjYp7@xk=_KTt>0bTca)d`Zi&B>fw18)YDHp7m4qVGs*rRL%!tSgY5(-Mwn z8{uZP*qmSFosWo6v-&)~rSk4r2yHaKdRBF7L9&u+gUu?aQ_|QH!eEknW8=joDeftA zLfzf7>ND?C^)#JJhs*LhE^(R}dn=4W?QDBYke z6wECUEi~Ovz$&|6&);fj^So9vW@(UmjQd#O>hQ(w;T`|*Pk4PFMoqGEAEeD^Brzu< zXB^E$xyAL5Gy{1-sH*0fINU?6e|9ydzhPDUzC6HIz&Mk?%Ut}x;XMDudsm77O415uPFfz#B)X^LuU?#Xb+{GH*}}o`=)* z$)6FqfY%08&-p>y7fQuM0pS6E#&O6{@w_9QD2-sSDTrPZGN13v9o)G_gGTRh2rpeRg%RENG zi{KZ-ENS&y+ma7a7C-kLbDN?gB6x!hTAzXK!mrVHO3Nd>YNfvJ?P3+lHNz;N` zJ@rWn4ZFnEOpd*M3tn&;nDYs9M6fVpsh4W~&ZG>>B>vSYNfGgzU4txvi;Ssa|BZ*U zqHXm+4PT+=Iw}~wvAU)cuREuhR^n(wm9LfLUFf8TJ$rb#YqTc!Y|Ysu$+(yxkWVVj z07hcDsH1}h$k=+`p23;n0YD@lp)iC18z4l-fTUy(5c$#2*IyXW*^%0#^>3p5M9Woq z)~*p>jI0%imNig1^3+~$!gf0RaZ|r>qDGWS%)xt}Y;ub6f0%2XofPjuoETXxdN;&_ z#$8`WHzCU=xzWEoTY|6?bO_#G{-e7|{L}xvHHJ&5HP<+YxXm&%?p6E!6?eMem!iG% z=ShJe*u^Nyhw&LJ?0F&KG~HJ7J9X9@m$wxb$`a6}@44z-PFWVi&%~6S=pL-cCBrm4 zeR_FptlMqfQ3}sh}VK#c%JuVkZpp!UzM5W}Ib-E@ zk~R7JBiSPW&Tiw^5B%86+WLVDGjsw7a)+-D;$=XUM1$phzmU-zV)WaG={K{C7Rwl`Bo zUW)uX7Omqr-;a4J_+}Loo18GX0<*s+y79-4I^f-0E@;eX@5?))C&A zPg!N;>JIE&5?7zR8aXt1+e`zZDE(#C?YzbBYD~YM>J5V7(@!dHfV_Kc>&-wH0}%TS zGX3`flc>skFg7r@8OU#^|FD90j%Gqn*XZHBTrry8 z7Bi{y?A!5h2mh1S9iN;<|F#S2Szi-am&dVDwm7;QVVm&Iw10{bzLZVLXW3m-i?W(B z>=i0|huSTXF(!P`Px=iBwfWUa*tJSUO|2gk%0uZta723cO8yD_)xV5S3V$?i??w;cg0>dz;y>#s(pEKaZHQOGtAd#gGnIcp70O zyKYlU#=%y9b6jN>#Up7vWg(}URD+Mwtg=vt;z2=)jxIA#$G4Z0&N7Xp-h(tGM=I?V zE%pG-_2l~i+J_{xf)|YV0fB!%cQHf3H&eg3!c_^A6dMe_r+9PF_*#<**rw#h3j|1! 
zt#+0n!G>A`mv^nXR^w)BpKY{IKmJxq#dlaB=jkagyd)&8dlE^PJn2}J13J0KZ3K|) zXdx|aZEs!F+rr{v41j#8N$oK$aU_HDEs%gJo84UAvyiThhJUY`*BMaBk zri6(7ouSs}nQqgbZ6TYt+qbuKMsHE$I$F^KtDco z)BHL1Kjfi7-!w3M-$^~wTOYkC@|%fs&ev?nhJ|YW4a{DGjCXY(rNEiB3iz^1{wfetOXNv~ke`WGk!A21n*xZwMQ5%HAHXzLg_mt**Xc zTA^g?n9bq2!iA1*tffo`OW1F&+WoqHi5UjoG?GfU8^;9&AtAIAfnHXl-9OOJ;tV)M zXl{EwJ+j24r1@XIpjp-a#StWT)nEU?fltPJcy$soXkGFu zEEnf&+2foGyn*s&isG-0W`E8J?Sab|_%b=9rkH0J>;5Dn;rBd!_n9%bf5Op^?bTQr zqofB9nQL0-&Bl#1H+ijnoibar@E{x9$*Zx|$Avq!HT85qy`QyS-+VKGjNG877vHW@ z`&doZ#ZmG;6q5$Jxw(mWO)ksifBuYujz=SJYm1N(3qn+^0Z})EjGW76jIbFHkYeNF z>Idu+(t*(Ow zlJ;Voog3Rsd&j}{vexTr()AOcDqOg?Tq^uk0Z!^G@~Ot)y$<TZ5F%fjWAUvgLLrN{tUv3BVb!=F&R&Gbg0IJ&9)m@CxP zB3e-Wb5Qy6kGeI3%0wN#uHPjR3*O#VH4lM*pOV4zBOwA$rvc}(+dL$YIhQ#`^uJeu?}xq&?7}cAC+ts%1Ecg8Vqrb90~=$5E$qLR7sxiRtxfGaV{3B4)I+lg zW_?fMSmZHl{lD}s7ifZcflzM79 z@(NFfDj9x-{NdQ=Z7LZ$`cbK@hLalWYZ%IB+8epnFKY+g2W7vw-?(_v{ct!|jeu|F zQMID_smgDy{b&cFpWj#Q^PQzWVHVhN^bOEAU51yNXHA+sHyH_^FbY}ON=du!9(??3 z3j%G zdRxn#i2wPlM}-O>Gn*UiRK;(^&%T+`O6W^?eUq*&T@y;KO>=TRTk?E{ylbvNLQs8K z)KexEG`}_>JP){+e6Az3TWUqD|{MdD_L$~vs8@{t#(}r#{5fERV8L zvLa#UDll{Gd}qlDTl>DdrB|G8xx;H+?O8I`uU{?xPfsc#KJ2#Kh`1zdhy$Z9kNVsK#nmMbA`@h6Ue@!LuM}RkyP=1mKXYM-exaQZxB|wSGp$Pl=*y%`jFs*C;UaE0&i}^zp!}3q0 z^Mb8dJjgNRoB1&zq3^Nfrw>u}))T>J+W73mbPig&6`)m$21FhoEk+bZN(hnCb>_*cM1kU_xDKVI$T17n>-% zZT~U3eoAhezVHs8HJa21_TZ4NmR~Mt?EN5sAYNU^5cv61MsnB7YwstM@WLpa5 zx3vtP8CQ^3kHWbnhS4<&hFteE zLedpGH%dWj!`-^n0LPN@7*&!_nF5Py#{4f{Z?t^dL+@B1?ZAN-OYi5wqDq|+6AFp;n&5&H;)8+ zU)XMK)avb&3(ZHoDAgEY;y0GqBo2U_|2^y4v7ZUc-VFTyeiRNeGJk&dQ!+t}@zM)& zUhjgvh+cOyy+10}lGTwbj!ZkbwA~zN35?M~`(m{ALpLegw5vea?czE;Xp+vDCcaIb zd{wH~JWGZF^Mk(CQ<{%fjN8$NjJn%uq*WpcTIE3_`YhZIh4A9>GIhgkj~A(Xh1_9M zO0Ihny45%wi8N_~F6Lt_*s^`b8gfr-)#_$F@spRvZgiHyoq zP`@+!r!UBILVnBo4+vqa@^y^&rPt=euU`*_Hku#hJ<#-)sXUH`eOF|_HWgTlYn@_T zkNv~Ud5}{vRZJ466tH+Bc$pwSI7_CrfmHEwj$7b!6=@fCv|D=~7KC10FKwuwVWM3o zt2Tnsr=##up}Z$_BTBbpw`}BT)kU1xa)OcFFRq9gWvMJl? z$bQS9oD}LC!ocr-x)In-vtyK1u$S*(B9}1JTBNYMP2-M>hhq*z{BkbL?bx0JRt^zsBwt11V>fuopduTN&7&1bkq(- z0mNPP-(qf)Q76r-DU}L6$}0h9(A*^6gET3}`!XhKuFCGN%<(}~B8Mmu;45klM?T3? 
zC~U%s?o0|%D1DnS;B8FY9Ju%m>2lOT+O6dA#O7ax!$lU4HyL&eP&|YaK0AtZUQ|C_ zAG)1;wU3sBvVr%I=s|H=Jw_@;oh`h6^|1)m{3Di|)5Rp3Yj&@d&NEXOyB*i>qy+NB zUFII_>x@b-Gh(n+K&;%eFpMQ64Cvn56r*BVxWK@>Eyzwi&t?JN(5?;<#5DYsEmrmz zq%uXcv|}h3m#f2-X(%G^_t^XEc1v=c>KH3nay3fmtOaK6rXyLHN34zGRB8!0&NE;7 zJ_OlBJy>xUDs|)$?NhBZvHR8A1|PkZ_CRjjamqm4+rY@)z~>Ke{EHnbA|PobUOIg# zY~5z4?ww(m>M$%<6yunUMWQ=!#Eo>=4FpztS{KK_6S`5RL3@d$VWOMYidfRjOG0#y zb8o|kKGa`ryFc$CYW(YNxrH`0xqIHm+1Y@|`HRH9!;~&xpuj%Amj=u zSQ+y_>-w%{lPu2kc?4zl_f(^R6%m4adBD{ z-_eCs+!eE+ofJUUkUFXQ$1m_YwQj!|Q)y_P<~`fT_T#yShU)vXK_$p+0|q~&O8Q9L zeVl~Hg(|e3%0nyJCSQd*?5Zm^P0Qlsw@l-YYN}4LT-?j_3kjo^Y?3Gl>^+UWXS^O|Gkp_zs}H52KG3JGYl06P1_zqME!UgFP-8vPgT= z_T*S@x;HDFp{?H!%^Zf4Dt+xjln^3gR`adJh3M%prEPuNB|NGoFo|j7=Ci&QC>Zn8 zRXOXvgENkzwN)pHL&(X{Ef7Y+$ zYW+fW|0rBqRLO*&|1h7hGX{qvvoQjTKtgc_+wvEQLL8@Q%3hxw2Vvl_i&&gG`6G?F zIr|H}$7h=5u8J|ld({ro2pjz(yJPl-uXS5 zxr`!KgcRuRo~#*PZx7~m*2z+iDdP%nX_ltg6HEpueQ@`x;46HW!{gsQMKclc9OM9H$AT-g zewui6bbo}Y$fF2kJqQFQ1}(72aX%Ow@O_(fyoLjv8QcN7v7rOFnKirZN=4m%sG?1V z)^2)Sg-bo0Q_tK7T`yNpqQd>ZyNGdm$$JOQP|}!YDZ~i-YtXgcA$+`Br3fMGtD%GR z70niFQ0S#HeE7j}%okJI0yh2T2rN$2`IWbP@ANe&$g5GELGd*b&p__zFgR14Rb1Tc zKKhsCT;3fi!{dPQyH?Nsu0P1YAvV5o9|aN+z3ws}z0iX5^w49QqHL<|<~uXuT9Lx} zx61v5wpl1gP5l{KOv54#g;-=zkbPQHA_?lgC94vU1(6P3|1+<~wX*k#>Q3$1UX$g4 zGN@oa%1ikPSozPyz($?UHW?74uoYMYZ9VokupCgrM0Cb#>a zVFu9=cTuk5B$7&7BB}cxp8k@gQ3-mPl*7Gd<-?&1d0kXas7Uw~Kvy9Q>9a%b_ShQ_ z^y5ASI%1oqN5&DN$ap>_M`lwPMJm>a!jg$P@Vjer3vT{(Z3nY&5H}@jbqp#j=|{4` zdGayDMw|_kDJVi4+qBJi*M(@yj_Qit_c^|%{4WzE0@W%d8Z+Sv&vx>2V~6pts184G z=*bH{b_yCPbHf>@?6o?2>ovQQv8J`57@g&6e@@m}(fgvy&f-86Qxqaqr19KWtu2wD zNad8x>PG-^Nl-DT=p&Y7qX()EviY}kXVNU;>XUpEfjg=CUpB-7A^cy3XiOR7U*KQ! z;Am`6a1{szYZwbqLLFk7dA^$`ymonc;(9Pxy(M>gp7Y6AVIEQ)5|ABOnz4 z8&pA^U>RkduyhC_R#lRh(W=I{xikOs^AF+0n|fSwA;zO9h6NnLK-f5`8w6V~d%Hd@ zlb*PW2{~{76hGSj^73+I!TfEjH?L?Xkv471ns<(+P*3iwO_f){@LMq^$|opmYS?d& zjyw3F(Tug4KfOk~1LwQHy>D-}V4nUpRaF)9pWiv~Fhd}Ju@P*tl6&-$pgKsuyk~bR z^uslOxcd-jfgy*F>Dl{tOTdR6$%^1+cYXf4;|xfvm;Lxmb-Jkie1Ug$rxpqCuA&cf z-dDyMqd?y?#?g7oxN(19(*NvW)|wXmmd6y25!zl7o-uyy6F@MgIK!TjFBYuH)to;p zsEUs=L0R?FsE(d6@GT~zHhe>xXNsa5{Pzd3V(T*f*?pJB!yKV}p}RT{ag@4l9nsyCA`rA*5%Fh!2|4qr@Yt z3J@lL1S|PWr>x($V*W7{%a;&^Ab4omZT`AezZ`20l@pzVt$!u6L1yEP4EI`bSoRh{ zv_8uK%F!y&41A8Eh>FGgR2GWdBmhg;j6x0DG~B4`yM&IRMW?EbBQGzhg&`L7 z+AVdo7j8uFqhJAsQI)Qp za@MLOTACndxdSXML`*_LFD#6JuG`I=;tUDN6=>5B7e7tYdi{g>ei!m)d0&OrrO`em zaZWMu_<7CQhUs?Y1`Y2(&D~ps9*O z!lP!fq5*m?W4wAS@>vOzhlCFlBc|x5>~SvB??BbO+{_Xk*ImoM39|HX-614$$YBP{ zQMXZR&(HA*j+o%a-=3Urt9rSvIJpL}yX%Q{&#NRk-RAozQ0`^nqUp44u6AkF1>R-w^0_4Rj>xZBg3B#_iO;X9@^K<)@FnZ576p28ky zl%NS$M&)!1d_*z+2}zQLKoq<|=g;4&pgUGGPvW4|@JAL-+GT57n~=DoD=IxkP&l)S z_Zy9(nd0tHl_=KzyL?U2vHqO6oW7{TTP2n^aVl1c1jqbvQ=vcV%X53Dl-zU=e<&Hf zpO*iHWWG!BB)lQauuCJdQ;AOh5q54OopcZ#27?)X*gZK@S;CF`TlXM!_#>u#B1%+^ z$W>D52!4K0dKE;+K-6r|=6S-SL#|({F&v5&QkX74;4;n@u6Yv!BW`pm(Bwvu!Ji1b zT;Y24728=K`k9Ez_$7yzK--2`{?T1aV84O#42&t5P6l7zg0bUlL?4=AA408Blm_CA8|WxkHTTe1cA1KVA38Uz9WbWO-+dod_h9g^DldKMcBG4NNm<@ zbTCDT2`We(*M*c|VE2T2R;3YQ?E#<#rKxNxy*B)*S#6+Tlk=LVKhm~t<5MqGa z_UShOKfy%^5OFS0(?_t$NqLN^4@NXpssiTpBbe~tViKXq)Zw+Iyr;*Y8}NZG+6y7% z!+?abyrw20g3Io6(2Ne$zScVga=>TryV56ZFoKtA)#ipnk4?88lb%frpg&XyEENzO zMqogaa&AySCJ&UdJd#x+3~y8gmJJMryhW~7%do?*avU5PNP`<{w4V!$Q~`R6n3&iUd`g8Iz?XneZrJsE@d6hlKv!AdG@Q@xs`|ToIBnTc zfRQFR8(Q8LWKY=>3xe_up;Mcp!=$rgfC;Dc^SA1%^B1i=hGejR=H1+ujudTaJ*s z0~+)&2%C(%58z_{gL?zY5@`7+)xG)l?VB?wei)${Gpp`0t8Q9>PfveXdx;YI7CW~t zYfKM4-X1X$3E`91wPmPP{cQ!>C2P4p67(`!22lI!di`p^)l^C6m8tUW`G5(C$LNqL zAP)hWF$Q?@xP=9Mpa+81xa^*>$4ADg3)Rl=UBN-NNc$FNB|Zv9y&%|s=Dj_1Pyv?^ 
z>%(q%S)$eSE`I7ZE5{rSTAF*zHJt`LKbcF|VHU-5YjFNP`Zg1zapke@r~!TBGn(nd2ObI*!! zrL&V9eed&U`~4{hSXmg9gl;8MslmvT;fMi-YTb1AaByS1*)Bak<5BAm_`7H3*Td|t zF9ctEUTFC~cES-N)>#XXyzZ`#QND=yySj+?pdX~~K6X(Lf8P{|qN%2dN>C zcA>#PXS$>i&t)jzD_mDhL{aLYea?|1xl8fVa?M@lkb%KcmfY zs??mp+fBu_eN2Grcr9ib(Yn|nV#wXPx{u87VplQ6#xDtXp#JFYM+Ub$LFvccpGD2g z&c=;V*sM7}{(rN+Vv?L(qRN)po2T!IrBy=4zObd{RA}Xu5bdzATf8)kQNyg zRSkfGxv_HKErWG|o4o5un3qAL$2YWnc5Lbh-Xa0)12EfF8fY=E4k(Aj46r=$lZ^7rbqowug)jD7)%X)c< z0*TNnpnd`HdV+>&D*yA%?jWBROYwNXy8N?Vt<`G)x22c(--KP`qMWxO;$i5*8*-Ni zpavFVWMIvJfe-*S=ZN29Fbn`w25F&%{`Xflp(v(MJa+p3{sWj~#9v3t{9k|l-v^)& zn1j#~^c?BG!D9Fnu_HUao)3}+At52f6%`7Al?ZzF7H%kQ?I_o(1e#Vjn$eWyvg~rc zT(tyNw&r!rx>4JiuMJiUwNw^WcL;H=G9t`q_rjW&TD<|dL^N~vw#j5MV`sfeprE1w zP0(e9id&xX=3=e%qr8tofB#7lULs&kBH*j*9XS6zB+?q-g#lE?vnC!!&UjQ_3;@)D zL13(TGk(zQhCQGn)hbE1S&^E17j}9hC@2V^(2oH~E)3K)3P}YtUBqqM72Ov2>^(be z!n^qQ2FM;&f=2Xn z%&+k6+h7TgrqRkR4~HM8&wB1AeD~`DT;pzSXfDa*@6}_@Y(mYT;N1Tb^As~P zGcse;5kv3&br`hss@3|i1ZF`8xd3Gh8juZoudTfD_Z;Q6LcUDaJSUvQ%&#m7bUgjf z`wn0J{p^3{1bGOGX)o-L;WP&_tWA)9eJ^?Y}mJUvefnaz&<+-h3U@g~-u2bc&q0?a`2U=%un zDS&w;1yE>tFhM3KCxK`>69R!*e+Mi**qU^k-!c!xK8ylP6(DZE*?QQ4z4?kr{%CxT zHGD1{4SQAMW}=J5#?_PvKC3@X6T8brM&5Q6CR$xc`tU155rPCw#au8YFr)09RM0YpiR>S-VlDbdVKe{ zNNOUwilpT0*B-E9j%N*=5WFEM5HLMM{(Ut=ro31W8SwYaby1Ti1Wo-amX^$bP>Tuy zxz`?WOKiILfP06%5@oIs-US|7Mqi&2fy)I?mYkZp@zFXP^TM(pMc@G3#_f^xW_CE! z5GpkbLfsZX)d+!LY7dar?e!DoLzTkTDV@=+HbVsp!C_$dpPWsiZLEo24)^%Io#_0}=!g#UfXBEM^}{bt=ukyKVwQTy55mWgGFd4@ck}GYB8l8!=er z>WL7Pg6W1Ki}RnO0%oom&i`lTg2MTL`oez}yALEuBix4p^AS-l284U_K|Mv&Zbhr} z&817X5NQ-DK#7V>PE8R2?iWy0%PlKYsrr$mCK&Tmow!h z9|th5o7MYUlTtsSp!kCFfm;Adg;JjXJmvv3hgVYI0F(!o&;<8ezJ|eoR$NxL1vzI3 zJ8g@^ZUXee0FdBx1S-1~4!~_1E19u%1Y*TB*HZU4r-uIi9fKzu8)h#`XWoJlw`-4L z4Ts*8yYOs--H{q{1l7EGt)4-2`Xr!fnokDsB_ISF2|^&x1%!pofn!$TfIvKeNwGIq zEqCk!fK)Lw65eZ3JRQ;iYB>SOS>H27i2ykZb03{99I^o)ZUw*{fDs;7TFDFiKRit7 z{N-L3Y6RRAaG4lDT??eHp#cL0P|3-}-3>qruDG~Z-FFY{C7XZ~iy+lj+t2fJf|3}3 z1?>gKg=}(68vkzc(`GiX-lm`zA7E(}#^K=PyC@Tq8{s|%Epg$xg&{5E z(18#ZqA~9j+yz1`dk{!vCnP0p>Surah*>%l3Fa~+Y+74d%9PHufQBG| zUZI85!4lk>A(A((J8!bqYxCqoNC|)c{yn$iBiR2#5L0Je1h>|DYyik0AMZD==KEY6 z1f8Lxq5=XJ5YY>^(*r&R!sHMzDzHh8sR}QnWc&yPhlb|VEI?O*#sL(NP6}1#@|i)5 zPj+G=A<%Y9Sn5O}2mvmGO&Ta0+du%V0nRfNu()-6ljiM1hb@BU?Lk4!V_v6~CkQ$o zc)_4 z12up?1a`Lyi%{NRTzfOydN`38u!#}X&Wq>4ATIM{T}6f^d=@Eis}mHq!RuAc%^8IO z;}&Rjr#$|svIIaKO_P(F-7bzD|Ez{LlarI18bkx`o_LH&TsQ+PpdqLWy%=JQbAI*e zmBU{p%pO&PSfAGA(Vtse&H#pMPvs{TL?;^&xhYb?h06M$cZ5P<%!c;Ao9q7>u-ge~ zmI0ebz~L!i#fMsE0K3cJ5=98qIRPR=HstCNZ)=N0;~xKK_CFISbvz@vn*j(sUHx3v IIVCg!0C4l8kN^Mx literal 0 HcmV?d00001 diff --git a/ex_figs/quickcheck_3.png b/ex_figs/quickcheck_3.png new file mode 100644 index 0000000000000000000000000000000000000000..fcb05d314346d4e56f27aa56b3d857418a4835dc GIT binary patch literal 41804 zcmd?Qha;AK^gn(fduJwF*|V%9lv$KyW>aLZ5HjwfY?8=hW@Ya!s}Lb7E1B6VD|>#= zrO)U4{rwNWo~Ply@9VnW@AE$AbzbLn&K-RBjxs3`0}+BCq&HO*?jZ;^20^eU2=L$) zvF{_(@Yk8gH*RVXz%Orthe7Z&p_9t}#|T1hhW^INmd&z;7q7S~>bYt5%7_}c~OP@Kb#vAE7yjrH*v5dmGBwnh_fnj}TuL_LA5 z&?SY3K?LO(5?mevBK8}U{H3)|%Erf8E^^I1bd4jnW9 zJN&W{mbUqTD8MhRGI}I9C@6@59#0P5NW*)H^$LE>erASTKtE{u|F^euwvcLCf&N^w6!{5;8?I;+D{{u zM8+FKjl*%%r?AF@AC^!Op?)duHZ^<=1#DBqStcZ9Y9R?S7f% z+S;1jc!LiX@-<$pIa*!j{>%(_xN(v4!-p4Q1&#El4hkJq-wM9CmHDYZPdC#79;)c- z>YCG2P|%1!Q9Ht+6qX&c(Z@BgGtlpOW99f@wT)C_QLYtzZUeEWQYdePFD^)Ue zPcnx}Er`dw|9)i{&C(5DDLBKYliviN#iXUNFArCd6VviHjnzMOo@^o9-P?ZDtcDFg2p;Ex0(zHKU+hZn1*TyczfeQKH{)hMIHM6vbaF^xfF^?%yyIx~wleE-zXk_WKm554gwLPA2V2eK~H zDi^r8riL9GTW31oQZh5ItR>R$>A;oP;pZhj=W6E_JN>?4B^rOP$|xycjO6m8Zmq*0 
zNzaW7&;9*HEIO!L_qVTCb5hgLd~9pOr{Yv=A{MH{z^xt(i=)$VA1Uadi9?v*$FWbpjZ$)t_7@=>6@48{qDm zS$BdWBXyT|0$F@9;EpH9JF?OT%Vk#I({Ri61pu1z4#flwDoI9b-TbSnByQZe;RN0{ z(e>ZuA5-VW?a*t2skkyMOnmn)LY%BL95}RAO1aLn;#zo$9nGcqMMN-Nh4s_bdu@H{ zPP$6d^TBJ2ms34CEGUS;szN{D4A##NPm0XzR~t{5tH!+eu(7cxmX<=F;ozG? z6ot4Md#@`(z$adLj5qZ;T;9}_HD1Cof-Bhr+tbrC#I3BfRM>Ol;Te4VW`I^_XXo9e zqDJ0L8Q=Ydte}vPPfgEoI?Efj0+noRa+@DK-C1cJDtjd2JgFePKcB9muKwh{rXCIW z#%xbYBpH*GTwXsrKR-z{=k0rre;j=>9&XVY8*^~Py?Weuv~94tJVYfUBQrlza~Yngy(m-b zwlur8XFc9f-~7maYq9@sj+W%pZGL3#c20M`K^589*jSvDTdr2L-N_Cbsc;x*b))T5l;8nvuE@& z-jc6hzouT&>!IYjlh_2|^F3RGLy`VyI$Hf(q0v=1ZP%p%(xt(oC%+DL8Ekv&wvJ%$ z*Av9;Go~K&zQ2tc#oNc{d%80D@~?_cv-7ftYdCo1%p`t(et=$P;3s5r>Uo=+u3;>) z#!@Mxo^wemH*ad>YYq%XB_)OX9W9wy_2<%vGLQl;ua7+LNzKmA?wh-lBEyuLno4m) zrIXq3&>1|5|9+{0zWd&W1^oMPPWB{c%~$q#_rZc=KSUUysYGUM&lrS6(DBg$ho~s^ z@$oUbxG}#IpR6`Ji#J<81$lWtg`KbA`fk4FL=DaX}wTyQtia1Uuj1Odm3@+6p-}V%q|~0+8K`oAdoK_Hy*A&QYz@Ul)`uRY1oP<@2DP`V zd@r?7EwLNA6crc8je=ZizfCJgq%?Z~#f_}q;6KAv#K`1#WfmHIa%O7u_m?kUZV<5f zo*X@)q^7mj!L^B_DY9Q z-b=D5kP_=y{nQ_i^z7FJ3A&S`RlioqaIFAnEz3h?trFwj90h+NH|xUL zia3l&mRR?4mRNMsikP<$r0V3g!jWcf9m8eB2nv6baCx4!IQEY0AMjS2&)6IMAU6c-2do*uYdXk;)%wOeCbM>cAkzoScjdB?K-d@MFO*{ zbKty3lOI+as7?;XPZWU3C<2F=2LQ~mc(v4<6$t>G13#wgm6*RkDFr}fHQ@IoBxb5> zPq(RIlT_nAJ4y4SAF9^2wrFy4a(?`MvNFOWeeIeeWWd(_t))}P1~BB8mX>b$9DY7? zE5A+Q;lm6_{UPW*^?(5bH7ct9%6>kIPl0hDj|UnxCHyBPoG)zw|cNFC0Fui|Y)!$AKCqyPsX>f(ZsQ&KmZV(`_YG-~*Pvh}^p5Z}-Zua7hlZ%*m?5~Q5(>eT5QF9`BQ-bDzXx%pc;C}s zjg&op6|n zOCmNqET@U4XE9zz7jc{V`m|ENTeTQH8O_v7PE6#ltE+37{t}&O@%{UE^S*42A4dSz z>z{~)xEtp4xlESczTA>$prm9YA|lF|d!La(fJ_2Eqqsp}R-zZ6UqCC=u$$I77!@7O z0okCn+Ifn)Gh8XX`uEH8)T_~3)5n16JOfG6o}~mNByUz6N2)at3P4`X{GG+TqS&j5 zeq$Y^QTf2OWqEyl{dm1Pj}p}N0QH{i>-+flv=&s2<^Fsb7#LXXv7Xjki6wb>Udq+# zzfRx7U1!*NyqIc(g z2YB(`Ev87@#lHgy!(+Lmt))GRBXbLamO7>NH8${CYVd|?;48E!efXJ`hx{xuk!|&Q z)SXBP=wS}0aU*E5#|mzS1`Lz0Vaw;wA+Z7mw{}*mr^tShHiUAu&fT;nnY1=)32>7<(D(O1^YVykVZ3XQG zAm_sLBq6ib2+Ec4Na){LMbJyRa&P|`#s?_q%~GEN(2A3EW|w-h5(b$Ikux`EhYAA& zan!nP{@?+Pk1fzWlxybPH_E)n23$48XZsv2zaA#V?|3M6(FZ&NAhES2n2KigJVL@E z!zg>SNd-2XyaE8CF#_&=)#dSH2r?YxXW1iigd49-@`DNOTOkuX zWiej=R05G?#D@S;03S^fSJm!=L>394`T%IH;NVcL8(+M*eTZ4}OP=EapZ(42kVoZN zI)U1xL&mvNWpVWM&7%u`o;xg5RQ|JH6I6GO4iCRo+8ZqFx?6mEcl~Z&{|~D_U>_1D zDeC5zq|I=Wro!}2vt8#hGBQ}j#Vh+<=%}epH|M|qtaj!APWj^LN=;T*(a>?6<9Nfh zwDk1Mo_46fBEZ?Feh0F=Y&Cb5d~yVc2n@ZS&DaN8Lf(f!P z0m@Pr76>c!!}tEqO#$=!@o4ZrDy}=M0s^G)R{*4oSP7dx0?5oL`C{s(+@2eZ=2C}4 z!`T^oT4e4C8IzKVO7s23uDNeggVqzgxw(0%aSRTyrL~pq z!UYAX*<_DNLi+H}E>AvOO86R5N__va$NdO<+lXRWyn@Rz?e?R!G+=dkC}Wt$gxCV) zce1OiC0oUThmawe$$7}c`viyQ6xcf>kI4s$Od^4`znL=P`EU446CgbH&6_tR-IwVB z9>d0c4@j#8xa;?y{ydTB8`pRN?{QcJo=r>b4H z{m7*K$x87J0bfaTp86ao?Rn|Pj~@tio?XEfo^(9p#d-MoHmO!+u0 zVfWww3T=~;WH+1WjbevhDc*r(u?!{|Z~6oYN2QbFqZ0F0BJbVbfn-atAx2+cUm&fD z?(X8HWo491OyRIn@8i9>5Gta(-akSkB6JRyV7>o!8YfS^D~0D%`@c!7asR_`Pg9fW zxZ9?J;r{*mGy7qBC(ITLRa)PV-p=g0e}+n5V$_xDEEN@7Kof+K^RRuDS^FzCZqNV$ ziJ%N3h`IFwKsZ)^1&nP9bYOnGQFecGL34JHSWVGVcH?n||LO?aNhEoUeb%%e!`yz0R4OIcQcuv?NW)u z5sCEfg#Vq!Hq7K|7{G#D(IAJ22xUo02?4l%mBZ+(xVRvwPkzJGLsr8|SE|YfmzL|D zXGUrs(~DYBfSrmTemTo1>4bpucmU(Udb2u$g$U@E-+eRsi>oHTtupVyH=rmYYu$F@ z91v`1>hx&z+F2oOT0i3bbG76`)u{XVhU0x(YHI2U2ze#&7bpmx6*V5$cCCi$Ok(o!{mU-+ zL2)f={X?O4ywT4O$|!ViZILW(_En>J@2V}G7Jg(x5}+tF`k?bZp6C$@eHT?@O~1Up zSnc#18(4b)Fw%4X{rBI`a_eZVSkOjl7{BO^EeJ43ID`TJ11IOZ0j11RQc0}P<;%Sb zQvi4fKPoWLTgTm=9?6Tl@9Ll$+YWIw)QlkYH6(w%O5cIZe(&4LeNVScI4 zirT?d!a{IUlib^3K9GxTCx9j$eXA@bmlFWmJV>_i?lstnOH>m*eY7&)uy`-TYHxZ#rIV=+asi$shz(CSz0&!m* z6Tr=dwL`^%1tk=)zmUiG@9*&W*>C4}c1Yd+0XoiIEv*1x*?icUKDLJU|NJbG*XXr7 
z`e-olz`&QJ!^HnimgXU5eRER*LPFTI34ubd&HDbWgAyZSDE}o;*>B!e>JSP%kEMbt zCnzr_!Rj&bW=;0krZ*=iCyVWe#mnu7D_e?x^J*I?|7E=DPBQx8iO}CKHMN_o<17r} zczHSnpFm~FoSExMX@iPY*zD68>;FJ>gz6^{q_m06h29_8GX{?JG_;|(ptHTm%Ho2G z$Z)}>!ls`HM}}>H zZ9o}me!=!8RI0@u>*isMSI(k(InWI(1l1Mb&mF&$!0c@9*2fzg8^Z3(dMIvbGav;O zsslNe`DBce7|51<@o9Xv)9q1lvVJnYhwI8YmZQdohK%BlV1b!1@TBl>x=hYugBn3w`^L&OVJ zFhb7*K$MxeQYrQD@bHok=Lff?m*2kKMJaroxP3hJ3Xq=?&%c%=jFNZPTbI-TD=E*O zeeSk2@OvREB{nwY>Ju{M2*#Z6JX#=SJ5Pm6m>=zL!FC%q>&?XzO1!pg;A{tew+%ti z<`B$-X7L$H#g1z%_FxB@{do>iP_$YozQ4bH9_qc%3lHFO|Hw%4G0!=shZYt$V{pI( zdZiBuLArCOTUSzS=UMVCFl2gURO`y~H&SNr%#n^|*?lRxx4Mzc*9=#(_}jMMUH3<* zBPXl6xk#w-6%PhdAm5^(%_QZTU0UN)p)U}o#L;D0)_Fy$zP?@rm;q>rsX%okeGfg3 z$T0wNQV_S_ppHOAQmD^4T&0)<4XZKWre=Uo&}Ng9lS4qP3x?2m0OAS(A>nn9g}>!$ z^YKGwJUZBE!(id8^=Yc70!M4_?2MibDBEt!B?Pq*?INv%{(?%_#zV^1hs)fhHIoN{my)A zjOxWu+7>_RVIBcBXnwGWh4W5g5VXA>{46$$6M29K#1rUau;1|>K6Gk6;gCzev$Em+ z0)Pda+vw%(N4YL1I`A$RXc*6VG~aW-_g;iD1Z+|aV&$n^w*dsF?qKD^k3SnIA%#;F zw&~*zV-O<(GF2cif&Y|F$hZY+XS7ua!C~5+cm;GawEu!2(5q`IwdfSK?4lp>J@`W} z>nn|R7eKE}lyW0NRe()J@kiZ^sMsZCVkOFe(y!E$q|0cd0iDR8PwD<4*6Kr0}|#l_v-or;8-NH5|^zN^Kx%BMiWuddcn0Z;}~U<1=>fd=f$ z)=;|Uo*s2)63AIBpxIt_sF|Tv1EC(2YGp9b@&1yK_s-}OXf+6BSTA(s8dNzzWeyf* z138aKU0ogRkideJEG;=934}q7&3gGV6^r*8R=K`?uupAgMn_=Go9H}i)8)*}%-~1K zo=WAQZ)Nw}0JgCM3lgyGNiLulcmsktia8+g;UZ{PXrTm_8VVI1ROR{vfFn2LoE^qrDb7jiCQp^<_s@FBN`h0Zy?{JO-SR z9|>|8t>uX|Q2yF*xX~FhGZ3ZUBXf}7Tsh5=wek0FE0wx58+tWV)&WIDMd&Gb0L(Hf zF+#IQHH=>526VTG!<9Io%hv*-O_LsH7_Q|e?sbWSL!RO03~&aeFnTs9UGAq^n@&zm zz0fbv3A;u0X^Y@KTY#j?bPy`~g!)L zHa4EF9QtmVJRJGMAPRjjYAB40qYsRlvd|bh8jf_Ri1_qaT;B=OB{l{Sz`s1dxcDxr zG-w3fcw^5%&lZyPfDn{9D1oCa>72xfJfQV>y7JrcqLk3(nuKmX4N%D^+)JPhwnFC& zJs{XGE!q!&Jc6ozAzQw?zww=+J4FaagLoD+_FA}D^6<;+OX1kYs8kA#v~M;QX@K>U za0{9g?~UN*#|0b{Go1kxXm(@gT*G2JQkT1-Mhw{G)?YMOi?87hIkd&i2Z&S)6aPUYdkQO)srew|7qmJ=B$O?B+=LU58Ui0|TdcUqoOpcw)xcA@i&HIr-K@>vu zo3DS?d6prpWX#gs1-e5TO%R&I#Kf&Ysxo_~1gb{Z$yj9gQPEyis6ON8EFg3%5PL;n zWvC1Z)gmpVj7jv)4YCDuDZtjblWl!iX2#tV6%{8z5urp6h&SdI?=aen0n;b~ctH;g zZBc|VU(;Vd)_QY`;6v^0G$~G5Sr+Ju#7Q_ZTXw~147XaZp`9q`E6_lDR^adS2_%j3JuNsGg(~#wP(5GI5V090fW+i36hD)p(O znI-9^%a=i-jPyO)H=F5*0pGla`5~*z8`EKd@0EMSZR_BYurLM62mz4-p!z|(=HNPd z#irMDaQ<6l7sq<5VQq2;iXB&}`@-5?3%yyYq%J6#h2Dvt{6*dr(CV%v{rF@99T4ai zJSC0I-tOCKyjX*_d~ZdcyV_gwKp6;gEuN~vwyTs;4#z~6+JBQS9p`ed6wTfihdc*SDdwt@F|rnYK`Yj=|zw zJ^x?Zds`1kSwT40z?t$5Upt*@D)`^ArERSIGoYDuz7^tNWyNsN!>rRH0oxg%+cq<| zF0Z70#FqmOjXDNG!Ng=#xI|&x;C&8!r(atXASqV~GP_-||2GD2#I7%!5(toS>Kxvg zGYGc~CU^1qH*|!m9_TIQxpO=|krE1|GKkL0iXjPM;aBJ&Rd$4R`)JjoYx54+LD*~4 zDj+aWKJ%U#$T(z6-Wy-Ipq(NH(Dwbq6GtGIChHy!4uX(7!qIdIvZ0u^E36_-Uo#8G z^!jxyw0H)g`sczFRD#V=B#AuTwnH5UBDM(%qNvxep$`mGCXCm-sA2Y{75b&G5)uqF zTe;B9&}><ieue6%AS%| zpl=NStL_x|5V*Lw{5+!un^vBloduaU`?{as-~vO_YiN$34~0@flb=Q0jvj_1xDYvL z8dA?gO~Zn69T+`vuK9{fOv?)L@O~R*6KXuV{$V68Ihm|%C=AF<>jwN`SnoxHtN|Xf zN=Y$X^V*aHl20pS90pRvdkRlB%Ks4>ix?-Bo3*;m9>g=)j}J9Qf%Q zPXj;|{esF{oyzEAdKc|UO@UiOEzHxueLwU4d#OI6v*0vh7q-L^8&QVMmC^IiA_|#- zKY&G=2*iY-b8y@J3!D=W5ipsfy3>;)8wr90$At@LG&D5g#B6ClN3ulD&s&W^Pk$Gn z^wsOv&w(bD-a817j8yjamhQ5AY+-_p`et}uZ*LQH2FdBAJ&Kz*V5r3$LJ!o`cH7cftLlV;C#Ai*doIDGzeMn$1N?DWRM$CMgw z4`4LV<#Il9A#L$9qM|VEZb$gd+>GHuT5cPnhsB7irqKVzykt~7PM}ct0(&kiUK7y- zw<&w>osH|PUK}{Al-k$OVMB=ruf+}))n@hi2`T$^PapNZ-IUlT9}8~Wgx=UPB&-s# zD!>vg6@rV{PA}ph$9SdV^7%SrO`hzlz}Zt4iDlg zvtyvI|DuDFt^M~6^bq_XQNe1{;|PAWFvzjzdPyfxP}ZE=oAGlimBHA+s&d)?z2r%1 z+AzjFYDpzbsZEo7I|!azM{cUraJ}{`dLi$>Pu<65{jst6!`@REj6?E2oAUbaMJ0or zfACv>jd(e-=)TsZrcO+R?i*{M-6izmNef zG6b=y{o?ef+7FP~Vm!DB7FawgAxZ+PvzgTv3C_+HM$W?eLqivgB?$t8a*0mgcNQB> 
z8T^@YQ|n`s0!@L&X1modL^RzDy30OZ}*`)TY`@si7!>Zk$rr2l-G*;G*{#wI27W z{mq@D7Jj|lzrv3u5^ZX9cjuk8ap)P|x#^)UPSc9Iqf%lx_IAHVnl+}X*`@x=bL@J; zzmRlXXXGZ~T=95PIas0z>)#8@^!9pj!wkHN--oCuY$2z& zFWJW0d~;Eb1wMx>HGnXv(I(cuHqcZAO9i0k7p`ZET${$a`-f1rfbPbjXowprC$ZY> zYpw`Rb{BL=7d|nEwfQP3$w$-}!L*yQwAOk><8taYzYGq#)dU{2zP^r*cR`E8<^wl` zxn3KlJpnT;&XbJI$=;${QaJYT*)uVWf)*CoUD$0=3&m*AdQZ{eQCBJFL=Q{w=i+X& zIG?8g<$0ew5WSHzU0^j{*`i;92qoo^iVt#4i{3?^{^bY-=e2>GR7yr+wSICwcdx(0VIqdGX0>B(_zP?ziD-~@5|LKQJj;|G%~2gv3>uFPTfX6MUPFQXCC8J| zLQO03G@L0P6Aog>E-^CC7xSBlb7y2+zfd@GGIYJ}!<2em-Fkre9=YFpw(TPy51an) zE{@SbBUGnuOL@_dSWl3*Wzg(?>X?qa?menfKDx`H2y)P!!51qe>%@!{A89P1}; z6HyPNQ zE4YHNUK%B8{+XK?b4Bqpk>lH-5q!8>ElG+7)bJbk?%lJedl9sF&KB3e zb020N>k(woWrj*lx9)v+1EHj{IIkY_rQqz9e}_vGY+s3FN5|4=n|IJ2aGLH_{`gC} zokyj?)N<-L8G`OJRv+z|r%91^{^A#9?DBiq?-U2RmfU=hFBOJN0TjaV_K{!aAV1c2 z{zJ)#j$Z#FdriMU3&qbH;UfaCaIXrflY7UwQVQAalJ<~T-;^21NM1V=oq?GzV`q2Z@KL8=paQfA)qI7W-8R5?v=Lb`O&<~|CzlOBkY{Rnx2^m zTErK}q5_;WpljN=*py9Sa_A~3Uz<`S!It+eyLz=?yJ_|9vdqTJuU$2*fUGkQKH{`*lU$ykfpsC~oRONsivKCPX>Hy*zl}wK2xic!R1t4-dRLwZA##%} zIX_&M2+Xe}ec6(W+=vk5WZyYGhnF1&IP^0YkYv>|+$Qh(o6a{S_#RQ+Ie&`?gIa{{ z)FO914-h5114;g?j=DHjm#*S9J$H#%v|y1-+kf`&p~blLY`zSE7JiT37~|+=3z@uw zl%R3+FU0cyBC%D3dNb36x-M`0<<#7yS*GTq$Gf^_UQ*0}q zxE9E;{MD@j3p(zt=kDzvo$S1h#E#Sq92r2J-x@ z>ZR9;ev(=IQuaI`IUDu#pDV`XV4Vr^f1vkns!z~UDm{%*kKoz!rK6L>a^pD4VdloK z=U-A9#m7`1c{Aa#YWVzHw4E+?j^x1-HjbR-v+E)R3XY{J6`sDoCi9-3@yer|P8W0@ zu0hv?eY&PxIpX3AU0rh%wm4OTsT8R+M*poU zH;qv8)0d2IevhbmQRlq(s@J0f`(5-Lc1Qg60qnE=J}7*yOnYJHw{x53A&z44yL<^Q zf4H6ripFIi*|E4yYf6;}x6t!}P8{F($@i0nNrc3eN6!QrjSHR51!udT0(WXhAA;S% zI;YmqeI~t^({%Xn^zAP;>f~oU=*TJdwg)@)Dp!T1%Vza1s@_zn__rGZtp;*dBmjTt z`AB&q=s5Tm1HtRWc$1437t((HOCAAzXAEopNK4dl9Ev7e z4fe4RuPn~4c_epp8{yqC#^KJ|)=HI+lSL*fyUp$)U4rJ|akbt5z(Blmzp>+Dll$$X z^{G(#cUv0)k)L~iuS+Kp#x7ZFj^R@vI|&M*A#K^by*sLP44 zD$^q2mZ8mmgWHeL4fkt?d)wn&uj^<2cFw-HYI;HK?o*mTwQwWM(qJXwcD=A(Eepdt zT-F5QyR--#6rNm~R8WdO0)m1Sj7B8G$i^>0B1$nQ?fx@%j$2bcl(g9~c<$2+VU^aV zhN4)*W!vOxJ%g&9uBE=u6||lRBiLv|?pv06$}d4Y$|zBSjF7)9UW&L=b8jE4JB@QA ztzwuNQp~eKTV&>~Ki{QG$k~2HD3W1pi54#UoUM_LvJ-{ZxrJEer2~^Q3*mRTo(t~P zMAW~8kX?K)#f0oYV*#W%bod97AdDor=*_{GUamt8{QJ+Ja2R2E07C&GZ7EMzuJG#> zzk45|rl6I^TYc6rINqr7&YFct%F zx{3$f@w>LpV!DC-vUDlLe404fm*du3KN453ho7;2sxW=LxXC>c#i48tqNE@T%yRCm z!}oYlp&kc8#~7gK#+NzT)P%Jo(7E{pa~UPRM_$l^ZU%J=lo^7H`v{4fcSSr@8ld>@MqiSb2Vzzg&5Q7)DX)@~qu46SB7PnaZPBotmE( z%YO*lv0`fTvcfd;qARKCO%gnu2x%ueY&0 zId`1DgpZ4R^906e^0ve%D76FI@XB@i_G8=_-kQpWH(z?>gS7b_UrrlW)9RuF;LjYx zYl0d2xUZJbIOd5BeDUHpOfk{?ng!u6ATg0~VI0KyytHfug|j(?)$uk6R+IYDiv!2f zBaDD8sp(%`DsggCII9tEdpbRHCr$06z?x zGQ-SV_1mf@kgV2 z{Wp7_+hQ2H%fy+(#>~9av_m!k62uMCZ=Qc`6MFD>y|eo7=b#R{#GqUS2mt83ew_GN zNgUe){S`zQIu!YaHD{qW!UA(?YtUjuXRhXdRq(;!{Yw~m)Uj!Vp$$~Lg~{L0ckkZa zvJVYX>2?>GSd73lwfGhYmzd;IodCr{)wI3ZFRje|K?At$|lpwKE0u!?Bzv!&^^*> zv){*K_&SDF&mn*2_CF-r69*`j;aaMG-L$@twt;?U5vWpp(V2Vq#};2ViWg3|#|PY| zrT+@aWF-jA@T4`?$Nnjm)5-GIo|T;z3|-htz;d9!)ars6oU)2pq+11DV! 
zN0<2=mKnsG5mod6>EbT#M_`F~(fmmK*Y@_osXPuk4ek#Q87cegq^^Nhq2a?Z;=@e; zpQ(vkaOC-~wAA!=x@wo=_~RO8=_4{9PU+neryc1ps0~$75Jbg*_VAw@DF<9lpM8Iw zWs!*I4~dt7LiPdA5Q4egCa7%-mVSs<;TlZ}T1IyMayC_r79#nodjD=d9|Qayz@_xz z*VfZ-j`CsBF~LEYVW}8Kgw*mGc6|ZfU1p#^t&YCUg|dZaM)z)2yon4lJXMT1+{~zp zV*&$-lH&2Pk(K##D{H&)%{(eGw@C`Bh%vH<|GiW`StU5G|9z%Aiw=skptfTnLIb}K zhV{5y>}1$%>@HkxIfV2`gLgi;`=Kw5^Jv1iFo!pD3=T^N3%acCG9csp8JePo=CR5Rqzr+mu0IDOZXqY_D=cC zIC+pu=>G8_;&jZP@IOOoEp30eo&(6NC+qD*p@=AGl^f=VA;LbIoT6ts@OO)$Mp6dG#??{w@*y zz-b&kM7yg-`%*S7z6I5M@Lq!bPpiDx_&m*#w}S-nU9{kz{jC4WMdIsj^Ezzv zH2o?i{LK!Na}Zj!_M*hXSZDhgPd{~UFaa~8bEko@Bq=^ExDgL|Wle|7h+!;4U(L!v z__aaVqR`L&%^>{bPS{6(2}LaA(uR@A{#UCK%mcaq-CvHXE;bJz%k6bB{AIY`G|r8n z?EObz#VCq}C5 zX}9v4b+MgWb<^|Li8nN+^b0Tbo%`BxofU$9hyNM@@-2Zs$gfzp;%#&`n;EGot`oZz zd#+$5=fr+9Tw_bHp>Z|;K`A%avnZ2u(>J`XyU~PafBZ+Lw@O$HgHn{7Wxox3B3dI7 z!+0+|`mz@9vP*nK>$;a(TilxpFL$})%aaC>cmayiP1`rOiKjID#)#OfPCufwIMpv) zZk$X&z7e&8FGY&ELAkG1YHgNpCTZe$BYAnlSwYJgrLhhfGTk@?h`{-STUAMTa-kTF zfo0p$JSx<_J~9h1 zeq0)AXrdADzmi5@Tkt!qcRq2At{;>YIL;jK;=iO9z{V>JmBBFc&RbOYZaKdr(YXCD z`nD!#0O#-V3YzTL82~A?7pZ1kpr{Tu}C}J|N>y;Wb)C0h8ozT11=oXFsCv zBvMAOlZyy>Wn>j_-~HOeruU4YU*~pGw8K*Pte2G5%k0^I zqJ>!kLGkr@=I`wb8-0dJlgIzr?xBh9(-$O|H~6%a6fM`gTrAHm)j3O0e(P)R2k(fF z51+NJgx9_?3V<}sV?!C-E_NEQTH}Rh?!ATrfGIXJh635TQ;0h&)fw@>vjEZ$zmvz6 zY)cMsW@_c~zby-0`&HFAwHo&PmVIFR=>LSI*Tq+_1MsYvVqZ8 zO0PF^=&euTsu_Q=Zpili=L!R+y_gM_cqu~AM+%kq7H?bX8Ft|oZ0C<$eX?;+5ScL4 zA1%V_>OEI7>qG?D5NK^veHy+x6?V(IgH8=b#8nBMf+ z`Bjy*_CtbeZI@0Na^f^f;e$q0II7wO?3KV>&+wIji}#x<6tOEpt)iy%vX4xzewP6+}HM4Qr7S11lsw_ zjkFbHUW64X?a^?Lzpnz=jR-%XL@2(uD?4ACT-^M#h3tGueoX&Oro5}?swWS=qn{rG zBK%&tx))32Mv#|Zk~Q!7W9e*6j%L;*kUEuHlcHD(S``dGU&t};TGvl-+4_=hYhKNd zF1pO~NiW{+O< z*0aV@kFUJ=d{22URw65N*uJN;d7H+ugrKf=;ljS)&x`de!|KMZ3wbp&!-W0k|7l&7 zsvpJr2=t_dOs^}k!55I5JiS2c*uAF{c@<1th)FIwt_lOb4@D=pn#{B z5+iS7x@OQ``M}vKqKX{DB{XB&Jv)wtJaDwb3XjI(?j~USHno+6U}TM1uvoZ`NcnCU zOGOW{Z{?lAEVFYQKbZ%th|i$jv>v5bFO(ue;Ojttv`hVR4fJqO@qd|1(jh1we@~@~ zo(qrrT7<$p;&dwVx+$6X)l zp}?3=1h=xk+HDhC@J+owK`%U2XS{iD(CJHYP6?J`$#6v ze*P4Peo)L|l|NB0A`opD+dO}6Idt~=SV-H(CWMQ?HOUy3s=A(+3NiGMxGj>$iIiQ(<^&{pGQ+cb$9Ow=_$ z*dQ!FrC8L%ZTk|HV#yOw@L~%P^jJ z0U#oCZfU)@W^;1EL9JU^31@pX>b?dJH+e*#6QH=*U8spW(2=WTJQBr zwGCy3FU@uZ*+=fI{&`l=d+>H&8qO3l&OxW3Fb~eV%ofC5aiBag527K>hsrkfV;Iht z1(mP8Q*G0(Ae6+FPYR1O9<|*(ClE4MV_twO@%ZC&GCAQAT<#J|~WKl?Vt?p)sWu%dE*_t)ddBn4S|jm@w?UHzKoVapLMCO~6L;b6J;p z{4HbcGD^Pluci3-nf0Vtd$=DJkqyU4P|?NQ#mRY2R3pVP8OzudM84C!oDqCwYJcgf zlem#anU(CtE}B4u+p)XS^y_cx-~9eV@ouC&yFD+ijZNRjDkEBvTBlGr`3T{t5L3p8TO$ zi2W6Dtl>{~ccL^z7AkxiZ|?o!eJf0-lBnx_uxm3>cD+fLDsY!4kX`7k5H2 zj8fSoLkp~F+g}TrfOh)t7e#)}Pp_V6_4Uy(bfG0#$%*QXPdT<^J<*ZN0xKULJzc(4 zX`eD?oa}zPsRQx2U+M5{EQ27_>%*@u?|Q=jaPbafVjYL&_V3FHeOL{lXc{`hDqFF8 z%rcIh5_zUn6Z4#8qK*6+4hy0<)bw$8(ye=i`}kyYVDiMzn26q@cWT~#(B*wD=#G6& zF;5tXa?jaeyB8XSvMy{po=m^9&rWHU(tW5|ueX=Y#AP7Jn~8UK%q||BVD?96?|;-B z9-0>Ep=CrQWPXi}KSFFv8e~1SuMo7Deojkz^lDd`m-%EesG0nd;_W1(6Io9xx2W$V zK46DA3m-P(Xk;r_%vfi*CJyMZlI#>J*;tCJ2^N`+<(@4|L4Dr;9C)P7 zM557Hh)rYxIV;t>%OLiJABo z`h|Fgf~q&Rw!)Knj!$IeLr!`=;HmvQllaTf`{Wnl@SnQQjwIR)tQV!%AMMwjIIh+; zXYj6W`=wY9|NKYrdApOJ1uM4ByW)G8Cu$(4VvAF<1A&5W?#><>0PRvBH|Y=Zsn%)vV4q!??zb>|4-{dY$k!1 zsRhY|I&V~H$ziFeB+KKxnr2A1xwZbIAnSJ>;gLvn>_er-`nakwVB#E!)K}5 zv&9?h;-1G&{?|=|ueg6TzONTDd8@aJ?43a|akxY3Pp{7xq;ThoN4eOC`O$dgUnPpnxB98%ttzs_MQdg{TU*; z8yktNOilUE?M=^4ZEE;94~6M>T}dTMx4mWmcB!}1P-Nui%G-^7q-nYEv>bP1(yx;^N;W8MTRDMwrEE+}qRV;{$T1<>rJw^eXEnY>yV% zND4CU5U;YlQpo*@T+OyFy-OU#-p7huWx%>!VLb3#R1$L=!&^80e;9kqsHncMefZ1} zLr8ZsgbWQzgLEn>Ej56FNH-`jNQ1P2fV6ZfokJ_qE#2KM`9J)g=UMO9_XBJ30nRyl z?|bjNuIt)7&d8BN@F~}H-9n{F^nLUf`D!vx 
[binary PNG data omitted]
literal 0
HcmV?d00001

diff --git a/ex_figs/quickcheck_4.png b/ex_figs/quickcheck_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..9dc4c64cb7492a119581d34491883dc377fb0071
GIT binary patch
literal 33743
[binary PNG data omitted]
literal 0
HcmV?d00001

diff --git a/hparams.py b/hparams.py
old mode 100644
new mode 100755
index ca39cbf..8edf66e
--- a/hparams.py
+++ b/hparams.py
@@ -40,6 +40,105 @@
     decoder_stability_loss=0.0,  # max 100
     decoder_activation_loss=5e-06,  # max 0.001
 )
+
+
+# Test setting with multiple attention heads
+#python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --no_eval --no_forward_split --asgd_decay=0.99 --max_steps=11500 --save_from_step=200
+params_TEST_attn_head = dict(
+    batch_size=256,
+    #train_window=380,
+    train_window=283,
+    train_skip_first=0,
+    rnn_depth=267,
+    use_attn=True,  # !!!! must be True for this test
+    attention_depth=64,
+    attention_heads=2,  # !!!! test with 2 attention heads
+    encoder_readout_dropout=0.4768781146510798,
+
+    encoder_rnn_layers=1,
+    decoder_rnn_layers=1,
+
+    # decoder_state_dropout_type=['outside','outside'],
+    decoder_input_dropout=[1.0, 1.0, 1.0],
+    decoder_output_dropout=[0.975, 1.0, 1.0],  # min 0.95
+    decoder_state_dropout=[0.99, 0.995, 0.995],  # min 0.95
+    decoder_variational_dropout=[False, False, False],
+    decoder_candidate_l2=0.0,
+    decoder_gates_l2=0.0,
+
+    fingerprint_fc_dropout=0.8232342370695286,
+    gate_dropout=0.9967589439360334,  #0.9786
+    gate_activation='none',
+    encoder_dropout=0.030490422531402273,
+    encoder_stability_loss=0.0,  # max 100
+    encoder_activation_loss=1e-06,  # max 0.001
+    decoder_stability_loss=0.0,  # max 100
+    decoder_activation_loss=5e-06,  # max 0.001
+)
+
+
+# Test setting with multiple stacked GRU layers
+#python3 trainer.py --name TEST_stacked --hparam_set=TEST_stacked --n_models=3 --no_eval --no_forward_split --asgd_decay=0.99 --max_steps=11500 --save_from_step=200
+params_TEST_stacked = dict(
+    batch_size=256,
+    #train_window=380,
+    train_window=283,
+    train_skip_first=0,
+    rnn_depth=267,
+    use_attn=False,
+    attention_depth=64,
+    attention_heads=1,
+    encoder_readout_dropout=0.4768781146510798,
+
+    encoder_rnn_layers=2,
+    decoder_rnn_layers=2,
+
+    # decoder_state_dropout_type=['outside','outside'],
+    decoder_input_dropout=[1.0, 1.0, 1.0],
+    decoder_output_dropout=[0.975, 1.0, 1.0],  # min 0.95
+    decoder_state_dropout=[0.99, 0.995, 0.995],  # min 0.95
+    decoder_variational_dropout=[False, False, False],
+    decoder_candidate_l2=0.0,
+    decoder_gates_l2=0.0,
+    fingerprint_fc_dropout=0.8232342370695286,
+    gate_dropout=0.9967589439360334,  #0.9786
+    gate_activation='none',
+    encoder_dropout=0.030490422531402273,
+    encoder_stability_loss=0.0,  # max 100
+    encoder_activation_loss=1e-06,  # max 0.001
+    decoder_stability_loss=0.0,  # max 100
+    decoder_activation_loss=5e-06,  # max 0.001
+)
+
+
 
 # Default incumbent on last smac3 search
 params_definc = dict(
@@ -185,6 +284,9 @@
     'foundinc':params_foundinc,
     'inst81':params_inst81,
     'inst83':params_inst83,
+
+    'TEST_attn_head':params_TEST_attn_head,
+    'TEST_stacked':params_TEST_stacked,
 }
 
 
@@ -196,3 +298,4 @@
 
 def build_from_set(set_name):
     return build_hparams(sets[set_name])
+
diff --git a/percent_dense.png b/percent_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..2ad21ffeb72843d2afbbc89a5217875410c2cbe5
GIT binary patch
literal 8162
[binary PNG data omitted]
literal 0
HcmV?d00001
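The two TEST hparam sets above are registered in the same `sets` dictionary as the existing configurations, so they can be exercised through the normal helpers before launching the full trainer commands quoted in their comments. A minimal sketch, assuming (as the registry in hparams.py suggests) that `build_hparams` exposes the dict keys as attributes on the returned hparams object:

    # Sketch only: build the new hparam sets through the registry in hparams.py.
    # Attribute access on the result is assumed, not confirmed from this patch.
    from hparams import build_from_set

    for name in ['TEST_attn_head', 'TEST_stacked']:
        hp = build_from_set(name)  # same lookup path trainer.py's --hparam_set presumably uses
        print(name, hp.use_attn, hp.attention_heads,
              hp.encoder_rnn_layers, hp.decoder_rnn_layers)

If the two sets print the intended attention and layer settings, the trainer commands in the comments can be run as-is.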

From aeabb2a253360174be76ee0d111a3e36d8999023 Mon Sep 17 00:00:00 2001
From: gk
Date: Tue, 26 Jun 2018 10:33:59 -0700
Subject: [PATCH 02/42] simple imputation

---
 PREPROCESS.py | 61 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/PREPROCESS.py b/PREPROCESS.py
index 747d5fc..ca2869a 100644
--- a/PREPROCESS.py
+++ b/PREPROCESS.py
@@ -19,6 +19,7 @@
 
 #from statsmodels.tsa.seasonal import seasonal_decompose
 #stl = seasonal_decompose(x)
 
+from sklearn.preprocessing import Imputer
 
 
 
@@ -126,19 +127,40 @@
         return df_filled
 
 
-    def imputation__simple(df):
+    def imputation__simple(df,imputation_method):
         """
         Just a placeholder for now: fill all missing values with zeros,
         or mean or median imputation
         """
-        df_filled = df
-        return df_filled
+        missing_values = [-1]#['NaN', -1]
+        imp = Imputer(missing_values=missing_values,
+                      strategy=imputation_method,
+                      axis=1)
+        vals = imp.fit_transform(df.values)#[:,1:]) #The data is only [:,1:].
+        #"Some rows only contain missing values: [ 35 251 281]"
+        #But we get some rows with all missing values. Since we don't actually care about those rows and will never use them,
+        #for now just use the "Page" number as well to avoid this.
+ + + cols = df.columns + new_df = pd.DataFrame({cols[i]:vals[:,i] for i in range(vals.shape[1])}) + new_df['Page'] = df['Page'] + #Put "Page" at left + cols = new_df.columns.tolist() + new_df = new_df[cols[-1:]+cols[:-1]] + new_df.reset_index(drop=True,inplace=True) + return new_df + + if (imputation_method == 'median') or (imputation_method == 'mean'): + df = imputation__simple(df,imputation_method) + else: + raise Exception('not implemented other methods yet') #First deal with small gaps (missing gaps fewer than e.g. 7 days): - df = imputation_small_gaps(df,imputation_method) + #df = imputation_small_gaps(df,imputation_method) #Deal with longer gaps [e.g. by removing enough blocks of length S, where #S is the seasonality, to completely get rid of gaps] @@ -157,7 +179,7 @@ def imputation__simple(df): -def format_like_Kaggle(df, myDataDir, start_date=None, end_date=None): +def format_like_Kaggle(df, myDataDir, imputation_method, start_date=None, end_date=None): """ Take my data and format it exactly as needed to use for the Kaggle seq2seq model [requires making train_1.csv, train_2.csv, key_1.csv, key_2.csv] @@ -165,7 +187,7 @@ def format_like_Kaggle(df, myDataDir, start_date=None, end_date=None): """ - def make_train_csv(df, save_path, start_date, end_date): + def make_train_csv(df, save_path, imputation_method, start_date, end_date): """ Make the train_1.csv """ @@ -184,6 +206,11 @@ def make_train_csv(df, save_path, start_date, end_date): idx = pd.date_range(earliest,latest) OUT_OF_RANGE_FILL_VALUE = -1 #np.NaN #0 #puttign as nan casts to float and cannot convert to int + + + + + #Reorganize data for each id (->"Page") unique_ids = pd.unique(df['Page']) df_list = [] @@ -196,12 +223,22 @@ def make_train_csv(df, save_path, start_date, end_date): dates.index = pd.to_datetime(dates.index).strftime('%Y-%m-%d') dd = pd.DataFrame(dates).T dd['Page'] = u + + #If doing imputation / other + #for each series individually + #... + df_list.append(dd) df = pd.concat(df_list,axis=0) cols = df.columns.tolist() df = df[cols[-1:]+cols[:-1]] df.reset_index(drop=True,inplace=True) + + #Imputation, dealing with missing seasonality blocks / out of phase + df = do_imputation(df,imputation_method) + + df.to_csv(save_path,index=False) return df @@ -217,12 +254,12 @@ def make_key_csv(df): #Make the train csv [for now just do 1, ignore the train 2 part ???] save_path = os.path.join(os.path.split(myDataDir)[0],'train_1_my_data.csv') - df = make_train_csv(df, save_path, start_date, end_date) + df = make_train_csv(df, save_path, imputation_method, start_date, end_date) #For the prediction phase, need the key ???? 
# make_key_csv(df) - return + return df @@ -240,8 +277,8 @@ def make_key_csv(df): # PARAMETERS # ============================================================================= # TOTAL COMPLETED TRIPS: - myDataDir = r"/Users/......../Desktop/exData/totalCompletedTripsDaily" - imputation_method = 'STL' + myDataDir = r"/Users/kocher/Desktop/forecasting/exData/totalCompletedTripsDaily" + imputation_method = 'median' #'STL' START_DATE = '2015-01-01' #None END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful @@ -264,8 +301,6 @@ def make_key_csv(df): df = remove_cities(df,REMOVE_ID_LIST) #Put into same format as used by Kaggle, save out csv's - format_like_Kaggle(df, myDataDir, start_date=START_DATE, end_date=END_DATE) + df = format_like_Kaggle(df, myDataDir, imputation_method, start_date=START_DATE, end_date=END_DATE) - #Imputation, dealing with missing seasonality blocks / out of phase - df = do_imputation(df,imputation_method) \ No newline at end of file From 21731eddca950989de9a3760ddd5f8817123e975 Mon Sep 17 00:00:00 2001 From: gk Date: Wed, 27 Jun 2018 14:41:57 -0700 Subject: [PATCH 03/42] making features from our data --- .gitignore | 2 + PREDICT.py | 0 PREPROCESS.py | 76 +++++++++++++++++++-- QUICKLOOK.py | 0 Readme.md | 4 +- cocob.py | 0 extractor.py | 0 feeder.py | 0 input_pipe.py | 0 make_features.py | 170 ++++++++++++++++++++++++++++++++++------------- model.py | 0 trainer.py | 0 12 files changed, 198 insertions(+), 54 deletions(-) mode change 100644 => 100755 PREDICT.py mode change 100644 => 100755 PREPROCESS.py mode change 100644 => 100755 QUICKLOOK.py mode change 100644 => 100755 Readme.md mode change 100644 => 100755 cocob.py mode change 100644 => 100755 extractor.py mode change 100644 => 100755 feeder.py mode change 100644 => 100755 input_pipe.py mode change 100644 => 100755 make_features.py mode change 100644 => 100755 model.py mode change 100644 => 100755 trainer.py diff --git a/.gitignore b/.gitignore index 4959db7..fc3b504 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ data/submission.csv.gz data/* */.DS_STORE .DS_STORE +images/* +ex_figs/* diff --git a/PREDICT.py b/PREDICT.py old mode 100644 new mode 100755 diff --git a/PREPROCESS.py b/PREPROCESS.py old mode 100644 new mode 100755 index ca2869a..1331230 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -10,17 +10,17 @@ #import matplotlib #matplotlib.use('Agg') -#import matplotlib.pyplot as plt +import matplotlib.pyplot as plt import os import pandas as pd -#import numpy as np +import numpy as np #from statsmodels.tsa.seasonal import seasonal_decompose #stl = seasonal_decompose(x) from sklearn.preprocessing import Imputer - +from collections import Counter @@ -83,6 +83,62 @@ def get_earliest_latest_dates(df): + + +def __missing_vals_distribution(df): + """ + Look at two things: + - What fraction of our time series are desne vs. have >= 1 missing value? + - Of the series that have missing values, what is distribution of gap lengths? 
+ [important to know since will be doing imputation on it] + + df - in format like Kaggle competition: cols are dates, rows are series + start/end missing, nd intermedite gaps have been filled with -1 + """ + + def make_cdf(v): + c = Counter(v) + x = c.keys() + x = np.array(x) - 1 #-1 to go from diff in days from present data -> gap length + y = c.values() + # print(c) + plt.figure() + #plt.plot(x,y,drawstyle='steps')#,marker='o') + plt.plot(x,y,linestyle='None',marker='o') + plt.title('Distribution of Missing Data Gap Length',fontsize=20) + plt.xlabel('Gap Length [days]',fontsize=20) + plt.ylabel('Count',fontsize=20) + # plt.axis([-1,10,0,550]) + plt.show() + + #get fraction dense vs sparse: + dd = df.values[:,1:] + sparse = (dd==-1).sum(axis=1) + Nsparse = float((sparse>0).sum()) + print(Nsparse) + Ntotal = float(dd.shape[0]) + fraction_dense = (Ntotal - Nsparse) / Ntotal + print('Nsparse', Nsparse) + print('Ntotal', Ntotal) + print('fraction_dense', fraction_dense) + + #Look at distribution of INTERMEDIATE gap lengths + #ignore the leading / lagging unfilled since could just be from the series + #not officially starting yet, or it got closed out. + all_gaps = [] + for row in dd: + inds = np.where(row!=-1)[0] + x = np.diff(inds) + t = list(x[x>1]) + if len(t)>0: + all_gaps.extend(t) + make_cdf(all_gaps) + + + + + + def remove_seasonal_blocks(df): """ For places in the data where there are missing gaps of length > 1 seasonality, @@ -189,7 +245,7 @@ def format_like_Kaggle(df, myDataDir, imputation_method, start_date=None, end_da def make_train_csv(df, save_path, imputation_method, start_date, end_date): """ - Make the train_1.csv + Make the train_2.csv """ #Rename columns to be as in Kaggle data: df.rename(columns={'id':'Page'},inplace=True) @@ -235,6 +291,11 @@ def make_train_csv(df, save_path, imputation_method, start_date, end_date): df = df[cols[-1:]+cols[:-1]] df.reset_index(drop=True,inplace=True) + + #Just for analysis: look at kinds of gaps in series + __missing_vals_distribution(df) + + #Imputation, dealing with missing seasonality blocks / out of phase df = do_imputation(df,imputation_method) @@ -253,12 +314,15 @@ def make_key_csv(df): #Make the train csv [for now just do 1, ignore the train 2 part ???] - save_path = os.path.join(os.path.split(myDataDir)[0],'train_1_my_data.csv') + save_path = os.path.join(os.path.split(myDataDir)[0],'train_2[ours].csv') df = make_train_csv(df, save_path, imputation_method, start_date, end_date) #For the prediction phase, need the key ???? # make_key_csv(df) + + + return df @@ -303,4 +367,4 @@ def make_key_csv(df): #Put into same format as used by Kaggle, save out csv's df = format_like_Kaggle(df, myDataDir, imputation_method, start_date=START_DATE, end_date=END_DATE) - + diff --git a/QUICKLOOK.py b/QUICKLOOK.py old mode 100644 new mode 100755 diff --git a/Readme.md b/Readme.md old mode 100644 new mode 100755 index d651938..b8b7841 --- a/Readme.md +++ b/Readme.md @@ -46,11 +46,11 @@ GK modifications for own data: 2. $source activate gktf 3. $cd ..../kaggle-web-traffic 4. $python3 PREPROCESS.py -5. $python3 make_features.py data/vars --add_days=63 +5. $python3 make_features.py data/kaggle/vars kaggle --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full} depending on using default Arturius kaggle vs. own custom for this application 6. $python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 7. 
$python3 PREDICT.py -- confirmed it runs with 2 layers stacked, or with attention mechanism. Performance is worse in both cases, at least initially. +- confirmed it runs with 2 layers stacked GRU (for both encoder and decoder modules), or with attention mechanism. Performance is worse in both cases [SMAPE], at least initially. To do: diff --git a/cocob.py b/cocob.py old mode 100644 new mode 100755 diff --git a/extractor.py b/extractor.py old mode 100644 new mode 100755 diff --git a/feeder.py b/feeder.py old mode 100644 new mode 100755 diff --git a/input_pipe.py b/input_pipe.py old mode 100644 new mode 100755 diff --git a/make_features.py b/make_features.py old mode 100644 new mode 100755 index 23d547f..82664cd --- a/make_features.py +++ b/make_features.py @@ -28,7 +28,7 @@ def read_cached(name) -> pd.DataFrame: return df -def read_all() -> pd.DataFrame: +def read_all(data_type) -> pd.DataFrame: """ Reads source data for training/prediction """ @@ -43,40 +43,43 @@ def read_file(file): df = pd.read_pickle(path) else: # Official data - df = read_file('train_2') - # Scraped data - scraped = read_file('2017-08-15_2017-09-11') - # Update last two days by scraped data - df[pd.Timestamp('2017-09-10')] = scraped['2017-09-10'] - df[pd.Timestamp('2017-09-11')] = scraped['2017-09-11'] + filename = f'train_2[{data_type}]' + df = read_file(filename) + + if data_type=='kaggle': + # Scraped data + scraped = read_file('2017-08-15_2017-09-11') + # Update last two days by scraped data + df[pd.Timestamp('2017-09-10')] = scraped['2017-09-10'] + df[pd.Timestamp('2017-09-11')] = scraped['2017-09-11'] df = df.sort_index() # Cache result df.to_pickle(path) return df -# todo:remove -def make_holidays(tagged, start, end) -> pd.DataFrame: - def read_df(lang): - result = pd.read_pickle('data/holidays/%s.pkl' % lang) - return result[~result.dw].resample('D').size().rename(lang) +## todo:remove +#def make_holidays(tagged, start, end) -> pd.DataFrame: +# def read_df(lang): +# result = pd.read_pickle('data/holidays/%s.pkl' % lang) +# return result[~result.dw].resample('D').size().rename(lang) +# +# holidays = pd.DataFrame([read_df(lang) for lang in ['en']])#['de', 'en', 'es', 'fr', 'ja', 'ru', 'zh']]) #!!!!!!!!!!! can play around with this: english only +# holidays = holidays.loc[:, start:end].fillna(0) +# result =tagged[['country']].join(holidays, on='country').drop('country', axis=1).fillna(0).astype(np.int8) +# result.columns = pd.DatetimeIndex(result.columns.values) +# return result - holidays = pd.DataFrame([read_df(lang) for lang in ['de', 'en', 'es', 'fr', 'ja', 'ru', 'zh']]) - holidays = holidays.loc[:, start:end].fillna(0) - result =tagged[['country']].join(holidays, on='country').drop('country', axis=1).fillna(0).astype(np.int8) - result.columns = pd.DatetimeIndex(result.columns.values) - return result - -def read_x(start, end) -> pd.DataFrame: +def read_x(start, end, data_type) -> pd.DataFrame: """ Gets source data from start to end date. Any date can be None """ - df = read_all() + df = read_all(data_type) # User GoogleAnalitycsRoman has really bad data with huge traffic spikes in all incarnations. 
# Wikipedia banned him, we'll ban it too - bad_roman = df.index.str.startswith("User:GoogleAnalitycsRoman") - df = df[~bad_roman] +# bad_roman = df.index.str.startswith("User:GoogleAnalitycsRoman") +# df = df[~bad_roman] if start and end: return df.loc[:, start:end] elif end: @@ -164,7 +167,7 @@ def find_start_end(data: np.ndarray): return start_idx, end_idx -def prepare_data(start, end, valid_threshold) -> Tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray]: +def prepare_data(start, end, valid_threshold, data_type) -> Tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray]: """ Reads source data, calculates start and end of each series, drops bad series, calculates log1p(series) :param start: start date of effective time interval, can be None to start from beginning @@ -173,7 +176,7 @@ def prepare_data(start, end, valid_threshold) -> Tuple[pd.DataFrame, pd.DataFram ratio is less than threshold :return: tuple(log1p(series), nans, series start, series end) """ - df = read_x(start, end) + df = read_x(start, end, data_type) starts, ends = find_start_end(df.values) # boolean mask for bad (too short) series page_mask = (ends - starts) / df.shape[1] < valid_threshold @@ -262,6 +265,10 @@ def normalize(values: np.ndarray): def run(): parser = argparse.ArgumentParser(description='Prepare data') parser.add_argument('data_dir') + + parser.add_argument('data_type', help="Which data set to use: {'kaggle','ours'}") + parser.add_argument('features_set', help="Which set of features to use. His default for Kaggle vs. one of my custom sets: {'arturius','simple','full'}") + parser.add_argument('--valid_threshold', default=0.0, type=float, help="Series minimal length threshold (pct of data length)") parser.add_argument('--add_days', default=64, type=int, help="Add N days in a future for prediction") parser.add_argument('--start', help="Effective start date. Data before the start is dropped") @@ -269,8 +276,11 @@ def run(): parser.add_argument('--corr_backoffset', default=0, type=int, help='Offset for correlation calculation') args = parser.parse_args() + print(args.data_dir, args.data_type, args.features_set) + # Get the data - df, nans, starts, ends = prepare_data(args.start, args.end, args.valid_threshold) + df, nans, starts, ends = prepare_data(args.start, args.end, args.valid_threshold, args.data_type) + # Our working date range data_start, data_end = df.columns[0], df.columns[-1] @@ -281,7 +291,11 @@ def run(): # Group unique pages by agents assert df.index.is_monotonic_increasing - page_map = uniq_page_map(df.index.values) + #Only do this wikipedia web scraping if doing the kaggle comp. 
Not for ours + if args.data_type=='kaggle': + page_map = uniq_page_map(df.index.values) + + # Yearly(annual) autocorrelation raw_year_autocorr = batch_autocorr(df.values, 365, starts, ends, 1.5, args.corr_backoffset) @@ -298,39 +312,99 @@ def run(): quarter_autocorr = normalize(np.nan_to_num(raw_quarter_autocorr)) # Calculate and encode page features - page_features = make_page_features(df.index.values) - encoded_page_features = encode_page_features(page_features) + if args.data_type=='kaggle': + page_features = make_page_features(df.index.values) + encoded_page_features = encode_page_features(page_features) # Make time-dependent features features_days = pd.date_range(data_start, features_end) #dow = normalize(features_days.dayofweek.values) week_period = 7 / (2 * np.pi) - dow_norm = features_days.dayofweek.values / week_period + dow_norm = features_days.dayofweek.values / week_period #S.dayofweek gives day of the week with Monday=0, Sunday=6 dow = np.stack([np.cos(dow_norm), np.sin(dow_norm)], axis=-1) - + + #index of week number + year_period = 52. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] + woy_norm = features_days.weekofyear.values / year_period #not sure if by default this starts on Monday vs Sunday + woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) + + # Assemble indices for quarterly lagged data lagged_ix = np.stack(lag_indexes(data_start, features_end), axis=-1) - page_popularity = df.median(axis=1) - page_popularity = (page_popularity - page_popularity.mean()) / page_popularity.std() + + count_median = df.median(axis=1) + count_median = normalize(count_median) + + + #Play around w a few other basic summary stats + percentiles = [] + for pctl in [5,25,75,95]: + percentiles.append(normalize(np.percentile(df.values,pctl,axis=1))) + count_variance = normalize(np.var(df.values,axis=1)) + #filled_len = df.values.shape[1] - np.count_nonzero(np.isnan(df.values),axis=1) #non-nans + #series_length = (df.values>0).sum(axis=1) #actually it has already been log transofmred so this is not correct + # Put NaNs back df[nans] = np.NaN - # Assemble final output - tensors = dict( - hits=df, - lagged_ix=lagged_ix, - page_map=page_map, - page_ix=df.index.values, - pf_agent=encoded_page_features['agent'], - pf_country=encoded_page_features['country'], - pf_site=encoded_page_features['site'], - page_popularity=page_popularity, - year_autocorr=year_autocorr, - quarter_autocorr=quarter_autocorr, - dow=dow, - ) + + #Compile the features + print(f'Using {args.features_set} set of features') + + if args.features_set == 'arturius': + tensors = dict( + hits=df, + lagged_ix=lagged_ix, + page_map=page_map, + page_ix=df.index.values, + pf_agent=encoded_page_features['agent'],#ll-access_all-agents all-access_spider desktop_all-agents mobile-web_all-agents + pf_country=encoded_page_features['country'],#de en es fr ja ru zh + pf_site=encoded_page_features['site'], #commons.wikimedia.org wikipedia.org www.mediawiki.org + count_median=count_median, + year_autocorr=year_autocorr, + quarter_autocorr=quarter_autocorr, + dow=dow,#N x 2 array since encoded week periodicity as complex number + ) + + elif args.features_set == 'simple': + tensors = dict( + hits=df, + count_median=count_median,#this is just the median feature, can put in others too + dow=dow, + ) + + elif args.features_set == 'full': + tensors = dict( + hits=df, + page_ix=df.index.values, + + year_autocorr=year_autocorr, + quarter_autocorr=quarter_autocorr, + count_median=count_median,#this is just the median 
feature, can put in others too + count_variance=count_variance,#variance + + #percentiles + count_pctl_5=percentiles[0],#5th percentile + count_pctl_25=percentiles[1],#25th percentile + count_pctl_75=percentiles[2],#75th percentile + count_pctl_95=percentiles[3],#95th percentile +# series_length=series_length,#length of series [number of samples] to get idea of how much history a series has #number nonzero + + #Other time-frequency/scale features + #... + + #N x 2 array since encoded week periodicity as complex number + dow=dow, + woy=woy,#and want want week number too, aggregating last ~10 days into week 52 + ) + else: + raise Exception(f'features_set must be specified\nOne of ["arturius","simple","full"]') + + + + plain = dict( features_days=len(features_days), data_days=len(df.columns), @@ -341,6 +415,10 @@ def run(): ) + + print(tensors) + print(plain) + # Store data to the disk VarFeeder(args.data_dir, tensors, plain) diff --git a/model.py b/model.py old mode 100644 new mode 100755 diff --git a/trainer.py b/trainer.py old mode 100644 new mode 100755 From 6b48e096b8fbac1968807dcf599b2dc7177ac743 Mon Sep 17 00:00:00 2001 From: gk Date: Wed, 27 Jun 2018 18:11:58 -0700 Subject: [PATCH 04/42] sampling_period daily weekly etc --- PREPROCESS.py | 25 +++++++-- Readme.md | 5 +- make_features.py | 130 +++++++++++++++++++++++++++++++++-------------- 3 files changed, 114 insertions(+), 46 deletions(-) diff --git a/PREPROCESS.py b/PREPROCESS.py index 1331230..6c268b8 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -235,7 +235,7 @@ def imputation__simple(df,imputation_method): -def format_like_Kaggle(df, myDataDir, imputation_method, start_date=None, end_date=None): +def format_like_Kaggle(df, myDataDir, imputation_method, sampling_period, start_date=None, end_date=None): """ Take my data and format it exactly as needed to use for the Kaggle seq2seq model [requires making train_1.csv, train_2.csv, key_1.csv, key_2.csv] @@ -243,10 +243,20 @@ def format_like_Kaggle(df, myDataDir, imputation_method, start_date=None, end_da """ - def make_train_csv(df, save_path, imputation_method, start_date, end_date): + def make_train_csv(df, save_path, imputation_method, sampling_period, start_date, end_date): """ Make the train_2.csv """ + + def aggregate(df, sampling_period): + """ + Aggregate the data (average it) to downsample + to desired sample period, e.g. daily measurements -> weekly or monthly. + Should smooth out some noise, and help w seasonality. + """ + return df + + #Rename columns to be as in Kaggle data: df.rename(columns={'id':'Page'},inplace=True) @@ -298,6 +308,10 @@ def make_train_csv(df, save_path, imputation_method, start_date, end_date): #Imputation, dealing with missing seasonality blocks / out of phase df = do_imputation(df,imputation_method) + #Could do impoutation then downsampling, vs. downsampling then imputation ... unclear which is better here in general. + #for now assume we do ipmutation THEN aggregation: + df = aggregate(df,sampling_period) + df.to_csv(save_path,index=False) @@ -314,8 +328,8 @@ def make_key_csv(df): #Make the train csv [for now just do 1, ignore the train 2 part ???] - save_path = os.path.join(os.path.split(myDataDir)[0],'train_2[ours].csv') - df = make_train_csv(df, save_path, imputation_method, start_date, end_date) + save_path = os.path.join(os.path.split(myDataDir)[0],f"train_2[ours_{sampling_period}].csv") + df = make_train_csv(df, save_path, imputation_method, sampling_period, start_date, end_date) #For the prediction phase, need the key ???? 
# make_key_csv(df) @@ -346,6 +360,7 @@ def make_key_csv(df): START_DATE = '2015-01-01' #None END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful + SAMPLING_PERIOD = 'daily' #'daily', 'weekly', 'monthly' @@ -365,6 +380,6 @@ def make_key_csv(df): df = remove_cities(df,REMOVE_ID_LIST) #Put into same format as used by Kaggle, save out csv's - df = format_like_Kaggle(df, myDataDir, imputation_method, start_date=START_DATE, end_date=END_DATE) + df = format_like_Kaggle(df, myDataDir, imputation_method, SAMPLING_PERIOD, start_date=START_DATE, end_date=END_DATE) diff --git a/Readme.md b/Readme.md index b8b7841..2f8e2d2 100755 --- a/Readme.md +++ b/Readme.md @@ -46,7 +46,7 @@ GK modifications for own data: 2. $source activate gktf 3. $cd ..../kaggle-web-traffic 4. $python3 PREPROCESS.py -5. $python3 make_features.py data/kaggle/vars kaggle --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full} depending on using default Arturius kaggle vs. own custom for this application +5. $python3 make_features.py data/vars kaggle daily arturius --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full, full_w_context} depending on using default Arturius kaggle vs. own custom for this application; and specify sampling period 6. $python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 7. $python3 PREDICT.py @@ -55,7 +55,6 @@ GK modifications for own data: To do: 1. finish PREPROCESS.py to do better imputation using basic forecasting method [just use STL or Theta to fill in small gaps; otherwise remove blocks] -2. modify make_features / InputPipeline / VarFeeder etc. to NOT do the lagged autocorrelations [if ts too short], to NOT use lagged_x, to NOT use wikipedia specific features. -Use only features relevant to this data. Still use the (tiled) median series value (before standard scaling), or few other quantiles, too. Keep day of week, add onehot encoded continent or use country like he has it. +2. PREPROCESS.py - allow downsample in time to weekly, monthly 3. Prediction intervals 4. Architecture improvements \ No newline at end of file diff --git a/make_features.py b/make_features.py index 82664cd..9321fe6 100755 --- a/make_features.py +++ b/make_features.py @@ -28,7 +28,7 @@ def read_cached(name) -> pd.DataFrame: return df -def read_all(data_type) -> pd.DataFrame: +def read_all(data_type,sampling_period) -> pd.DataFrame: """ Reads source data for training/prediction """ @@ -43,7 +43,7 @@ def read_file(file): df = pd.read_pickle(path) else: # Official data - filename = f'train_2[{data_type}]' + filename = f'train_2[{data_type}_{sampling_period}]' df = read_file(filename) if data_type=='kaggle': @@ -71,11 +71,11 @@ def read_file(file): # return result -def read_x(start, end, data_type) -> pd.DataFrame: +def read_x(start, end, data_type, sampling_period) -> pd.DataFrame: """ Gets source data from start to end date. Any date can be None """ - df = read_all(data_type) + df = read_all(data_type,sampling_period) # User GoogleAnalitycsRoman has really bad data with huge traffic spikes in all incarnations. 
# Wikipedia banned him, we'll ban it too # bad_roman = df.index.str.startswith("User:GoogleAnalitycsRoman") @@ -167,7 +167,7 @@ def find_start_end(data: np.ndarray): return start_idx, end_idx -def prepare_data(start, end, valid_threshold, data_type) -> Tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray]: +def prepare_data(start, end, valid_threshold, data_type, sampling_period) -> Tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray]: """ Reads source data, calculates start and end of each series, drops bad series, calculates log1p(series) :param start: start date of effective time interval, can be None to start from beginning @@ -176,7 +176,7 @@ def prepare_data(start, end, valid_threshold, data_type) -> Tuple[pd.DataFrame, ratio is less than threshold :return: tuple(log1p(series), nans, series start, series end) """ - df = read_x(start, end, data_type) + df = read_x(start, end, data_type, sampling_period) starts, ends = find_start_end(df.values) # boolean mask for bad (too short) series page_mask = (ends - starts) / df.shape[1] < valid_threshold @@ -267,7 +267,8 @@ def run(): parser.add_argument('data_dir') parser.add_argument('data_type', help="Which data set to use: {'kaggle','ours'}") - parser.add_argument('features_set', help="Which set of features to use. His default for Kaggle vs. one of my custom sets: {'arturius','simple','full'}") + parser.add_argument('sampling_period', help="Sampling period for our data: {'daily','weekly','monthly'}") + parser.add_argument('features_set', help="Which set of features to use. His default for Kaggle vs. one of my custom sets: {'arturius','simple','full','full_w_context'}") parser.add_argument('--valid_threshold', default=0.0, type=float, help="Series minimal length threshold (pct of data length)") parser.add_argument('--add_days', default=64, type=int, help="Add N days in a future for prediction") @@ -279,9 +280,13 @@ def run(): print(args.data_dir, args.data_type, args.features_set) # Get the data - df, nans, starts, ends = prepare_data(args.start, args.end, args.valid_threshold, args.data_type) + df, nans, starts, ends = prepare_data(args.start, args.end, args.valid_threshold, args.data_type, args.sampling_period) + # ============================================================================= + # STATIC FEATURES + # ============================================================================= + # Our working date range data_start, data_end = df.columns[0], df.columns[-1] @@ -316,40 +321,64 @@ def run(): page_features = make_page_features(df.index.values) encoded_page_features = encode_page_features(page_features) - # Make time-dependent features - features_days = pd.date_range(data_start, features_end) - #dow = normalize(features_days.dayofweek.values) - week_period = 7 / (2 * np.pi) - dow_norm = features_days.dayofweek.values / week_period #S.dayofweek gives day of the week with Monday=0, Sunday=6 - dow = np.stack([np.cos(dow_norm), np.sin(dow_norm)], axis=-1) - - #index of week number - year_period = 52. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] 
- woy_norm = features_days.weekofyear.values / year_period #not sure if by default this starts on Monday vs Sunday - woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) - - - # Assemble indices for quarterly lagged data - lagged_ix = np.stack(lag_indexes(data_start, features_end), axis=-1) - + #To get idea of overall scale of a time series, to compare between time series, which would be lost if just used standard scaled values: count_median = df.median(axis=1) count_median = normalize(count_median) - - #Play around w a few other basic summary stats percentiles = [] - for pctl in [5,25,75,95]: + for pctl in [0,5,25,75,95,100]: percentiles.append(normalize(np.percentile(df.values,pctl,axis=1))) count_variance = normalize(np.var(df.values,axis=1)) + #entropy = normalize(entropy(df.values,axis=1)) #filled_len = df.values.shape[1] - np.count_nonzero(np.isnan(df.values),axis=1) #non-nans #series_length = (df.values>0).sum(axis=1) #actually it has already been log transofmred so this is not correct + + # ============================================================================= + # TIME-VARYING FEATURES + # ============================================================================= + + if args.sampling_period=='daily': + + features_days = pd.date_range(data_start, features_end, freq='D') + #dow = normalize(features_days.dayofweek.values) + week_period = 7 / (2 * np.pi) + dow_norm = features_days.dayofweek.values / week_period #S.dayofweek gives day of the week with Monday=0, Sunday=6 + dow = np.stack([np.cos(dow_norm), np.sin(dow_norm)], axis=-1) + + #index of week number, when sampling at DAILY level + year_period = 53. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year + woy_norm = features_days.weekofyear.values / year_period #not sure if by default this starts on Monday vs Sunday + woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) + + + if args.sampling_period=='weekly': + #index of week number, when sampling at WEEKLY level (this is different than above) + fff = pd.date_range(data_start, features_end, freq='W') + #!!!!!!!!!!!!! still need to worry about alignment ... + year_period = 53. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year + woy_norm = fff.weekofyear.values / year_period #not sure if by default this starts on Monday vs Sunday + woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) + + + if args.sampling_period=='monthly': + #month index (only used if sampling monthly) + fff = pd.date_range(data_start, features_end, freq='M') #!!!!! need to think about alignment of starting month on particular dates .... + period = 12. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] 
---> actually in pandas numbering goes to 53, depending on start day of week for that year + moy_norm = fff.month.values / period #not sure if by default this starts on Monday vs Sunday + moy = np.stack([np.cos(moy_norm), np.sin(moy_norm)], axis=-1) + + + + + # Assemble indices for quarterly lagged data + lagged_ix = np.stack(lag_indexes(data_start, features_end), axis=-1) + # Put NaNs back df[nans] = np.NaN - #Compile the features print(f'Using {args.features_set} set of features') @@ -368,6 +397,8 @@ def run(): dow=dow,#N x 2 array since encoded week periodicity as complex number ) + + elif args.features_set == 'simple': tensors = dict( hits=df, @@ -375,7 +406,7 @@ def run(): dow=dow, ) - elif args.features_set == 'full': + elif (args.features_set == 'full') or (args.features_set == 'full_w_context'): tensors = dict( hits=df, page_ix=df.index.values, @@ -384,23 +415,46 @@ def run(): quarter_autocorr=quarter_autocorr, count_median=count_median,#this is just the median feature, can put in others too count_variance=count_variance,#variance + #entropy #percentiles - count_pctl_5=percentiles[0],#5th percentile - count_pctl_25=percentiles[1],#25th percentile - count_pctl_75=percentiles[2],#75th percentile - count_pctl_95=percentiles[3],#95th percentile + count_pctl_0=percentiles[0],#min + count_pctl_5=percentiles[1],#5th percentile + count_pctl_25=percentiles[2],#25th percentile + count_pctl_75=percentiles[3],#75th percentile + count_pctl_95=percentiles[4],#95th percentile + count_pctl_100=percentiles[5],#max # series_length=series_length,#length of series [number of samples] to get idea of how much history a series has #number nonzero #Other time-frequency/scale features #... - #N x 2 array since encoded week periodicity as complex number - dow=dow, - woy=woy,#and want want week number too, aggregating last ~10 days into week 52 - ) + + ) + + if args.sampling_period=='daily': + tensors[dow]=dow + tensors[woy]=woy #and want want week number too, aggregating last ~10 days into week 52 + elif args.sampling_period=='weekly': + tensors[woy]=woy + elif args.sampling_period=='monthly': + tensors[moy]=moy + else: + raise Exception('Must specify correct sampling period') + + + #If provide other info based on e.g. new location (any features that are not derived purely from the time series) + if args.features_set == 'full_w_context': + tensors['country'] = asdasdasd + tensors['region'] = asdasdasd + tensors['city_population'] = asdasdasd + raise Exception('not implemented yet') + #... can write scraper function to get these ... 
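# Aside on the dow/woy/moy encodings above (a sketch, not part of this patch):
# dividing the calendar index by period/(2*pi) and taking cos/sin places each
# value on the unit circle, so the feature wraps smoothly (week 53 lands next
# to week 1 instead of jumping). 'idx' and 'period' stand in for any of the
# dayofweek/weekofyear/month indexes built above.
import numpy as np

def cyclic_encode(idx, period):
    angle = np.asarray(idx) / (period / (2 * np.pi))
    return np.stack([np.cos(angle), np.sin(angle)], axis=-1)  # shape [len(idx), 2]

# e.g. cyclic_encode(features_days.dayofweek.values, 7.) reproduces dow above.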
+ + + else: - raise Exception(f'features_set must be specified\nOne of ["arturius","simple","full"]') + raise Exception(f'features_set must be specified\nOne of ["arturius","simple","full","full_w_context"]') From 2a67dd1b15c3a66a63a871706d5a87590beb3f50 Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 28 Jun 2018 11:10:19 -0700 Subject: [PATCH 05/42] py36 fixes --- PREPROCESS.py | 22 +++++++++++++--------- make_features.py | 8 ++++---- model.py | 2 +- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/PREPROCESS.py b/PREPROCESS.py index 6c268b8..c2385c3 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -98,9 +98,9 @@ def __missing_vals_distribution(df): def make_cdf(v): c = Counter(v) - x = c.keys() - x = np.array(x) - 1 #-1 to go from diff in days from present data -> gap length - y = c.values() + x = list(c.keys()) + x = np.array(x) -1 #-1 to go from diff in days from present data -> gap length + y = list(c.values()) # print(c) plt.figure() #plt.plot(x,y,drawstyle='steps')#,marker='o') @@ -297,13 +297,15 @@ def aggregate(df, sampling_period): df_list.append(dd) df = pd.concat(df_list,axis=0) - cols = df.columns.tolist() - df = df[cols[-1:]+cols[:-1]] + #cols = df.columns.tolist() + #df = df[cols[-1:]+cols[:-1]] df.reset_index(drop=True,inplace=True) - #Just for analysis: look at kinds of gaps in series - __missing_vals_distribution(df) + #Just for analysis: look at kinds of gaps in series + VERBOSE = False + if VERBOSE: + __missing_vals_distribution(df) #Imputation, dealing with missing seasonality blocks / out of phase @@ -313,7 +315,8 @@ def aggregate(df, sampling_period): df = aggregate(df,sampling_period) - + #SHould end up with a csv that is rows are series (each id), cols are dates + #:eftmost col should be "Pages" to be same as Kaggle format df.to_csv(save_path,index=False) return df @@ -328,7 +331,8 @@ def make_key_csv(df): #Make the train csv [for now just do 1, ignore the train 2 part ???] - save_path = os.path.join(os.path.split(myDataDir)[0],f"train_2[ours_{sampling_period}].csv") + #save_path = os.path.join(os.path.split(myDataDir)[0],f"train_2[ours_{sampling_period}].csv") + save_path = os.path.join(os.path.split(myDataDir)[0],"train_2[ours_{}].csv".format(sampling_period)) df = make_train_csv(df, save_path, imputation_method, sampling_period, start_date, end_date) #For the prediction phase, need the key ???? 
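Note: the aggregate() helper introduced above is still a stub (it returns df unchanged). A minimal sketch of what it could do, assuming the wide Kaggle-style layout (a 'Page' column plus one column per date) and simple mean aggregation; the function name and the 'W'/'M' frequency strings are illustrative, not part of the patch:

import pandas as pd

def aggregate_sketch(df, sampling_period):
    """Downsample daily date columns to weekly/monthly means; 'daily' is a no-op."""
    if sampling_period == 'daily':
        return df
    freq = {'weekly': 'W', 'monthly': 'M'}[sampling_period]
    ts = df.set_index('Page').T                     # rows become dates, columns become series
    ts.index = pd.to_datetime(ts.index)
    out = ts.resample(freq).mean().T.reset_index()  # average within each period
    out.columns = ['Page'] + [c.strftime('%Y-%m-%d') for c in out.columns[1:]]
    return out

Whether imputation runs before or after this step (the open question in the comment above) changes what each per-period mean is computed over, so that ordering is worth fixing explicitly.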
diff --git a/make_features.py b/make_features.py index 9321fe6..1acdded 100755 --- a/make_features.py +++ b/make_features.py @@ -433,12 +433,12 @@ def run(): ) if args.sampling_period=='daily': - tensors[dow]=dow - tensors[woy]=woy #and want want week number too, aggregating last ~10 days into week 52 + tensors['dow']=dow + tensors['woy']=woy #and want want week number too, aggregating last ~10 days into week 52 elif args.sampling_period=='weekly': - tensors[woy]=woy + tensors['woy']=woy elif args.sampling_period=='monthly': - tensors[moy]=moy + tensors['moy']=moy else: raise Exception('Must specify correct sampling period') diff --git a/model.py b/model.py index 4d658d8..6b2a8c2 100755 --- a/model.py +++ b/model.py @@ -66,7 +66,7 @@ def make_encoder(time_inputs, encoder_features_depth, is_train, hparams, seed, t def build_rnn(): return RNN(num_layers=hparams.encoder_rnn_layers, num_units=hparams.rnn_depth, input_size=encoder_features_depth, - direction='unidirectional', + direction='unidirectional', #Let's try bidirectional as well, or ,ay as well try keeping unidirectional but with order reversed, just see what happens dropout=hparams.encoder_dropout if is_train else 0, seed=seed) static_p_size = cuda_params_size(build_rnn) From a681e1ee7bdb46df16da13ce182198b46526c9c3 Mon Sep 17 00:00:00 2001 From: gk Date: Fri, 29 Jun 2018 13:38:08 -0700 Subject: [PATCH 06/42] incorporating my modified features --- PREPROCESS.py | 2 +- Readme.md | 7 +++ input_pipe.py | 147 ++++++++++++++++++++++++++++------------------- make_features.py | 14 ++--- trainer.py | 14 ++--- 5 files changed, 109 insertions(+), 75 deletions(-) diff --git a/PREPROCESS.py b/PREPROCESS.py index c2385c3..b1e4ffa 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -332,7 +332,7 @@ def make_key_csv(df): #Make the train csv [for now just do 1, ignore the train 2 part ???] #save_path = os.path.join(os.path.split(myDataDir)[0],f"train_2[ours_{sampling_period}].csv") - save_path = os.path.join(os.path.split(myDataDir)[0],"train_2[ours_{}].csv".format(sampling_period)) + save_path = os.path.join(os.path.split(myDataDir)[0],"train_2_ours_{}.csv".format(sampling_period)) df = make_train_csv(df, save_path, imputation_method, sampling_period, start_date, end_date) #For the prediction phase, need the key ???? diff --git a/Readme.md b/Readme.md index 2f8e2d2..a09eb3d 100755 --- a/Readme.md +++ b/Readme.md @@ -47,13 +47,20 @@ GK modifications for own data: 3. $cd ..../kaggle-web-traffic 4. $python3 PREPROCESS.py 5. $python3 make_features.py data/vars kaggle daily arturius --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full, full_w_context} depending on using default Arturius kaggle vs. own custom for this application; and specify sampling period +python3 make_features.py data/vars kaggle daily full --add_days=63 + + +#no reason to expect 10000 to 11500 is good range to save out. View loss along the way 6. $python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 7. $python3 PREDICT.py - confirmed it runs with 2 layers stacked GRU (for both encoder and decoder modules), or with attention mechanism. Performance is worse in both cases [SMAPE], at least initially. +- tried bidirectional encoder but has input dimension issues, think about that more later. To do: +0. get to work w my features +0. save log files to view SMAPE etc metrics during training 1. 
finish PREPROCESS.py to do better imputation using basic forecasting method [just use STL or Theta to fill in small gaps; otherwise remove blocks] 2. PREPROCESS.py - allow downsample in time to weekly, monthly 3. Prediction intervals diff --git a/input_pipe.py b/input_pipe.py index 7627344..06b8766 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -42,7 +42,7 @@ def __init__(self, tensors: List[tf.Tensor], cluster_indexes: tf.Tensor, n_split self.seed = seed clustered_index = self.cluster_pages(cluster_indexes) index_len = tf.shape(clustered_index)[0] - assert_op = tf.assert_equal(index_len, size, message='n_pages is not equals to size of clustered index') + assert_op = tf.assert_equal(index_len, size, message='N_time_series is not equals to size of clustered index') with tf.control_dependencies([assert_op]): split_nitems = int(round(size / n_splits)) split_size = [split_nitems] * n_splits @@ -71,65 +71,70 @@ def prepare_split(i): class FakeSplitter: def __init__(self, tensors: List[tf.Tensor], n_splits, seed, test_sampling=1.0): - total_pages = tensors[0].shape[0].value - n_pages = int(round(total_pages * test_sampling)) + total_series = tensors[0].shape[0].value + N_time_series = int(round(total_series * test_sampling)) def mk_name(prefix, tensor): return prefix + '_' + tensor.name[:-2] def prepare_split(i): - idx = tf.random_shuffle(tf.range(0, n_pages, dtype=tf.int32), seed + i) + idx = tf.random_shuffle(tf.range(0, N_time_series, dtype=tf.int32), seed + i) train_tensors = [tf.gather(tensor, idx, name=mk_name('shfl', tensor)) for tensor in tensors] if test_sampling < 1.0: - sampled_idx = idx[:n_pages] + sampled_idx = idx[:N_time_series] test_tensors = [tf.gather(tensor, sampled_idx, name=mk_name('shfl_test', tensor)) for tensor in tensors] else: test_tensors = train_tensors - return Split(test_tensors, train_tensors, n_pages, total_pages) + return Split(test_tensors, train_tensors, N_time_series, total_series) self.splits = [prepare_split(i) for i in range(n_splits)] class InputPipe: - def cut(self, hits, start, end): + def cut(self, counts, start, end): """ Cuts [start:end] diapason from input data - :param hits: hits timeseries + :param counts: counts timeseries :param start: start index :param end: end index - :return: tuple (train_hits, test_hits, dow, lagged_hits) + :return: tuple (train_counts, test_counts, dow, lagged_counts) """ - # Pad hits to ensure we have enough array length for prediction - hits = tf.concat([hits, tf.fill([self.predict_window], np.NaN)], axis=0) - cropped_hit = hits[start:end] + # Pad counts to ensure we have enough array length for prediction + counts = tf.concat([counts, tf.fill([self.predict_window], np.NaN)], axis=0) + cropped_hit = counts[start:end] # cut day of week - cropped_dow = self.inp.dow[start:end] - - # Cut lagged hits - # gather() accepts only int32 indexes - cropped_lags = tf.cast(self.inp.lagged_ix[start:end], tf.int32) - # Mask for -1 (no data) lag indexes - lag_mask = cropped_lags < 0 - # Convert -1 to 0 for gather(), it don't accept anything exotic - cropped_lags = tf.maximum(cropped_lags, 0) - # Translate lag indexes to hit values - lagged_hit = tf.gather(hits, cropped_lags) - # Convert masked (see above) or NaN lagged hits to zeros - lag_zeros = tf.zeros_like(lagged_hit) - lagged_hit = tf.where(lag_mask | tf.is_nan(lagged_hit), lag_zeros, lagged_hit) + if self.inp.dow: + cropped_dow = self.inp.dow[start:end] #!!!!!!! only if using dow feature [sampling daily] + #!!!!!!!!!!!! 
do same for moy , woy if using those features + + if self.inp.lagged_ix: + # Cut lagged counts + # gather() accepts only int32 indexes + cropped_lags = tf.cast(self.inp.lagged_ix[start:end], tf.int32) + # Mask for -1 (no data) lag indexes + lag_mask = cropped_lags < 0 + # Convert -1 to 0 for gather(), it don't accept anything exotic + cropped_lags = tf.maximum(cropped_lags, 0) + # Translate lag indexes to count values + lagged_hit = tf.gather(counts, cropped_lags) + # Convert masked (see above) or NaN lagged counts to zeros + lag_zeros = tf.zeros_like(lagged_hit) + lagged_hit = tf.where(lag_mask | tf.is_nan(lagged_hit), lag_zeros, lagged_hit) # Split for train and test - x_hits, y_hits = tf.split(cropped_hit, [self.train_window, self.predict_window], axis=0) + x_counts, y_counts = tf.split(cropped_hit, [self.train_window, self.predict_window], axis=0) # Convert NaN to zero in for train data - x_hits = tf.where(tf.is_nan(x_hits), tf.zeros_like(x_hits), x_hits) - return x_hits, y_hits, cropped_dow, lagged_hit + x_counts = tf.where(tf.is_nan(x_counts), tf.zeros_like(x_counts), x_counts) + return x_counts, y_counts, cropped_dow, lagged_hit #!!!!!!!!!!!! return other cropped time dependent features as well - def cut_train(self, hits, *args): + + + def cut_train(self, counts, *args): """ Cuts a segment of time series for training. Randomly chooses starting point. - :param hits: hits timeseries + :param counts: counts timeseries :param args: pass-through data, will be appended to result :return: result of cut() + args """ @@ -150,56 +155,72 @@ def cut_train(self, hits, *args): offset = tf.random_uniform((), self.start_offset, free_space, dtype=tf.int32, seed=self.rand_seed) end = offset + n_days # Cut all the things - return self.cut(hits, offset, end) + args + return self.cut(counts, offset, end) + args - def cut_eval(self, hits, *args): + def cut_eval(self, counts, *args): """ Cuts segment of time series for evaluation. Always cuts train_window + predict_window length segment beginning at start_offset point - :param hits: hits timeseries + :param counts: counts timeseries :param args: pass-through data, will be appended to result :return: result of cut() + args """ end = self.start_offset + self.train_window + self.predict_window - return self.cut(hits, self.start_offset, end) + args + return self.cut(counts, self.start_offset, end) + args - def reject_filter(self, x_hits, y_hits, *args): + def reject_filter(self, x_counts, y_counts, *args): """ Rejects timeseries having too many zero datapoints (more than self.max_train_empty) """ if self.verbose: print("max empty %d train %d predict" % (self.max_train_empty, self.max_predict_empty)) - zeros_x = tf.reduce_sum(tf.to_int32(tf.equal(x_hits, 0.0))) + zeros_x = tf.reduce_sum(tf.to_int32(tf.equal(x_counts, 0.0))) keep = zeros_x <= self.max_train_empty return keep - def make_features(self, x_hits, y_hits, dow, lagged_hits, pf_agent, pf_country, pf_site, page_ix, - page_popularity, year_autocorr, quarter_autocorr): + def make_features(self, x_counts, y_counts, dow, lagged_counts, pf_agent, pf_country, pf_site, page_ix, + count_median, year_autocorr, quarter_autocorr): #!!!!!!!!!!!! if kaggle feats as is """ Main method. 
Assembles input data into final tensors + + split into 3 sets of features: time-dependent, per series but static, and context features + input as dicts + ts_dynamic : {x_counts, y_counts, dow, woy, moy, lagged} + ts_static: {count_median, other percentiles..., autocorrelations, } + + def make_features(self, ts_dynamic, ts_static, context): + """ + # Split day of week to train and test - x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) - - # Normalize hits - mean = tf.reduce_mean(x_hits) - std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_hits, mean))) - norm_x_hits = (x_hits - mean) / std - norm_y_hits = (y_hits - mean) / std - norm_lagged_hits = (lagged_hits - mean) / std - - # Split lagged hits to train and test - x_lagged, y_lagged = tf.split(norm_lagged_hits, [self.train_window, self.predict_window], axis=0) + if ts_dynamic['dow']: + x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) + if ts_dynamic['woy']: + x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) + if ts_dynamic['moy']: + x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) + + + # Normalize counts + mean = tf.reduce_mean(x_counts) + std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_counts, mean))) + norm_x_counts = (x_counts - mean) / std + norm_y_counts = (y_counts - mean) / std + norm_lagged_counts = (lagged_counts - mean) / std #!!!!!! seems like there is some leakage in time here??? The y lagged are normalized in a way that is a function of the y data ?? + + # Split lagged counts to train and test + if ts_dynamic['lagged_ix']: + x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) # Combine all page features into single tensor - stacked_features = tf.stack([page_popularity, quarter_autocorr, year_autocorr]) - flat_page_features = tf.concat([pf_agent, pf_country, pf_site, stacked_features], axis=0) + stacked_features = tf.stack([count_median, quarter_autocorr, year_autocorr])#!!!!!!! if kaggle feats. Else need also the oher quntiles too + flat_page_features = tf.concat([pf_agent, pf_country, pf_site, stacked_features], axis=0) page_features = tf.expand_dims(flat_page_features, 0) # Train features x_features = tf.concat([ # [n_days] -> [n_days, 1] - tf.expand_dims(norm_x_hits, -1), + tf.expand_dims(norm_x_counts, -1), x_dow, x_lagged, # Stretch page_features to all training days @@ -217,9 +238,15 @@ def make_features(self, x_hits, y_hits, dow, lagged_hits, pf_agent, pf_country, tf.tile(page_features, [self.predict_window, 1]) ], axis=1) - return x_hits, x_features, norm_x_hits, x_lagged, y_hits, y_features, norm_y_hits, mean, std, flat_page_features, page_ix + #!!!!! why no lagged_y alnoe, only in y_features??? + #!!!! why no norm_y_counts ????? 
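# Aside on the normalization above (a sketch, not part of this diff): the mean
# and std come from x_counts (the encoder window) only; y_counts and the lagged
# values are then scaled with those same constants rather than with statistics
# of their own. NumPy illustration with made-up numbers:
import numpy as np
x_window = np.array([3., 4., 5., 6.])   # stand-in for x_counts (train window)
y_window = np.array([7., 8.])           # stand-in for y_counts (predict window)
mean, std = x_window.mean(), x_window.std()
norm_x = (x_window - mean) / std
norm_y = (y_window - mean) / std        # same constants, no statistics taken from y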
+ return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_page_features, page_ix + #later on the above is assigned to: + #self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ + #self.norm_std, self.page_features, self.page_ix = it_tensors + - def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], n_pages: int, mode: ModelMode, n_epoch=None, + def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: int, mode: ModelMode, n_epoch=None, batch_size=127, runs_in_burst=1, verbose=True, predict_window=60, train_window=500, train_completeness_threshold=1, predict_completeness_threshold=1, back_offset=0, train_skip_first=0, rand_seed=None): @@ -227,7 +254,7 @@ def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], n_pages: int, Create data preprocessing pipeline :param inp: Raw input data :param features: Features tensors (subset of data in inp) - :param n_pages: Total number of pages + :param N_time_series: Total number of pages :param mode: Train/Predict/Eval mode selector :param n_epoch: Number of epochs. Generates endless data stream if None :param batch_size: @@ -242,7 +269,7 @@ def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], n_pages: int, :param rand_seed: """ - self.n_pages = n_pages + self.N_time_series = N_time_series self.inp = inp self.batch_size = batch_size self.rand_seed = rand_seed @@ -293,7 +320,7 @@ def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], n_pages: int, # Assign all tensors to class variables self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ - self.norm_std, self.page_features, self.page_ix = it_tensors + self.norm_std, self.page_features, self.page_ix = it_tensors #!!!!!!!!!!!!! names hardcoded ned to change to my fgeatures self.encoder_features_depth = self.time_x.shape[2].value @@ -305,5 +332,5 @@ def init_iterator(self, session): def page_features(inp: VarFeeder): - return (inp.hits, inp.pf_agent, inp.pf_country, inp.pf_site, - inp.page_ix, inp.page_popularity, inp.year_autocorr, inp.quarter_autocorr) + return (inp.counts, inp.pf_agent, inp.pf_country, inp.pf_site,#!!!!!!!!!!!!! 
names hardcoded ned to change to my fgeatures + inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr) diff --git a/make_features.py b/make_features.py index 1acdded..84e37c5 100755 --- a/make_features.py +++ b/make_features.py @@ -43,7 +43,7 @@ def read_file(file): df = pd.read_pickle(path) else: # Official data - filename = f'train_2[{data_type}_{sampling_period}]' + filename = f'train_2_{data_type}_{sampling_period}' df = read_file(filename) if data_type=='kaggle': @@ -383,8 +383,10 @@ def run(): print(f'Using {args.features_set} set of features') if args.features_set == 'arturius': + if args.data_type == 'kaggle': + raise Exception('arturius features can only work with data_type "kaggle" since scrapes wikipedia pages') tensors = dict( - hits=df, + counts=df, lagged_ix=lagged_ix, page_map=page_map, page_ix=df.index.values, @@ -397,19 +399,17 @@ def run(): dow=dow,#N x 2 array since encoded week periodicity as complex number ) - - elif args.features_set == 'simple': tensors = dict( - hits=df, + counts=df, count_median=count_median,#this is just the median feature, can put in others too dow=dow, ) elif (args.features_set == 'full') or (args.features_set == 'full_w_context'): tensors = dict( - hits=df, - page_ix=df.index.values, + counts=df, + page_ix=df.index.values,#!!!!!! year_autocorr=year_autocorr, quarter_autocorr=quarter_autocorr, diff --git a/trainer.py b/trainer.py index 4ecb65b..4d40dbe 100755 --- a/trainer.py +++ b/trainer.py @@ -415,10 +415,10 @@ def train(name, hparams, multi_gpu=False, n_models=1, train_completeness_thresho with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") if side_split: - splitter = Splitter(page_features(inp), inp.page_map, 3, train_sampling=train_sampling, + splitter = Splitter(page_features(inp), inp.page_map, 3, train_sampling=train_sampling,#!!!!!!!!!!!! will need to edit page_features function and get rid of page_map test_sampling=eval_sampling, seed=seed) else: - splitter = FakeSplitter(page_features(inp), 3, seed=seed, test_sampling=eval_sampling) + splitter = FakeSplitter(page_features(inp), 3, seed=seed, test_sampling=eval_sampling) #!!!!!!!!!!!! will need to edit page_features function real_train_pages = splitter.splits[0].train_size real_eval_pages = splitter.splits[0].test_size @@ -440,7 +440,7 @@ def create_model(scope, index, prefix, seed): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): split = splitter.splits[index] - pipe = InputPipe(inp, features=split.train_set, n_pages=split.train_size, + pipe = InputPipe(inp, features=split.train_set, N_time_series=split.train_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.TRAIN, batch_size=batch_size, n_epoch=None, verbose=verbose, train_completeness_threshold=train_completeness_threshold, predict_completeness_threshold=train_completeness_threshold, train_window=train_window, @@ -449,7 +449,7 @@ def create_model(scope, index, prefix, seed): back_offset=predict_window if forward_split else 0) inp_scope.reuse_variables() if side_split: - side_eval_pipe = InputPipe(inp, features=split.test_set, n_pages=split.test_size, + side_eval_pipe = InputPipe(inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! 
page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -458,7 +458,7 @@ def create_model(scope, index, prefix, seed): else: side_eval_pipe = None if forward_split: - forward_eval_pipe = InputPipe(inp, features=split.test_set, n_pages=split.test_size, + forward_eval_pipe = InputPipe(inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -581,7 +581,7 @@ def ema_vars(model): for epoch in range(max_epoch): - # n_steps = pusher.n_pages // batch_size + # n_steps = pusher.N_time_series // batch_size if tqdm: tqr = trange(steps_per_epoch, desc="%2d" % (epoch + 1), leave=False) else: @@ -665,7 +665,7 @@ def predict(checkpoints, hparams, return_x=False, verbose=False, predict_window= with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") - pipe = InputPipe(inp, page_features(inp), inp.n_pages, mode=ModelMode.PREDICT, batch_size=batch_size, + pipe = InputPipe(inp, page_features(inp), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, #!!!!!!!!!!!!!!!! page_features n_epoch=1, verbose=verbose, train_completeness_threshold=0.01, predict_window=predict_window, From 0e7b6282988cf407952bbb2b6f933c1c0d799a91 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 2 Jul 2018 09:48:07 -0700 Subject: [PATCH 07/42] continue putting in my features --- PREDICT.py | 6 ++- input_pipe.py | 119 ++++++++++++++++++++++++++++++++++++++--------- make_features.py | 16 +++---- trainer.py | 19 ++++---- 4 files changed, 120 insertions(+), 40 deletions(-) diff --git a/PREDICT.py b/PREDICT.py index 0617d65..e8adbb6 100755 --- a/PREDICT.py +++ b/PREDICT.py @@ -25,6 +25,10 @@ +FEATURES_SET = 'arturius'# 'arturius' 'simple' 'full' + + + # ============================================================================= # Performance Metrics @@ -65,7 +69,7 @@ def mean_smape(true, pred): t_preds = [] for tm in range(3): tf.reset_default_graph() - t_preds.append(predict(paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63, + t_preds.append(predict(FEATURES_SET, paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63, n_models=3, target_model=tm, seed=2, batch_size=2048, asgd=True)) diff --git a/input_pipe.py b/input_pipe.py index 06b8766..ce9c59c 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -190,16 +190,27 @@ def make_features(self, x_counts, y_counts, dow, lagged_counts, pf_agent, pf_cou def make_features(self, ts_dynamic, ts_static, context): - """ - + # Split day of week to train and test if ts_dynamic['dow']: x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) if ts_dynamic['woy']: x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) if ts_dynamic['moy']: - x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) - + x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) + + """ + + + + if self.sampling_period == 'daily': + x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) + x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func + elif self.sampling_period == 
'weekly': + x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) + elif self.sampling_period == 'monthly': + x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) + # Normalize counts mean = tf.reduce_mean(x_counts) @@ -208,14 +219,37 @@ def make_features(self, ts_dynamic, ts_static, context): norm_y_counts = (y_counts - mean) / std norm_lagged_counts = (lagged_counts - mean) / std #!!!!!! seems like there is some leakage in time here??? The y lagged are normalized in a way that is a function of the y data ?? - # Split lagged counts to train and test - if ts_dynamic['lagged_ix']: + + if self.features_set == 'arturius': + # Split lagged counts to train and test x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) + + # Combine all page features into single tensor + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr])#!!!!!!! if kaggle feats. Else need also the oher quntiles too + flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) + series_features = tf.expand_dims(flat_features, 0) + - # Combine all page features into single tensor - stacked_features = tf.stack([count_median, quarter_autocorr, year_autocorr])#!!!!!!! if kaggle feats. Else need also the oher quntiles too - flat_page_features = tf.concat([pf_agent, pf_country, pf_site, stacked_features], axis=0) - page_features = tf.expand_dims(flat_page_features, 0) + + if self.features_set == 'full': + # Split lagged counts to train and test + x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) + + # Combine all page features into single tensor + + scalar_features = tf.stack([count_median, count_variance, \ + count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, \ + quarter_autocorr, year_autocorr]) + flat_features = tf.concat([scalar_features], axis=0) + series_features = tf.expand_dims(flat_features, 0) + + #!!!!!!! also do for simple, full w context + #.... + + + + #Any time dependent feature need to be split into x [train] and y [test] + #the time INdependent features [constant per fixture] will just be tiled same way either way except diff lengths # Train features x_features = tf.concat([ @@ -223,9 +257,9 @@ def make_features(self, ts_dynamic, ts_static, context): tf.expand_dims(norm_x_counts, -1), x_dow, x_lagged, - # Stretch page_features to all training days + # Stretch series_features to all training days # [1, features] -> [n_days, features] - tf.tile(page_features, [self.train_window, 1]) + tf.tile(series_features, [self.train_window, 1]) ], axis=1) # Test features @@ -233,20 +267,20 @@ def make_features(self, ts_dynamic, ts_static, context): # [n_days] -> [n_days, 1] y_dow, y_lagged, - # Stretch page_features to all testing days + # Stretch series_features to all testing days # [1, features] -> [n_days, features] - tf.tile(page_features, [self.predict_window, 1]) + tf.tile(series_features, [self.predict_window, 1]) ], axis=1) #!!!!! why no lagged_y alnoe, only in y_features??? #!!!! why no norm_y_counts ????? 
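# Aside (sketch, not part of this diff): the tf.tile/tf.concat pattern above
# gives every timestep its own copy of the per-series static vector alongside
# the time-varying columns. NumPy equivalent with made-up shapes:
import numpy as np
train_window, n_static = 280, 10
series_static = np.random.rand(1, n_static)   # per-series scalar features
norm_x = np.random.rand(train_window, 1)      # normalized counts
dow = np.random.rand(train_window, 2)         # cyclic day-of-week
x_feats = np.concatenate([norm_x, dow, np.tile(series_static, (train_window, 1))], axis=1)
assert x_feats.shape == (train_window, 1 + 2 + n_static)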
- return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_page_features, page_ix + return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_features, page_ix #later on the above is assigned to: #self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ - #self.norm_std, self.page_features, self.page_ix = it_tensors + #self.norm_std, self.series_features, self.page_ix = it_tensors - def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: int, mode: ModelMode, n_epoch=None, + def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: int, mode: ModelMode, n_epoch=None, batch_size=127, runs_in_burst=1, verbose=True, predict_window=60, train_window=500, train_completeness_threshold=1, predict_completeness_threshold=1, back_offset=0, train_skip_first=0, rand_seed=None): @@ -269,6 +303,10 @@ def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: :param rand_seed: """ + + self.features_set = features_set + self.sampling_period = sampling_period + self.N_time_series = N_time_series self.inp = inp self.batch_size = batch_size @@ -319,8 +357,16 @@ def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: it_tensors = self.iterator.get_next() # Assign all tensors to class variables - self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ - self.norm_std, self.page_features, self.page_ix = it_tensors #!!!!!!!!!!!!! names hardcoded ned to change to my fgeatures + if self.features_set=='arturius': + self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ + self.norm_std, self.series_features, self.page_ix = it_tensors #!!!!!!!!!!!!! names hardcoded ned to change to my fgeatures + if self.features_set=='simple': + pass + if self.features_set=='full': + pass + if self.features_set=='full_w_context': + pass + self.encoder_features_depth = self.time_x.shape[2].value @@ -331,6 +377,35 @@ def init_iterator(self, session): session.run(self.iterator.initializer) -def page_features(inp: VarFeeder): - return (inp.counts, inp.pf_agent, inp.pf_country, inp.pf_site,#!!!!!!!!!!!!! names hardcoded ned to change to my fgeatures - inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr) +def page_features(inp: VarFeeder, features_set): + + if features_set=='arturius': + d = (inp.counts, inp.pf_agent, inp.pf_country, inp.pf_site, + inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr) + + elif features_set=='simple': + raise Exception('not ready yet') + elif features_set=='full': + d = (inp.counts, + inp.count_median, inp.count_variance, + inp.count_pctl_0, + inp.count_pctl_5, + inp.count_pctl_25, + inp.count_pctl_75, + inp.count_pctl_95, + inp.count_pctl_100, + inp.page_ix, inp.year_autocorr, inp.quarter_autocorr) + elif features_set=='full_w_context': + raise Exception('not ready yet') + + + #!!!! does it actually need the dow, moy features??? 
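# Aside (sketch, not part of this diff): if the calendar tensors do need to be
# returned here, the selection could mirror make_features.py (daily -> dow+woy,
# weekly -> woy, monthly -> moy). Illustrative helper; attribute names follow
# the tensors dict built in make_features.py:
def calendar_features(inp, sampling_period):
    if sampling_period == 'daily':
        return (inp.dow, inp.woy)
    elif sampling_period == 'weekly':
        return (inp.woy,)
    elif sampling_period == 'monthly':
        return (inp.moy,)
    raise ValueError("sampling_period must be 'daily', 'weekly' or 'monthly'")
# usage would be something like: d += calendar_features(inp, sampling_period)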
+ #if this is required then would need the sample_period as an input to this function [follw pattern of features_set] + """if sample_period=='daily': + d += (inp.dow,inp.woy) + elif sample_period=='weekly': + d += (inp.dow,inp.woy) + elif sample_period=='monthly': + d += (inp.dow,inp.woy)""" + + return d \ No newline at end of file diff --git a/make_features.py b/make_features.py index 84e37c5..c4fe151 100755 --- a/make_features.py +++ b/make_features.py @@ -110,7 +110,7 @@ def single_autocorr(series, lag): def batch_autocorr(data, lag, starts, ends, threshold, backoffset=0): """ Calculate autocorrelation for batch (many time series at once) - :param data: Time series, shape [n_pages, n_days] + :param data: Time series, shape [N_time_series, n_days] :param lag: Autocorrelation lag :param starts: Start index for each series :param ends: End index for each series @@ -146,14 +146,14 @@ def find_start_end(data: np.ndarray): """ Calculates start and end of real traffic data. Start is an index of first non-zero, non-NaN value, end is index of last non-zero, non-NaN value - :param data: Time series, shape [n_pages, n_days] + :param data: Time series, shape [N_time_series, n_days] :return: """ - n_pages = data.shape[0] + N_time_series = data.shape[0] n_days = data.shape[1] - start_idx = np.full(n_pages, -1, dtype=np.int32) - end_idx = np.full(n_pages, -1, dtype=np.int32) - for page in range(n_pages): + start_idx = np.full(N_time_series, -1, dtype=np.int32) + end_idx = np.full(N_time_series, -1, dtype=np.int32) + for page in range(N_time_series): # scan from start to the end for day in range(n_days): if not np.isnan(data[page, day]) and data[page, day] > 0: @@ -248,7 +248,7 @@ def encode_page_features(df) -> Dict[str, pd.DataFrame]: """ Applies one-hot encoding to page features and normalises result :param df: page features DataFrame (one column per feature) - :return: dictionary feature_name:encoded_values. Encoded values is [n_pages,n_values] array + :return: dictionary feature_name:encoded_values. Encoded values is [N_time_series,n_values] array """ def encode(column) -> pd.DataFrame: one_hot = pd.get_dummies(df[column], drop_first=False) @@ -462,7 +462,7 @@ def run(): plain = dict( features_days=len(features_days), data_days=len(df.columns), - n_pages=len(df), + N_time_series=len(df), data_start=data_start, data_end=data_end, features_end=features_end diff --git a/trainer.py b/trainer.py index 4d40dbe..2c33773 100755 --- a/trainer.py +++ b/trainer.py @@ -396,7 +396,7 @@ def process_eval_results(self, run_results, offset, global_step, epoch): return mae, smape, new_best, smooth_mae, smooth_smape -def train(name, hparams, multi_gpu=False, n_models=1, train_completeness_threshold=0.01, +def train(features_set, name, hparams, multi_gpu=False, n_models=1, train_completeness_threshold=0.01, seed=None, logdir='data/logs', max_epoch=100, patience=2, train_sampling=1.0, eval_sampling=1.0, eval_memsize=5, gpu=0, gpu_allow_growth=False, save_best_model=False, forward_split=False, write_summaries=False, verbose=False, asgd_decay=None, tqdm=True, @@ -415,10 +415,10 @@ def train(name, hparams, multi_gpu=False, n_models=1, train_completeness_thresho with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") if side_split: - splitter = Splitter(page_features(inp), inp.page_map, 3, train_sampling=train_sampling,#!!!!!!!!!!!! will need to edit page_features function and get rid of page_map + splitter = Splitter(page_features(inp, features_set), inp.page_map, 3, train_sampling=train_sampling,#!!!!!!!!!!!! 
will need to edit page_features function and get rid of page_map test_sampling=eval_sampling, seed=seed) else: - splitter = FakeSplitter(page_features(inp), 3, seed=seed, test_sampling=eval_sampling) #!!!!!!!!!!!! will need to edit page_features function + splitter = FakeSplitter(page_features(inp, features_set), 3, seed=seed, test_sampling=eval_sampling) #!!!!!!!!!!!! will need to edit page_features function real_train_pages = splitter.splits[0].train_size real_eval_pages = splitter.splits[0].test_size @@ -440,7 +440,7 @@ def create_model(scope, index, prefix, seed): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): split = splitter.splits[index] - pipe = InputPipe(inp, features=split.train_set, N_time_series=split.train_size,#!!!!!!!!!!!!!!!! page_features + pipe = InputPipe(features_set, inp, features=split.train_set, N_time_series=split.train_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.TRAIN, batch_size=batch_size, n_epoch=None, verbose=verbose, train_completeness_threshold=train_completeness_threshold, predict_completeness_threshold=train_completeness_threshold, train_window=train_window, @@ -449,7 +449,7 @@ def create_model(scope, index, prefix, seed): back_offset=predict_window if forward_split else 0) inp_scope.reuse_variables() if side_split: - side_eval_pipe = InputPipe(inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + side_eval_pipe = InputPipe(features_set, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -458,7 +458,7 @@ def create_model(scope, index, prefix, seed): else: side_eval_pipe = None if forward_split: - forward_eval_pipe = InputPipe(inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + forward_eval_pipe = InputPipe(features_set, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -660,12 +660,12 @@ def ema_vars(model): return np.mean(best_epoch_smape, dtype=np.float64) -def predict(checkpoints, hparams, return_x=False, verbose=False, predict_window=6, back_offset=0, n_models=1, +def predict(features_set, checkpoints, hparams, return_x=False, verbose=False, predict_window=6, back_offset=0, n_models=1, target_model=0, asgd=False, seed=1, batch_size=1024): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") - pipe = InputPipe(inp, page_features(inp), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, #!!!!!!!!!!!!!!!! page_features + pipe = InputPipe(features_set, inp, page_features(inp, features_set), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, #!!!!!!!!!!!!!!!! page_features n_epoch=1, verbose=verbose, train_completeness_threshold=0.01, predict_window=predict_window, @@ -744,6 +744,7 @@ def predict(checkpoints, hparams, return_x=False, verbose=False, predict_window= if __name__ == '__main__': parser = argparse.ArgumentParser(description='Train the model') + parser.add_argument('features_set', help="Which set of features to use. His default for Kaggle vs. 
one of my custom sets: {'arturius','simple','full','full_w_context'}") parser.add_argument('--name', default='s32', help='Model name to identify different logs/checkpoints') parser.add_argument('--hparam_set', default='s32', help="Hyperparameters set to use (see hparams.py for available sets)") parser.add_argument('--n_models', default=1, type=int, help="Jointly train n models with different seeds") @@ -782,5 +783,5 @@ def predict(checkpoints, hparams, return_x=False, verbose=False, predict_window= # save_from_step=10500) # print("Training result:", result) - # preds = predict('data/cpt/fair_365-15428', 380, hparams, verbose=True, back_offset=60, n_models=3) + # preds = PREDICT('data/cpt/fair_365-15428', 380, hparams, verbose=True, back_offset=60, n_models=3) # print(preds) From ad816133dfcc12811ae815296caf61bf7c9f47e0 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 2 Jul 2018 12:01:31 -0700 Subject: [PATCH 08/42] working with few example our features --- Readme.md | 10 ++++++++-- input_pipe.py | 32 ++++++++++++++++++++++---------- make_features.py | 5 ++++- trainer.py | 19 ++++++++++--------- 4 files changed, 44 insertions(+), 22 deletions(-) diff --git a/Readme.md b/Readme.md index a09eb3d..21af290 100755 --- a/Readme.md +++ b/Readme.md @@ -51,15 +51,21 @@ python3 make_features.py data/vars kaggle daily full --add_days=63 #no reason to expect 10000 to 11500 is good range to save out. View loss along the way -6. $python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 +python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 +--name TEST_attn_head --hparam_set=TEST_attn_head +--name TEST_stacked --hparam_set=TEST_stacked + + 7. $python3 PREDICT.py - confirmed it runs with 2 layers stacked GRU (for both encoder and decoder modules), or with attention mechanism. Performance is worse in both cases [SMAPE], at least initially. - tried bidirectional encoder but has input dimension issues, think about that more later. + + To do: -0. get to work w my features +0. -- got working with few examples of our added features (one static, one time varying 2D), now just organize programmatically 0. save log files to view SMAPE etc metrics during training 1. finish PREPROCESS.py to do better imputation using basic forecasting method [just use STL or Theta to fill in small gaps; otherwise remove blocks] 2. PREPROCESS.py - allow downsample in time to weekly, monthly diff --git a/input_pipe.py b/input_pipe.py index ce9c59c..9459f9d 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -105,8 +105,11 @@ def cut(self, counts, start, end): # cut day of week if self.inp.dow: - cropped_dow = self.inp.dow[start:end] #!!!!!!! only if using dow feature [sampling daily] - #!!!!!!!!!!!! do same for moy , woy if using those features + cropped_dow = self.inp.dow[start:end] + if self.inp.woy: + cropped_woy = self.inp.woy[start:end] + + if self.inp.lagged_ix: # Cut lagged counts @@ -127,7 +130,7 @@ def cut(self, counts, start, end): # Convert NaN to zero in for train data x_counts = tf.where(tf.is_nan(x_counts), tf.zeros_like(x_counts), x_counts) - return x_counts, y_counts, cropped_dow, lagged_hit #!!!!!!!!!!!! return other cropped time dependent features as well + return x_counts, y_counts, cropped_dow, lagged_hit, cropped_woy #!!!!!!!!!!!! 
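The cut() change above crops a week-of-year feature with the same [start:end] window as the counts and the day-of-week feature. Both calendar features are built in make_features.py as cos/sin pairs so that the encoding wraps around at the end of each cycle. A minimal sketch of that encoding and of the cropping, using placeholder window bounds and the 7-step and 53-step periods from the surrounding code:

import numpy as np
import pandas as pd

dates = pd.date_range('2017-01-01', '2017-12-31', freq='D')
dow_norm = dates.dayofweek.values / (7.0 / (2 * np.pi))
dow = np.stack([np.cos(dow_norm), np.sin(dow_norm)], axis=-1)     # [n_days, 2]
woy_norm = dates.weekofyear.values / (53.0 / (2 * np.pi))
woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1)     # [n_days, 2]

start, end = 100, 200                            # same bounds used to crop the counts
cropped_dow, cropped_woy = dow[start:end], woy[start:end]
print(cropped_dow.shape, cropped_woy.shape)      # (100, 2) (100, 2)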
return other cropped time dependent features as well #added cropped_woy @@ -178,8 +181,8 @@ def reject_filter(self, x_counts, y_counts, *args): keep = zeros_x <= self.max_train_empty return keep - def make_features(self, x_counts, y_counts, dow, lagged_counts, pf_agent, pf_country, pf_site, page_ix, - count_median, year_autocorr, quarter_autocorr): #!!!!!!!!!!!! if kaggle feats as is + def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, + count_median, year_autocorr, quarter_autocorr, count_pctl_100): #!!!!!!!!!!!! if kaggle feats as is #!!!! added woy, count_pctl_100 """ Main method. Assembles input data into final tensors @@ -204,6 +207,9 @@ def make_features(self, ts_dynamic, ts_static, context): if self.sampling_period == 'daily': + print(dow) + print() + print(woy) x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func elif self.sampling_period == 'weekly': @@ -225,13 +231,13 @@ def make_features(self, ts_dynamic, ts_static, context): x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) # Combine all page features into single tensor - scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr])#!!!!!!! if kaggle feats. Else need also the oher quntiles too + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100])#!!!!!!! if kaggle feats. Else need also the oher quntiles too flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) series_features = tf.expand_dims(flat_features, 0) - if self.features_set == 'full': + """if self.features_set == 'full': # Split lagged counts to train and test x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) @@ -242,9 +248,8 @@ def make_features(self, ts_dynamic, ts_static, context): quarter_autocorr, year_autocorr]) flat_features = tf.concat([scalar_features], axis=0) series_features = tf.expand_dims(flat_features, 0) - #!!!!!!! also do for simple, full w context - #.... + #....""" @@ -256,6 +261,7 @@ def make_features(self, ts_dynamic, ts_static, context): # [n_days] -> [n_days, 1] tf.expand_dims(norm_x_counts, -1), x_dow, + x_woy, #!!!!!! added x_lagged, # Stretch series_features to all training days # [1, features] -> [n_days, features] @@ -266,6 +272,7 @@ def make_features(self, ts_dynamic, ts_static, context): y_features = tf.concat([ # [n_days] -> [n_days, 1] y_dow, + y_woy, #!!!!!! added y_lagged, # Stretch series_features to all testing days # [1, features] -> [n_days, features] @@ -286,6 +293,8 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter train_skip_first=0, rand_seed=None): """ Create data preprocessing pipeline + features_set - arturius, simple, full, full_w_context + sampling_period - daily, weekly, monthly :param inp: Raw input data :param features: Features tensors (subset of data in inp) :param N_time_series: Total number of pages @@ -381,7 +390,10 @@ def page_features(inp: VarFeeder, features_set): if features_set=='arturius': d = (inp.counts, inp.pf_agent, inp.pf_country, inp.pf_site, - inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr) + inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr, +# inp.woy, + inp.count_pctl_100 + )#!!!!!!!!!!!! 
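The x_features concatenation above is what sets encoder_features_depth: one column for the normalized counts, two each for the day-of-week and week-of-year cos/sin pairs, the lagged-count columns, and the tiled per-series features. A shape-only NumPy sketch, where every size is a placeholder rather than the repo's actual dimensions:

import numpy as np

train_window, n_lags, n_static = 283, 4, 18
norm_x_counts = np.random.randn(train_window)          # normalized counts, encoder part
x_dow = np.random.randn(train_window, 2)               # cos/sin day of week
x_woy = np.random.randn(train_window, 2)               # cos/sin week of year
x_lagged = np.random.randn(train_window, n_lags)       # lagged count columns
series_features = np.random.randn(1, n_static)         # one-hot page features plus scalars

x_features = np.concatenate([norm_x_counts[:, None], x_dow, x_woy, x_lagged,
                             np.tile(series_features, (train_window, 1))], axis=1)
print(x_features.shape)                                # (283, 1 + 2 + 2 + 4 + 18) = (283, 27)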
ading 2 more elif features_set=='simple': raise Exception('not ready yet') diff --git a/make_features.py b/make_features.py index c4fe151..d13d0cc 100755 --- a/make_features.py +++ b/make_features.py @@ -383,7 +383,7 @@ def run(): print(f'Using {args.features_set} set of features') if args.features_set == 'arturius': - if args.data_type == 'kaggle': + if args.data_type != 'kaggle': raise Exception('arturius features can only work with data_type "kaggle" since scrapes wikipedia pages') tensors = dict( counts=df, @@ -397,6 +397,9 @@ def run(): year_autocorr=year_autocorr, quarter_autocorr=quarter_autocorr, dow=dow,#N x 2 array since encoded week periodicity as complex number + + woy=woy,#!!!!!!!! + count_pctl_100=percentiles[5],#max #!!!!!!!!!!!!!!!! just to see what happens: apend one of my features. ) elif args.features_set == 'simple': diff --git a/trainer.py b/trainer.py index 2c33773..6f61da1 100755 --- a/trainer.py +++ b/trainer.py @@ -396,7 +396,7 @@ def process_eval_results(self, run_results, offset, global_step, epoch): return mae, smape, new_best, smooth_mae, smooth_smape -def train(features_set, name, hparams, multi_gpu=False, n_models=1, train_completeness_threshold=0.01, +def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_models=1, train_completeness_threshold=0.01, seed=None, logdir='data/logs', max_epoch=100, patience=2, train_sampling=1.0, eval_sampling=1.0, eval_memsize=5, gpu=0, gpu_allow_growth=False, save_best_model=False, forward_split=False, write_summaries=False, verbose=False, asgd_decay=None, tqdm=True, @@ -435,12 +435,12 @@ def train(features_set, name, hparams, multi_gpu=False, n_models=1, train_comple all_models: List[ModelTrainerV2] = [] - def create_model(scope, index, prefix, seed): + def create_model(features_set, sampling_period, scope, index, prefix, seed): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): split = splitter.splits[index] - pipe = InputPipe(features_set, inp, features=split.train_set, N_time_series=split.train_size,#!!!!!!!!!!!!!!!! page_features + pipe = InputPipe(features_set, sampling_period, inp, features=split.train_set, N_time_series=split.train_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.TRAIN, batch_size=batch_size, n_epoch=None, verbose=verbose, train_completeness_threshold=train_completeness_threshold, predict_completeness_threshold=train_completeness_threshold, train_window=train_window, @@ -449,7 +449,7 @@ def create_model(scope, index, prefix, seed): back_offset=predict_window if forward_split else 0) inp_scope.reuse_variables() if side_split: - side_eval_pipe = InputPipe(features_set, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + side_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -458,7 +458,7 @@ def create_model(scope, index, prefix, seed): else: side_eval_pipe = None if forward_split: - forward_eval_pipe = InputPipe(features_set, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! 
page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -504,14 +504,14 @@ def create_model(scope, index, prefix, seed): if n_models == 1: with tf.device(f"/gpu:{gpu}"): scope = tf.get_variable_scope() - all_models = [create_model(scope, 0, None, seed=seed)] + all_models = [create_model(features_set, sampling_period, scope, 0, None, seed=seed)] else: for i in range(n_models): device = f"/gpu:{i}" if multi_gpu else f"/gpu:{gpu}" with tf.device(device): prefix = f"m_{i}" with tf.variable_scope(prefix) as scope: - all_models.append(create_model(scope, i, prefix=prefix, seed=seed + i)) + all_models.append(create_model(features_set, sampling_period, scope, i, prefix=prefix, seed=seed + i)) trainer = MultiModelTrainer(all_models, inc_step) if save_best_model or save_from_step: saver_path = f'data/cpt/{name}' @@ -660,12 +660,12 @@ def ema_vars(model): return np.mean(best_epoch_smape, dtype=np.float64) -def predict(features_set, checkpoints, hparams, return_x=False, verbose=False, predict_window=6, back_offset=0, n_models=1, +def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, verbose=False, predict_window=6, back_offset=0, n_models=1, target_model=0, asgd=False, seed=1, batch_size=1024): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") - pipe = InputPipe(features_set, inp, page_features(inp, features_set), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, #!!!!!!!!!!!!!!!! page_features + pipe = InputPipe(features_set, sampling_period, inp, page_features(inp, features_set), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, #!!!!!!!!!!!!!!!! page_features n_epoch=1, verbose=verbose, train_completeness_threshold=0.01, predict_window=predict_window, @@ -745,6 +745,7 @@ def predict(features_set, checkpoints, hparams, return_x=False, verbose=False, p if __name__ == '__main__': parser = argparse.ArgumentParser(description='Train the model') parser.add_argument('features_set', help="Which set of features to use. His default for Kaggle vs. one of my custom sets: {'arturius','simple','full','full_w_context'}") + parser.add_argument('sampling_period', help="{'daily','weekly','monthly'}") parser.add_argument('--name', default='s32', help='Model name to identify different logs/checkpoints') parser.add_argument('--hparam_set', default='s32', help="Hyperparameters set to use (see hparams.py for available sets)") parser.add_argument('--n_models', default=1, type=int, help="Jointly train n models with different seeds") From 858ff2aeed37e182fd7280a993f1d3452be0415d Mon Sep 17 00:00:00 2001 From: gk Date: Tue, 3 Jul 2018 02:36:11 -0700 Subject: [PATCH 09/42] working w our data and features --- Readme.md | 2 +- input_pipe.py | 236 +++++++++++++++++++++++++++-------------------- make_features.py | 2 + model.py | 2 + trainer.py | 10 +- 5 files changed, 147 insertions(+), 105 deletions(-) diff --git a/Readme.md b/Readme.md index 21af290..1e11cb9 100755 --- a/Readme.md +++ b/Readme.md @@ -48,7 +48,7 @@ GK modifications for own data: 4. $python3 PREPROCESS.py 5. $python3 make_features.py data/vars kaggle daily arturius --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full, full_w_context} depending on using default Arturius kaggle vs. 
own custom for this application; and specify sampling period python3 make_features.py data/vars kaggle daily full --add_days=63 - +python3 make_features.py data/vars ours daily full --add_days=63 #no reason to expect 10000 to 11500 is good range to save out. View loss along the way python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 diff --git a/input_pipe.py b/input_pipe.py index 9459f9d..617caf4 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -101,16 +101,35 @@ def cut(self, counts, start, end): """ # Pad counts to ensure we have enough array length for prediction counts = tf.concat([counts, tf.fill([self.predict_window], np.NaN)], axis=0) - cropped_hit = counts[start:end] + cropped_count = counts[start:end] - # cut day of week - if self.inp.dow: + + # ============================================================================= + # Ordinal periodic variables + # which features are here depends on what the sampling period is for the data + # ============================================================================= + if self.sampling_period=='daily': cropped_dow = self.inp.dow[start:end] - if self.inp.woy: cropped_woy = self.inp.woy[start:end] + cropped_moy = 0*cropped_dow + elif self.sampling_period=='weekly': + cropped_woy = self.inp.woy[start:end] + cropped_dow = 0*cropped_woy + cropped_moy = 0*cropped_woy + elif self.sampling_period=='monthly': + cropped_moy = self.inp.moy[start:end] + cropped_dow = 0*cropped_moy + cropped_woy = 0*cropped_moy + - - + + # ============================================================================= + # Other features that are also time-varying + # that can be used, which depend on the choice of feature_set + # self.features_set = features_set + # ============================================================================= + + #If used Arturius' original feature set then will include the lagged data: if self.inp.lagged_ix: # Cut lagged counts # gather() accepts only int32 indexes @@ -120,20 +139,30 @@ def cut(self, counts, start, end): # Convert -1 to 0 for gather(), it don't accept anything exotic cropped_lags = tf.maximum(cropped_lags, 0) # Translate lag indexes to count values - lagged_hit = tf.gather(counts, cropped_lags) + lagged_count = tf.gather(counts, cropped_lags) # Convert masked (see above) or NaN lagged counts to zeros - lag_zeros = tf.zeros_like(lagged_hit) - lagged_hit = tf.where(lag_mask | tf.is_nan(lagged_hit), lag_zeros, lagged_hit) + lag_zeros = tf.zeros_like(lagged_count) + lagged_count = tf.where(lag_mask | tf.is_nan(lagged_count), lag_zeros, lagged_count) + + + #Will always have the count series (the series we predict on): # Split for train and test - x_counts, y_counts = tf.split(cropped_hit, [self.train_window, self.predict_window], axis=0) + x_counts, y_counts = tf.split(cropped_count, [self.train_window, self.predict_window], axis=0) # Convert NaN to zero in for train data x_counts = tf.where(tf.is_nan(x_counts), tf.zeros_like(x_counts), x_counts) - return x_counts, y_counts, cropped_dow, lagged_hit, cropped_woy #!!!!!!!!!!!! 
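The lagged-count handling in cut() gathers past values by precomputed index, replaces negative indices before the gather, and zeroes out the masked or NaN entries afterwards. The same logic in plain NumPy on a toy series, for illustration only:

import numpy as np

counts = np.arange(10, dtype=np.float32)                   # toy series
offsets = np.array([3, 7])                                 # stand-ins for the real quarterly/yearly lags
lagged_ix = np.arange(len(counts))[:, None] - offsets      # [n_steps, n_lags] indices into the past
lag_mask = lagged_ix < 0                                   # lag reaches before the series start
safe_ix = np.maximum(lagged_ix, 0)                         # gather needs non-negative indices
lagged_count = counts[safe_ix]
lagged_count[lag_mask | np.isnan(lagged_count)] = 0.0      # masked or NaN lags become zero
print(lagged_count[8])                                     # [5. 1.]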
return other cropped time dependent features as well #added cropped_woy + if self.features_set=='arturius' or self.features_set=='full':#for now, for full just do sam [include lagged] + return x_counts, y_counts, cropped_dow, lagged_count, cropped_woy, cropped_moy +# elif self.features_set=='full': +# return aaaaaaaaaaa #can drop lagged + else: + raise Exception('problem with features_set') + + def cut_train(self, counts, *args): """ Cuts a segment of time series for training. Randomly chooses starting point. @@ -181,35 +210,18 @@ def reject_filter(self, x_counts, y_counts, *args): keep = zeros_x <= self.max_train_empty return keep - def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, - count_median, year_autocorr, quarter_autocorr, count_pctl_100): #!!!!!!!!!!!! if kaggle feats as is #!!!! added woy, count_pctl_100 + + + def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, + count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, + count_pctl_75, count_pctl_95, count_pctl_100, count_variance): + +# def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, +# count_median, year_autocorr, quarter_autocorr, count_pctl_100): """ Main method. Assembles input data into final tensors - - split into 3 sets of features: time-dependent, per series but static, and context features - input as dicts - ts_dynamic : {x_counts, y_counts, dow, woy, moy, lagged} - ts_static: {count_median, other percentiles..., autocorrelations, } - - def make_features(self, ts_dynamic, ts_static, context): - - - # Split day of week to train and test - if ts_dynamic['dow']: - x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) - if ts_dynamic['woy']: - x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) - if ts_dynamic['moy']: - x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) - """ - - - if self.sampling_period == 'daily': - print(dow) - print() - print(woy) x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func elif self.sampling_period == 'weekly': @@ -217,7 +229,6 @@ def make_features(self, ts_dynamic, ts_static, context): elif self.sampling_period == 'monthly': x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) - # Normalize counts mean = tf.reduce_mean(x_counts) std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_counts, mean))) @@ -225,66 +236,86 @@ def make_features(self, ts_dynamic, ts_static, context): norm_y_counts = (y_counts - mean) / std norm_lagged_counts = (lagged_counts - mean) / std #!!!!!! seems like there is some leakage in time here??? The y lagged are normalized in a way that is a function of the y data ?? + # Split lagged counts to train and test + x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) + if self.features_set == 'arturius': - # Split lagged counts to train and test - x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) - + # Combine all page features into single tensor scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100])#!!!!!!! if kaggle feats. 
Else need also the oher quntiles too flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) series_features = tf.expand_dims(flat_features, 0) - - - - """if self.features_set == 'full': - # Split lagged counts to train and test - x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) - - # Combine all page features into single tensor - scalar_features = tf.stack([count_median, count_variance, \ - count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, \ - quarter_autocorr, year_autocorr]) +# print(scalar_features) #4 +# print(flat_features) #18 +# print(series_features) +# print([pf_agent, pf_country, pf_site]) #4, 7, 3 #the one hot encoded features + + + elif self.features_set == 'full': + # Combine all page features into single tensor + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, + count_pctl_0, + count_pctl_5, + count_pctl_25, + count_pctl_75, + count_pctl_95, + count_pctl_100, + count_variance]) flat_features = tf.concat([scalar_features], axis=0) series_features = tf.expand_dims(flat_features, 0) - #!!!!!!! also do for simple, full w context - #....""" - - - + + #Any time dependent feature need to be split into x [train] and y [test] #the time INdependent features [constant per fixture] will just be tiled same way either way except diff lengths - # Train features - x_features = tf.concat([ - # [n_days] -> [n_days, 1] - tf.expand_dims(norm_x_counts, -1), - x_dow, - x_woy, #!!!!!! added - x_lagged, - # Stretch series_features to all training days - # [1, features] -> [n_days, features] - tf.tile(series_features, [self.train_window, 1]) - ], axis=1) + # Train features, depending on measurement frequency + x_features = tf.expand_dims(norm_x_counts, -1) # [n_days] -> [n_days, 1] + if self.sampling_period == 'daily': + x_features = tf.concat([x_features, x_dow, x_woy], axis=1) + elif self.sampling_period == 'weekly': + x_features = tf.concat([x_features, x_woy], axis=1) + elif self.sampling_period == 'monthly': + x_features = tf.concat([x_features, x_moy], axis=1) + #Regardess of period/frequency will have below features: + x_features = tf.concat([x_features, x_lagged, + # Stretch series_features to all training days + # [1, features] -> [n_days, features] + tf.tile(series_features, [self.train_window, 1])], axis=1) # Test features - y_features = tf.concat([ - # [n_days] -> [n_days, 1] - y_dow, - y_woy, #!!!!!! added - y_lagged, - # Stretch series_features to all testing days - # [1, features] -> [n_days, features] - tf.tile(series_features, [self.predict_window, 1]) - ], axis=1) - + if self.sampling_period == 'daily': + y_features = tf.concat([y_dow, y_woy], axis=1) + elif self.sampling_period == 'weekly': + y_features = y_woy + 0 + elif self.sampling_period == 'monthly': + y_features = y_moy + 0 + #Regardess of period/frequency will have below features: + y_features = tf.concat([y_features, y_lagged, + # Stretch series_features to all testing days + # [1, features] -> [n_days, features] + tf.tile(series_features, [self.predict_window, 1]) + ], axis=1) + +# print(x_features) + #!!!!! why no lagged_y alnoe, only in y_features??? #!!!! why no norm_y_counts ????? 
return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_features, page_ix - #later on the above is assigned to: - #self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ - #self.norm_std, self.series_features, self.page_ix = it_tensors + #Must match up with setting self.XYZ = it_tensors below in __init__. + + + + +# def make_features__full(self, x_counts, y_counts, dow, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, +# count_median, year_autocorr, quarter_autocorr, count_pctl_100): #!!!!!!!!!!!! if kaggle feats as is #!!!! added woy, count_pctl_100 +# """ +# Using different features than the arturius default set +# """ + + + def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: int, mode: ModelMode, n_epoch=None, @@ -352,12 +383,15 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter # Choose right cutter function for current ModelMode cutter = {ModelMode.TRAIN: self.cut_train, ModelMode.EVAL: self.cut_eval, ModelMode.PREDICT: self.cut_eval} + #Choose the right feature maker function, depending on feature_set used: + #feature_maker = {'arturius': self.make_features, 'full': self.make_features__full} + feature_maker = {'arturius': self.make_features, 'full': self.make_features}#!!!!!!just for now always use art # Create dataset, transform features and assemble batches root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) batch = (root_ds .map(cutter[mode]) .filter(self.reject_filter) - .map(self.make_features, num_parallel_calls=num_threads) + .map(feature_maker[self.features_set], num_parallel_calls=num_threads) .batch(batch_size) .prefetch(runs_in_burst * 2) ) @@ -366,18 +400,19 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter it_tensors = self.iterator.get_next() # Assign all tensors to class variables - if self.features_set=='arturius': + if self.features_set=='arturius' or self.features_set=='full': self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ self.norm_std, self.series_features, self.page_ix = it_tensors #!!!!!!!!!!!!! names hardcoded ned to change to my fgeatures if self.features_set=='simple': pass - if self.features_set=='full': - pass +# if self.features_set=='full': +# pass if self.features_set=='full_w_context': pass self.encoder_features_depth = self.time_x.shape[2].value + print('self.encoder_features_depth',self.encoder_features_depth) def load_vars(self, session): self.inp.restore(session) @@ -387,37 +422,40 @@ def init_iterator(self, session): def page_features(inp: VarFeeder, features_set): + """ + Other than inp.counts, these features are the static features. + So do not need to pass in here the time-varying ones like day of week, + month of year, lagged, etc. + + DO NOT return dow, woy, moy + """ if features_set=='arturius': d = (inp.counts, inp.pf_agent, inp.pf_country, inp.pf_site, inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr, -# inp.woy, inp.count_pctl_100 - )#!!!!!!!!!!!! 
ading 2 more + ) elif features_set=='simple': raise Exception('not ready yet') + elif features_set=='full': - d = (inp.counts, - inp.count_median, inp.count_variance, +# print(inp.counts) + dummy = tf.zeros_like(inp.counts) +# print(dummy) + d = (inp.counts, dummy, dummy, dummy, + inp.page_ix, + inp.count_median, + inp.year_autocorr, inp.quarter_autocorr, inp.count_pctl_0, inp.count_pctl_5, inp.count_pctl_25, inp.count_pctl_75, inp.count_pctl_95, inp.count_pctl_100, - inp.page_ix, inp.year_autocorr, inp.quarter_autocorr) + inp.count_variance) + elif features_set=='full_w_context': raise Exception('not ready yet') - - #!!!! does it actually need the dow, moy features??? - #if this is required then would need the sample_period as an input to this function [follw pattern of features_set] - """if sample_period=='daily': - d += (inp.dow,inp.woy) - elif sample_period=='weekly': - d += (inp.dow,inp.woy) - elif sample_period=='monthly': - d += (inp.dow,inp.woy)""" - return d \ No newline at end of file diff --git a/make_features.py b/make_features.py index d13d0cc..772ad8f 100755 --- a/make_features.py +++ b/make_features.py @@ -412,6 +412,8 @@ def run(): elif (args.features_set == 'full') or (args.features_set == 'full_w_context'): tensors = dict( counts=df, + lagged_ix=lagged_ix, + page_map=np.zeros(len(df)),#just set to a dummy all 0's page_ix=df.index.values,#!!!!!! year_autocorr=year_autocorr, diff --git a/model.py b/model.py index 6b2a8c2..6145305 100755 --- a/model.py +++ b/model.py @@ -67,6 +67,8 @@ def build_rnn(): return RNN(num_layers=hparams.encoder_rnn_layers, num_units=hparams.rnn_depth, input_size=encoder_features_depth, direction='unidirectional', #Let's try bidirectional as well, or ,ay as well try keeping unidirectional but with order reversed, just see what happens + #assume merge mode default is concat?? + #need to fix dimensions error. If could change merge mode to sum or mean or something then at least output dimension is same so might be easiest way to avoid error ? dropout=hparams.encoder_dropout if is_train else 0, seed=seed) static_p_size = cuda_params_size(build_rnn) diff --git a/trainer.py b/trainer.py index 6f61da1..2bface6 100755 --- a/trainer.py +++ b/trainer.py @@ -415,10 +415,10 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") if side_split: - splitter = Splitter(page_features(inp, features_set), inp.page_map, 3, train_sampling=train_sampling,#!!!!!!!!!!!! will need to edit page_features function and get rid of page_map + splitter = Splitter(page_features(inp, features_set), inp.page_map, 3, train_sampling=train_sampling, test_sampling=eval_sampling, seed=seed) else: - splitter = FakeSplitter(page_features(inp, features_set), 3, seed=seed, test_sampling=eval_sampling) #!!!!!!!!!!!! will need to edit page_features function + splitter = FakeSplitter(page_features(inp, features_set), 3, seed=seed, test_sampling=eval_sampling) real_train_pages = splitter.splits[0].train_size real_eval_pages = splitter.splits[0].test_size @@ -440,7 +440,7 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): split = splitter.splits[index] - pipe = InputPipe(features_set, sampling_period, inp, features=split.train_set, N_time_series=split.train_size,#!!!!!!!!!!!!!!!! 
page_features + pipe = InputPipe(features_set, sampling_period, inp, features=split.train_set, N_time_series=split.train_size, mode=ModelMode.TRAIN, batch_size=batch_size, n_epoch=None, verbose=verbose, train_completeness_threshold=train_completeness_threshold, predict_completeness_threshold=train_completeness_threshold, train_window=train_window, @@ -449,7 +449,7 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): back_offset=predict_window if forward_split else 0) inp_scope.reuse_variables() if side_split: - side_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + side_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -458,7 +458,7 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): else: side_eval_pipe = None if forward_split: - forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, From 983fb9c035ae9c7c516c34793c50aef7f1cc5b03 Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 5 Jul 2018 16:33:53 -0700 Subject: [PATCH 10/42] finished weekly aggregation' --- PREPROCESS.py | 122 ++++++++++++++++++++++++++++++++++++++--------- hparams.py | 3 +- input_pipe.py | 109 +++++++++++++++++++++++++++++++----------- make_features.py | 32 +++++++++---- trainer.py | 2 +- 5 files changed, 207 insertions(+), 61 deletions(-) diff --git a/PREPROCESS.py b/PREPROCESS.py index b1e4ffa..6d4b1cd 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -22,7 +22,7 @@ from sklearn.preprocessing import Imputer from collections import Counter - +from copy import deepcopy def load_my_data(myDataDir): @@ -248,15 +248,75 @@ def make_train_csv(df, save_path, imputation_method, sampling_period, start_date Make the train_2.csv """ - def aggregate(df, sampling_period): + def aggregate_to_weekly(df, aggregation_type): """ Aggregate the data (average it) to downsample to desired sample period, e.g. daily measurements -> weekly or monthly. Should smooth out some noise, and help w seasonality. + + **ASSUMES WE HAVE DAILY DATA TO START. 
""" - return df + dfc = deepcopy(df) + dfc['month-day'] = dfc['date'].apply(lambda x: str(x)[5:]) + + #Differentiate by year + years = pd.DatetimeIndex(dfc['date']).year + #years -= years.min() + dfc['year'] = years + + #Manually define as below, as generated by pd.date_range('2015-01-01','2015-12-24',freq='W-THU') + fixed_start_dates = ['01-01','01-08','01-15','01-22', + '01-29','02-05','02-12','02-19', + '02-26','03-05','03-12','03-19', + '03-26','04-02','04-09','04-16', + '04-23','04-30','05-07','05-14', + '05-21','05-28','06-04','06-11', + '06-18','06-25','07-02','07-09', + '07-16','07-23','07-30','08-06', + '08-13','08-20','08-27','09-03', + '09-10','09-17','09-24','10-01', + '10-08','10-15','10-22','10-29', + '11-05','11-12','11-19','11-26', + '12-03','12-10','12-17','12-24']#This combines last ~10 days of year together + + + _ = [np.searchsorted(fixed_start_dates,str(x),side='right') - 1 for x in dfc['month-day'].values] + _ = np.clip(_,0,51).astype(int) #clip 52 to 51. This means lumping last few days of year into 2nd last week of year starting 12/24. + _ = [fixed_start_dates[i] for i in _] + #Overwrite the actual date with the predefined week start date: + dfc['week_start_date'] = dfc['year'].map(str) + '-' + _ + + #For each page-year-week, aggregte over the N<=7 days of that week to get the aggregted value: +# _ = dfc.groupby(['Page','year','week_start_date']).agg({'y': [aggregation_type,'size'], 'year':'min', 'date':'min', 'Page':'min', 'week_start_date':'min'}) + _ = dfc.groupby(['Page','week_start_date']).agg({'y': [aggregation_type,'size'], 'date':'min', 'Page':'min', 'week_start_date':'min'}) + new_df = pd.DataFrame({'Page': _['Page']['min'].values, + 'date': _['date']['min'].values, + 'y': _['y'][aggregation_type].values, #This is no longer necessarily an int + 'week_start_date': _['week_start_date']['min'].values + }) + + #After above process, can still have missing blocks for a given time series, so will deal with them later. + + #now that done, delete uneeded columns + new_df.drop(columns=['date'],inplace=True) + new_df.rename(columns={'week_start_date':'date'},inplace=True) + + return new_df + def remove_downsample_columns(df, out_of_range_fill_value): + """ + When doing any kind of daily --> weekly or monthly aggregation, + will have many days that are now empty (all data aggregated to single + date marking 1st date of week / month) + + So remove those obsolete columns + """ + bad_cols = [i for i in df.columns if np.alltrue(df[i].values==out_of_range_fill_value)] + df.drop(columns=bad_cols,inplace=True) + return df + + #Rename columns to be as in Kaggle data: df.rename(columns={'id':'Page'},inplace=True) @@ -269,14 +329,16 @@ def aggregate(df, sampling_period): if end_date: latest = min(latest,end_date) - idx = pd.date_range(earliest,latest) - OUT_OF_RANGE_FILL_VALUE = -1 #np.NaN #0 #puttign as nan casts to float and cannot convert to int + idx = pd.date_range(earliest,latest) #!!!!!! fro now doing daily. When doing weekly also keep with default freq='D' . If change to 'W' alignment gets messed up. Just do daily 'D', then later can correct easily. + OUT_OF_RANGE_FILL_VALUE = -1. 
#np.NaN #0 #puttign as nan casts to float and cannot convert to int + #Do aggregation from DAILY --> WEEKLY before doing any kind of imputation + if sampling_period=='weekly': + AGGREGATION_TYPE = 'median' + df = aggregate_to_weekly(df, AGGREGATION_TYPE) - - #Reorganize data for each id (->"Page") unique_ids = pd.unique(df['Page']) df_list = [] @@ -284,7 +346,7 @@ def aggregate(df, sampling_period): d = df.loc[df['Page']==u] #Nan / zero pad start and end date range if needed {end missing} dates = pd.Series(d['y'].values,index=d['date']) - dates.index = pd.DatetimeIndex(dates.index) + dates.index = pd.DatetimeIndex(dates.index) dates = dates.reindex(idx, fill_value=OUT_OF_RANGE_FILL_VALUE) dates.index = pd.to_datetime(dates.index).strftime('%Y-%m-%d') dd = pd.DataFrame(dates).T @@ -302,18 +364,33 @@ def aggregate(df, sampling_period): df.reset_index(drop=True,inplace=True) - #Just for analysis: look at kinds of gaps in series - VERBOSE = False - if VERBOSE: - __missing_vals_distribution(df) + #If we did aggregation, then above reogranization will have many of the columns Nan / -1, + #since e.g. went from daily to weekly, then 6 days of the week will look empty. So remove them. + if sampling_period=='weekly': + AGGREGATION_TYPE = 'median' + df = remove_downsample_columns(df, OUT_OF_RANGE_FILL_VALUE) + + + + + # ============================================================================= + # Just for analysis: look at kinds of gaps in series, for DAILY data + # ============================================================================= + #VERBOSE = False + #if VERBOSE: + # __missing_vals_distribution(df) + + #Imputation, dealing with missing seasonality blocks / out of phase - df = do_imputation(df,imputation_method) - #Could do impoutation then downsampling, vs. downsampling then imputation ... unclear which is better here in general. - #for now assume we do ipmutation THEN aggregation: - df = aggregate(df,sampling_period) + if imputation_method: + df = do_imputation(df,imputation_method) + #Could do impoutation then downsampling, vs. downsampling then imputation ... unclear which is better here in general. 
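A compact pandas sketch of the two steps above: binning daily rows onto the 52 fixed week-start dates and then reindexing the aggregated series onto the full daily grid with the out-of-range fill value. The toy frame below is hypothetical; only the binning and reindexing logic mirrors the code.

import numpy as np
import pandas as pd

# 52 fixed week starts ('MM-DD'), every 7 days from Jan 1; the last bin absorbs the final days of the year
week_starts = pd.date_range('2015-01-01', periods=52, freq='7D').strftime('%m-%d').tolist()

df = pd.DataFrame({'Page': 1,
                   'date': pd.date_range('2016-12-20', '2017-01-10', freq='D'),
                   'y': np.arange(22.0)})
bin_ix = np.clip(np.searchsorted(week_starts, df['date'].dt.strftime('%m-%d'), side='right') - 1, 0, 51)
df['week_start'] = df['date'].dt.year.astype(str) + '-' + pd.Series([week_starts[i] for i in bin_ix])
weekly = df.groupby(['Page', 'week_start'], as_index=False)['y'].median()

# Reindex onto a full daily grid, as in the loop above; week-start dates keep their value, all other days get -1
idx = pd.date_range('2016-12-01', '2017-01-31', freq='D')
s = pd.Series(weekly['y'].values, index=pd.DatetimeIndex(weekly['week_start']))
s = s.reindex(idx, fill_value=-1)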
+ #for now assume we do ipmutation THEN aggregation: + #df = aggregate(df,sampling_period) + print(df) #SHould end up with a csv that is rows are series (each id), cols are dates #:eftmost col should be "Pages" to be same as Kaggle format @@ -321,6 +398,8 @@ def aggregate(df, sampling_period): return df + + def make_key_csv(df): """ Make the key_1.csv, key_2.csv @@ -360,12 +439,11 @@ def make_key_csv(df): # ============================================================================= # TOTAL COMPLETED TRIPS: myDataDir = r"/Users/kocher/Desktop/forecasting/exData/totalCompletedTripsDaily" - imputation_method = 'median' #'STL' + IMPUTATION_METHOD = None #'median' #'STL' #None START_DATE = '2015-01-01' #None END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful - SAMPLING_PERIOD = 'daily' #'daily', 'weekly', 'monthly' - + SAMPLING_PERIOD = 'weekly' #'daily', 'weekly', 'monthly' # ============================================================================= @@ -374,8 +452,9 @@ def make_key_csv(df): print('START_DATE',START_DATE) print('END_DATE',END_DATE) print('REMOVE_ID_LIST',REMOVE_ID_LIST) - print('imputation_method',imputation_method) + print('IMPUTATION_METHOD',IMPUTATION_METHOD) print('myDataDir',myDataDir) + print('SAMPLING_PERIOD',SAMPLING_PERIOD) #Load df = load_my_data(myDataDir) @@ -384,6 +463,5 @@ def make_key_csv(df): df = remove_cities(df,REMOVE_ID_LIST) #Put into same format as used by Kaggle, save out csv's - df = format_like_Kaggle(df, myDataDir, imputation_method, SAMPLING_PERIOD, start_date=START_DATE, end_date=END_DATE) - + df = format_like_Kaggle(df, myDataDir, IMPUTATION_METHOD, SAMPLING_PERIOD, start_date=START_DATE, end_date=END_DATE) diff --git a/hparams.py b/hparams.py index 8edf66e..dfab587 100755 --- a/hparams.py +++ b/hparams.py @@ -5,7 +5,8 @@ params_s32 = dict( batch_size=256, #train_window=380, - train_window=283, + #train_window=283, + train_window=65,#try 65 w our data to see if allows more samples through filter train_skip_first=0, rnn_depth=267, use_attn=False, diff --git a/input_pipe.py b/input_pipe.py index 617caf4..b86707c 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -212,7 +212,7 @@ def reject_filter(self, x_counts, y_counts, *args): - def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, + def make_features__arturius(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance): @@ -240,33 +240,17 @@ def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_age x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) - if self.features_set == 'arturius': - - # Combine all page features into single tensor - scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100])#!!!!!!! if kaggle feats. Else need also the oher quntiles too - flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) - series_features = tf.expand_dims(flat_features, 0) - + # Combine all page features into single tensor + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100])#!!!!!!! if kaggle feats. 
Else need also the oher quntiles too + flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) + series_features = tf.expand_dims(flat_features, 0) + # print(scalar_features) #4 # print(flat_features) #18 # print(series_features) # print([pf_agent, pf_country, pf_site]) #4, 7, 3 #the one hot encoded features - - - elif self.features_set == 'full': - # Combine all page features into single tensor - scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, - count_pctl_0, - count_pctl_5, - count_pctl_25, - count_pctl_75, - count_pctl_95, - count_pctl_100, - count_variance]) - flat_features = tf.concat([scalar_features], axis=0) - series_features = tf.expand_dims(flat_features, 0) - - + + #Any time dependent feature need to be split into x [train] and y [test] #the time INdependent features [constant per fixture] will just be tiled same way either way except diff lengths @@ -308,13 +292,80 @@ def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_age -# def make_features__full(self, x_counts, y_counts, dow, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, -# count_median, year_autocorr, quarter_autocorr, count_pctl_100): #!!!!!!!!!!!! if kaggle feats as is #!!!! added woy, count_pctl_100 -# """ -# Using different features than the arturius default set -# """ + def make_features__full(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, + count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, + count_pctl_75, count_pctl_95, count_pctl_100, count_variance): + """ + Main method. Assembles input data into final tensors + """ + if self.sampling_period == 'daily': + x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) + x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func + elif self.sampling_period == 'weekly': + x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) + elif self.sampling_period == 'monthly': + x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) + # Normalize counts + mean = tf.reduce_mean(x_counts) + std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_counts, mean))) + norm_x_counts = (x_counts - mean) / std + norm_y_counts = (y_counts - mean) / std + norm_lagged_counts = (lagged_counts - mean) / std #!!!!!! seems like there is some leakage in time here??? The y lagged are normalized in a way that is a function of the y data ?? 
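On the leakage worry in the comment above: as the code reads, mean and std are computed from x_counts alone, so the decoder targets and the lagged values are rescaled with statistics taken from the training window only; nothing is estimated from the prediction window. A toy NumPy illustration of that convention:

import numpy as np

x_counts = np.array([3., 5., 4., 6., 8.])                  # encoder (training) window
y_counts = np.array([7., 9.])                              # decoder (prediction) window
lagged_counts = np.array([2., 4., 3., 5., 7., 6., 8.])     # covers both windows

mean = x_counts.mean()
std = x_counts.std()                                       # population std, matching sqrt(mean(squared_difference))
norm_x_counts = (x_counts - mean) / std
norm_y_counts = (y_counts - mean) / std                    # same statistics reused, not recomputed from y
norm_lagged_counts = (lagged_counts - mean) / std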
+ + # Split lagged counts to train and test + x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) + + # Combine all page features into single tensor + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, + count_pctl_0, + count_pctl_5, + count_pctl_25, + count_pctl_75, + count_pctl_95, + count_pctl_100, + count_variance]) + flat_features = tf.concat([scalar_features], axis=0) + series_features = tf.expand_dims(flat_features, 0) + + + #Any time dependent feature need to be split into x [train] and y [test] + #the time INdependent features [constant per fixture] will just be tiled same way either way except diff lengths + # Train features, depending on measurement frequency + x_features = tf.expand_dims(norm_x_counts, -1) # [n_days] -> [n_days, 1] + if self.sampling_period == 'daily': + x_features = tf.concat([x_features, x_dow, x_woy], axis=1) + elif self.sampling_period == 'weekly': + x_features = tf.concat([x_features, x_woy], axis=1) + elif self.sampling_period == 'monthly': + x_features = tf.concat([x_features, x_moy], axis=1) + #Regardess of period/frequency will have below features: + x_features = tf.concat([x_features, x_lagged, + # Stretch series_features to all training days + # [1, features] -> [n_days, features] + tf.tile(series_features, [self.train_window, 1])], axis=1) + + # Test features + if self.sampling_period == 'daily': + y_features = tf.concat([y_dow, y_woy], axis=1) + elif self.sampling_period == 'weekly': + y_features = y_woy + 0 + elif self.sampling_period == 'monthly': + y_features = y_moy + 0 + #Regardess of period/frequency will have below features: + y_features = tf.concat([y_features, y_lagged, + # Stretch series_features to all testing days + # [1, features] -> [n_days, features] + tf.tile(series_features, [self.predict_window, 1]) + ], axis=1) + +# print(x_features) + + #!!!!! why no lagged_y alnoe, only in y_features??? + #!!!! why no norm_y_counts ????? + return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_features, page_ix + #Must match up with setting self.XYZ = it_tensors below in __init__. diff --git a/make_features.py b/make_features.py index 772ad8f..b6fc82f 100755 --- a/make_features.py +++ b/make_features.py @@ -339,6 +339,11 @@ def run(): # ============================================================================= # TIME-VARYING FEATURES # ============================================================================= + #Could determine week of year number in several ways: 1) as in Pandas as starting on a particular day of week, + # 2. just use day of year / 365 + WEEK_NUMBER_METHOD = 'floor7'#'pandas' #'floor7' + WEEK_NUMBER_MAX = 53. #52. + if args.sampling_period=='daily': @@ -349,19 +354,30 @@ def run(): dow = np.stack([np.cos(dow_norm), np.sin(dow_norm)], axis=-1) #index of week number, when sampling at DAILY level - year_period = 53. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year - woy_norm = features_days.weekofyear.values / year_period #not sure if by default this starts on Monday vs Sunday + if WEEK_NUMBER_METHOD=='pandas': + week = features_days.weekofyear.values + elif WEEK_NUMBER_METHOD=='floor7': + week = np.floor((features_days.dayofyear.values - 1.) /7.) + year_period = WEEK_NUMBER_MAX / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] 
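The two WEEK_NUMBER_METHOD options behave differently around the year boundary: pandas weekofyear follows ISO numbering, so the first days of January can land in week 52 or 53 of the previous year, while 'floor7' always restarts at 0 on January 1. A small comparison on illustrative dates:

import numpy as np
import pandas as pd

dates = pd.date_range('2016-12-28', '2017-01-05', freq='D')
pandas_week = dates.weekofyear.values                        # ISO week number
floor7_week = np.floor((dates.dayofyear.values - 1) / 7.0)   # restarts at 0 every January 1st
for d, pw, fw in zip(dates.strftime('%Y-%m-%d'), pandas_week, floor7_week):
    print(d, pw, fw)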
---> actually in pandas numbering goes to 53, depending on start day of week for that year + woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) + #To catch longer term trending data, can also include year number. [depending on size of train / prediction windows and random sampling boundaries could be same value over whole series] + year_nmumber = features_days.year + if args.sampling_period=='weekly': #index of week number, when sampling at WEEKLY level (this is different than above) fff = pd.date_range(data_start, features_end, freq='W') #!!!!!!!!!!!!! still need to worry about alignment ... - year_period = 53. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year - woy_norm = fff.weekofyear.values / year_period #not sure if by default this starts on Monday vs Sunday - woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) - + if WEEK_NUMBER_METHOD=='pandas': + week = fff.weekofyear.values + elif WEEK_NUMBER_METHOD=='floor7': + week = np.floor((fff.dayofyear.values - 1.) /7.) + year_period = WEEK_NUMBER_MAX / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year + woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday + woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) + year_nmumber = features_days.year if args.sampling_period=='monthly': #month index (only used if sampling monthly) @@ -369,8 +385,8 @@ def run(): period = 12. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year moy_norm = fff.month.values / period #not sure if by default this starts on Monday vs Sunday moy = np.stack([np.cos(moy_norm), np.sin(moy_norm)], axis=-1) - - + year_nmumber = features_days.year + # Assemble indices for quarterly lagged data diff --git a/trainer.py b/trainer.py index 2bface6..fae64c1 100755 --- a/trainer.py +++ b/trainer.py @@ -458,7 +458,7 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): else: side_eval_pipe = None if forward_split: - forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, + forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! 
page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, From 147245795b9a238a635862ac346ee3ad4a0d85f7 Mon Sep 17 00:00:00 2001 From: gk Date: Sat, 7 Jul 2018 17:55:42 -0700 Subject: [PATCH 11/42] removed dummy tensors, now only return exact needed --- input_pipe.py | 155 +++++++++++++++++++---------------------------- make_features.py | 2 + 2 files changed, 66 insertions(+), 91 deletions(-) diff --git a/input_pipe.py b/input_pipe.py index b86707c..b0989d5 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -111,7 +111,7 @@ def cut(self, counts, start, end): if self.sampling_period=='daily': cropped_dow = self.inp.dow[start:end] cropped_woy = self.inp.woy[start:end] - cropped_moy = 0*cropped_dow + cropped_moy = 0*cropped_dow #Month information is alreayd contained in week information. COuld incude anyway to be explicit, but for now do not use as a feature elif self.sampling_period=='weekly': cropped_woy = self.inp.woy[start:end] cropped_dow = 0*cropped_woy @@ -130,6 +130,7 @@ def cut(self, counts, start, end): # ============================================================================= #If used Arturius' original feature set then will include the lagged data: +# if self.features_set == 'arturius': if self.inp.lagged_ix: # Cut lagged counts # gather() accepts only int32 indexes @@ -156,10 +157,17 @@ def cut(self, counts, start, end): if self.features_set=='arturius' or self.features_set=='full':#for now, for full just do sam [include lagged] - return x_counts, y_counts, cropped_dow, lagged_count, cropped_woy, cropped_moy + if self.sampling_period=='daily': + return x_counts, y_counts, lagged_count, cropped_dow, cropped_woy + if self.sampling_period=='weekly': + return x_counts, y_counts, lagged_count, cropped_woy + if self.sampling_period=='monthly': + return x_counts, y_counts, lagged_count, cropped_moy + # elif self.features_set=='full': # return aaaaaaaaaaa #can drop lagged else: + print(self.features_set) raise Exception('problem with features_set') @@ -212,15 +220,51 @@ def reject_filter(self, x_counts, y_counts, *args): - def make_features__arturius(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, - count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, - count_pctl_75, count_pctl_95, count_pctl_100, count_variance): - -# def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, -# count_median, year_autocorr, quarter_autocorr, count_pctl_100): + + + def make_features(self, *args): """ Main method. Assembles input data into final tensors """ +# def make_features__arturius(self, x_counts, y_counts, lagged_counts, dow, woy, moy, pf_agent, pf_country, pf_site, page_ix, +# count_median, year_autocorr, quarter_autocorr, count_pctl_100): +# +# def make_features__arturius(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, +# count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, +# count_pctl_75, count_pctl_95, count_pctl_100, count_variance): + + + # ============================================================================= + # Unpack the vars depending on which features_set - sampling_period + # The order needs to match the output of the cut method. 
+ # cut_train and cut_eval return args + cut_output + # the args are things like pf_agent, p + # the cut_output is the same order as the return of the cut method. + # ============================================================================= + print(args) + if self.features_set == 'arturius': + if self.sampling_period == 'daily': + x_counts, y_counts, lagged_counts, dow, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + elif self.sampling_period == 'weekly': + x_counts, y_counts, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + elif self.sampling_period == 'monthly': + x_counts, y_counts, lagged_counts, moy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + #For now just use the same ... +# count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance) + elif self.features_set == 'full': + f = ooooooooo + if self.sampling_period == 'daily': + x_counts, y_counts, lagged_counts, dow, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + elif self.sampling_period == 'weekly': + x_counts, y_counts, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + elif self.sampling_period == 'monthly': + x_counts, y_counts, lagged_counts, moy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + + + + # ============================================================================= + # Do train - predict splits + # ============================================================================= if self.sampling_period == 'daily': x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func @@ -241,6 +285,14 @@ def make_features__arturius(self, x_counts, y_counts, dow, lagged_counts, woy, m # Combine all page features into single tensor +# scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, +# count_pctl_0, +# count_pctl_5, +# count_pctl_25, +# count_pctl_75, +# count_pctl_95, +# count_pctl_100, +# count_variance]) scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100])#!!!!!!! if kaggle feats. Else need also the oher quntiles too flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) series_features = tf.expand_dims(flat_features, 0) @@ -291,84 +343,6 @@ def make_features__arturius(self, x_counts, y_counts, dow, lagged_counts, woy, m - - def make_features__full(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, - count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, - count_pctl_75, count_pctl_95, count_pctl_100, count_variance): - """ - Main method. 
Assembles input data into final tensors - """ - if self.sampling_period == 'daily': - x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) - x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func - elif self.sampling_period == 'weekly': - x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) - elif self.sampling_period == 'monthly': - x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) - - # Normalize counts - mean = tf.reduce_mean(x_counts) - std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_counts, mean))) - norm_x_counts = (x_counts - mean) / std - norm_y_counts = (y_counts - mean) / std - norm_lagged_counts = (lagged_counts - mean) / std #!!!!!! seems like there is some leakage in time here??? The y lagged are normalized in a way that is a function of the y data ?? - - # Split lagged counts to train and test - x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) - - # Combine all page features into single tensor - scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, - count_pctl_0, - count_pctl_5, - count_pctl_25, - count_pctl_75, - count_pctl_95, - count_pctl_100, - count_variance]) - flat_features = tf.concat([scalar_features], axis=0) - series_features = tf.expand_dims(flat_features, 0) - - - #Any time dependent feature need to be split into x [train] and y [test] - #the time INdependent features [constant per fixture] will just be tiled same way either way except diff lengths - - # Train features, depending on measurement frequency - x_features = tf.expand_dims(norm_x_counts, -1) # [n_days] -> [n_days, 1] - if self.sampling_period == 'daily': - x_features = tf.concat([x_features, x_dow, x_woy], axis=1) - elif self.sampling_period == 'weekly': - x_features = tf.concat([x_features, x_woy], axis=1) - elif self.sampling_period == 'monthly': - x_features = tf.concat([x_features, x_moy], axis=1) - #Regardess of period/frequency will have below features: - x_features = tf.concat([x_features, x_lagged, - # Stretch series_features to all training days - # [1, features] -> [n_days, features] - tf.tile(series_features, [self.train_window, 1])], axis=1) - - # Test features - if self.sampling_period == 'daily': - y_features = tf.concat([y_dow, y_woy], axis=1) - elif self.sampling_period == 'weekly': - y_features = y_woy + 0 - elif self.sampling_period == 'monthly': - y_features = y_moy + 0 - #Regardess of period/frequency will have below features: - y_features = tf.concat([y_features, y_lagged, - # Stretch series_features to all testing days - # [1, features] -> [n_days, features] - tf.tile(series_features, [self.predict_window, 1]) - ], axis=1) - -# print(x_features) - - #!!!!! why no lagged_y alnoe, only in y_features??? - #!!!! why no norm_y_counts ????? - return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_features, page_ix - #Must match up with setting self.XYZ = it_tensors below in __init__. 
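# =============================================================================
# Minimal NumPy sketch of the normalization pattern used in make_features above:
# the mean and std are computed on the encoder (train) window only and then reused
# for the train counts, the prediction-window counts and the lagged counts, so no
# statistic is derived from the values the decoder has to predict.
# =============================================================================
import numpy as np

def normalize_window(x_counts, y_counts, lagged_counts):
    mean = x_counts.mean()
    std = np.sqrt(np.mean((x_counts - mean) ** 2))
    return (x_counts - mean) / std, (y_counts - mean) / std, (lagged_counts - mean) / std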
- - - def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: int, mode: ModelMode, n_epoch=None, batch_size=127, runs_in_burst=1, verbose=True, predict_window=60, train_window=500, train_completeness_threshold=1, predict_completeness_threshold=1, back_offset=0, @@ -435,18 +409,17 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter # Choose right cutter function for current ModelMode cutter = {ModelMode.TRAIN: self.cut_train, ModelMode.EVAL: self.cut_eval, ModelMode.PREDICT: self.cut_eval} #Choose the right feature maker function, depending on feature_set used: - #feature_maker = {'arturius': self.make_features, 'full': self.make_features__full} - feature_maker = {'arturius': self.make_features, 'full': self.make_features}#!!!!!!just for now always use art +# feature_maker = {'arturius': self.make_features__arturius, 'full': self.make_features__full} # Create dataset, transform features and assemble batches root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) batch = (root_ds .map(cutter[mode]) .filter(self.reject_filter) - .map(feature_maker[self.features_set], num_parallel_calls=num_threads) + #.map(feature_maker[self.features_set], num_parallel_calls=num_threads) + .map(self.make_features, num_parallel_calls=num_threads) .batch(batch_size) .prefetch(runs_in_burst * 2) ) - self.iterator = batch.make_initializable_iterator() it_tensors = self.iterator.get_next() @@ -464,7 +437,7 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter self.encoder_features_depth = self.time_x.shape[2].value print('self.encoder_features_depth',self.encoder_features_depth) - + def load_vars(self, session): self.inp.restore(session) diff --git a/make_features.py b/make_features.py index b6fc82f..61cb2e4 100755 --- a/make_features.py +++ b/make_features.py @@ -493,6 +493,8 @@ def run(): print(tensors) print(plain) + print(tensors.keys()) + print(plain.keys()) # Store data to the disk VarFeeder(args.data_dir, tensors, plain) From a395049be10621fca8367fe0d26c596f8bdea0d0 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 9 Jul 2018 09:48:48 -0700 Subject: [PATCH 12/42] input pipe cleanup --- input_pipe.py | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/input_pipe.py b/input_pipe.py index b0989d5..bb827f8 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -226,14 +226,6 @@ def make_features(self, *args): """ Main method. Assembles input data into final tensors """ -# def make_features__arturius(self, x_counts, y_counts, lagged_counts, dow, woy, moy, pf_agent, pf_country, pf_site, page_ix, -# count_median, year_autocorr, quarter_autocorr, count_pctl_100): -# -# def make_features__arturius(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, -# count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, -# count_pctl_75, count_pctl_95, count_pctl_100, count_variance): - - # ============================================================================= # Unpack the vars depending on which features_set - sampling_period # The order needs to match the output of the cut method. 
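# =============================================================================
# Distilled shape of the tf.data pipeline assembled in __init__ above (TF 1.x style,
# as used in this repo): crop a random window from each series, drop windows that are
# too sparse, build the feature tensors, then batch and prefetch.
# =============================================================================
import tensorflow as tf

def build_batches(feature_tensors, cut_fn, keep_fn, make_features_fn,
                  n_epoch=None, batch_size=128, num_threads=3, prefetch=2):
    ds = tf.data.Dataset.from_tensor_slices(tuple(feature_tensors)).repeat(n_epoch)
    ds = (ds.map(cut_fn)
            .filter(keep_fn)
            .map(make_features_fn, num_parallel_calls=num_threads)
            .batch(batch_size)
            .prefetch(prefetch))
    return ds.make_initializable_iterator()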
@@ -260,8 +252,6 @@ def make_features(self, *args): elif self.sampling_period == 'monthly': x_counts, y_counts, lagged_counts, moy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args - - # ============================================================================= # Do train - predict splits # ============================================================================= @@ -408,14 +398,11 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter # Choose right cutter function for current ModelMode cutter = {ModelMode.TRAIN: self.cut_train, ModelMode.EVAL: self.cut_eval, ModelMode.PREDICT: self.cut_eval} - #Choose the right feature maker function, depending on feature_set used: -# feature_maker = {'arturius': self.make_features__arturius, 'full': self.make_features__full} # Create dataset, transform features and assemble batches root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) batch = (root_ds .map(cutter[mode]) .filter(self.reject_filter) - #.map(feature_maker[self.features_set], num_parallel_calls=num_threads) .map(self.make_features, num_parallel_calls=num_threads) .batch(batch_size) .prefetch(runs_in_burst * 2) @@ -424,16 +411,17 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter it_tensors = self.iterator.get_next() # Assign all tensors to class variables - if self.features_set=='arturius' or self.features_set=='full': - self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ - self.norm_std, self.series_features, self.page_ix = it_tensors #!!!!!!!!!!!!! names hardcoded ned to change to my fgeatures - if self.features_set=='simple': +# if self.features_set=='arturius' or self.features_set=='full': + #self.time_x is the tensor of features, regardless of which feature set, so this can stay same. + #But if not doing lagged then can return None for that ??? + self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ + self.norm_std, self.series_features, self.page_ix = it_tensors #!!!!!!!!!!!!! 
names hardcoded ned to change to my fgeatures + """if self.features_set=='simple': pass # if self.features_set=='full': # pass if self.features_set=='full_w_context': - pass - + pass""" self.encoder_features_depth = self.time_x.shape[2].value print('self.encoder_features_depth',self.encoder_features_depth) From a9066aea0a9c690f4a1c8012b14e7d0890ccc213 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 9 Jul 2018 12:11:52 -0700 Subject: [PATCH 13/42] input pipe features, arturius and full features all samplingperiods work on kaggle data --- hparams.py | 4 +-- input_pipe.py | 49 ++++++++++++++++++----------- make_features.py | 82 ++++++++++++++++++++++++++---------------------- 3 files changed, 78 insertions(+), 57 deletions(-) diff --git a/hparams.py b/hparams.py index dfab587..24e66b6 100755 --- a/hparams.py +++ b/hparams.py @@ -5,8 +5,8 @@ params_s32 = dict( batch_size=256, #train_window=380, - #train_window=283, - train_window=65,#try 65 w our data to see if allows more samples through filter + train_window=283, + #train_window=65,#try 65 w our data to see if allows more samples through filter train_skip_first=0, rnn_depth=267, use_attn=False, diff --git a/input_pipe.py b/input_pipe.py index bb827f8..e2b8e5a 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -111,15 +111,15 @@ def cut(self, counts, start, end): if self.sampling_period=='daily': cropped_dow = self.inp.dow[start:end] cropped_woy = self.inp.woy[start:end] - cropped_moy = 0*cropped_dow #Month information is alreayd contained in week information. COuld incude anyway to be explicit, but for now do not use as a feature +# cropped_moy = 0*cropped_dow #Month information is alreayd contained in week information. COuld incude anyway to be explicit, but for now do not use as a feature elif self.sampling_period=='weekly': cropped_woy = self.inp.woy[start:end] - cropped_dow = 0*cropped_woy - cropped_moy = 0*cropped_woy +# cropped_dow = 0*cropped_woy +# cropped_moy = 0*cropped_woy elif self.sampling_period=='monthly': cropped_moy = self.inp.moy[start:end] - cropped_dow = 0*cropped_moy - cropped_woy = 0*cropped_moy +# cropped_dow = 0*cropped_moy +# cropped_woy = 0*cropped_moy @@ -244,13 +244,15 @@ def make_features(self, *args): #For now just use the same ... 
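# =============================================================================
# Hedged sketch of the extra per-series scalars the 'full' feature set carries
# (count_pctl_* and count_variance). The exact definitions live in make_features.py;
# the helper below only assumes they are plain percentiles/variance of each series.
# =============================================================================
import numpy as np

def full_scalar_features(counts):
    counts = np.asarray(counts, dtype=float)
    counts = counts[~np.isnan(counts)]
    pctls = np.percentile(counts, [0, 5, 25, 75, 95, 100])
    names = ['count_pctl_0', 'count_pctl_5', 'count_pctl_25',
             'count_pctl_75', 'count_pctl_95', 'count_pctl_100']
    feats = dict(zip(names, pctls))
    feats['count_variance'] = counts.var()
    return feats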
# count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance) elif self.features_set == 'full': - f = ooooooooo if self.sampling_period == 'daily': - x_counts, y_counts, lagged_counts, dow, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, dow, woy, page_ix, count_median, year_autocorr, quarter_autocorr,\ + count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args elif self.sampling_period == 'weekly': - x_counts, y_counts, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, woy, page_ix, count_median, year_autocorr, quarter_autocorr,\ + count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args elif self.sampling_period == 'monthly': - x_counts, y_counts, lagged_counts, moy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, moy, page_ix, count_median, year_autocorr, quarter_autocorr,\ + count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args # ============================================================================= # Do train - predict splits @@ -282,10 +284,19 @@ def make_features(self, *args): # count_pctl_75, # count_pctl_95, # count_pctl_100, -# count_variance]) - scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100])#!!!!!!! if kaggle feats. Else need also the oher quntiles too - flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) - series_features = tf.expand_dims(flat_features, 0) +# count_variance]) + if self.features_set == 'arturius': + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100]) + flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) + series_features = tf.expand_dims(flat_features, 0) + elif self.features_set == 'full': + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance]) + #flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) + flat_features = tf.concat([scalar_features], axis=0) + series_features = tf.expand_dims(flat_features, 0) + + + # print(scalar_features) #4 # print(flat_features) #18 @@ -328,6 +339,9 @@ def make_features(self, *args): #!!!!! why no lagged_y alnoe, only in y_features??? #!!!! why no norm_y_counts ????? + + print('x_features') + print(x_features) return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_features, page_ix #Must match up with setting self.XYZ = it_tensors below in __init__. 
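# =============================================================================
# Shape sketch (NumPy, illustrative sizes only) of how x_features is assembled above:
# time-varying columns are concatenated along the feature axis and the per-series
# scalar features are tiled across every timestep of the train window.
# =============================================================================
import numpy as np

train_window, n_lags, n_scalar = 283, 4, 4            # illustrative sizes
norm_x = np.random.randn(train_window, 1)             # normalized counts
x_dow = np.random.randn(train_window, 2)              # cos/sin day of week
x_woy = np.random.randn(train_window, 2)              # cos/sin week of year
x_lagged = np.random.randn(train_window, n_lags)      # normalized lagged counts
series_features = np.random.randn(1, n_scalar)        # median, autocorrs, pctl_100

x_features = np.concatenate(
    [norm_x, x_dow, x_woy, x_lagged, np.tile(series_features, (train_window, 1))],
    axis=1)
print(x_features.shape)                               # (283, 13)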
@@ -407,6 +421,8 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter .batch(batch_size) .prefetch(runs_in_burst * 2) ) + print('---------------- Done batching ----------------') + print(batch) self.iterator = batch.make_initializable_iterator() it_tensors = self.iterator.get_next() @@ -452,10 +468,7 @@ def page_features(inp: VarFeeder, features_set): raise Exception('not ready yet') elif features_set=='full': -# print(inp.counts) - dummy = tf.zeros_like(inp.counts) -# print(dummy) - d = (inp.counts, dummy, dummy, dummy, + d = (inp.counts, inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr, @@ -465,7 +478,7 @@ def page_features(inp: VarFeeder, features_set): inp.count_pctl_75, inp.count_pctl_95, inp.count_pctl_100, - inp.count_variance) + inp.count_variance) elif features_set=='full_w_context': raise Exception('not ready yet') diff --git a/make_features.py b/make_features.py index 61cb2e4..3845f34 100755 --- a/make_features.py +++ b/make_features.py @@ -33,7 +33,10 @@ def read_all(data_type,sampling_period) -> pd.DataFrame: Reads source data for training/prediction """ def read_file(file): - df = read_cached(file).set_index('Page') + try: + df = read_cached(file).set_index('Page') + except AttributeError: + raise Exception('File not exist, did you specify correct sampling_period?') df.columns = df.columns.astype('M8[D]') return df @@ -345,47 +348,47 @@ def run(): WEEK_NUMBER_MAX = 53. #52. + features_times = pd.date_range(data_start, features_end, freq='D') + if args.sampling_period=='daily': - - features_days = pd.date_range(data_start, features_end, freq='D') - #dow = normalize(features_days.dayofweek.values) + #dow = normalize(features_times.dayofweek.values) week_period = 7 / (2 * np.pi) - dow_norm = features_days.dayofweek.values / week_period #S.dayofweek gives day of the week with Monday=0, Sunday=6 + dow_norm = features_times.dayofweek.values / week_period #S.dayofweek gives day of the week with Monday=0, Sunday=6 dow = np.stack([np.cos(dow_norm), np.sin(dow_norm)], axis=-1) #index of week number, when sampling at DAILY level if WEEK_NUMBER_METHOD=='pandas': - week = features_days.weekofyear.values + week = features_times.weekofyear.values elif WEEK_NUMBER_METHOD=='floor7': - week = np.floor((features_days.dayofyear.values - 1.) /7.) + week = np.floor((features_times.dayofyear.values - 1.) /7.) year_period = WEEK_NUMBER_MAX / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) #To catch longer term trending data, can also include year number. [depending on size of train / prediction windows and random sampling boundaries could be same value over whole series] - year_nmumber = features_days.year + year_number = features_times.year if args.sampling_period=='weekly': #index of week number, when sampling at WEEKLY level (this is different than above) - fff = pd.date_range(data_start, features_end, freq='W') +# features_times = pd.date_range(data_start, features_end, freq='W') #!!!!!!!!!!!!! still need to worry about alignment ... if WEEK_NUMBER_METHOD=='pandas': - week = fff.weekofyear.values + week = features_times.weekofyear.values elif WEEK_NUMBER_METHOD=='floor7': - week = np.floor((fff.dayofyear.values - 1.) /7.) + week = np.floor((features_times.dayofyear.values - 1.) 
/7.) year_period = WEEK_NUMBER_MAX / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) - year_nmumber = features_days.year + year_number = features_times.year if args.sampling_period=='monthly': #month index (only used if sampling monthly) - fff = pd.date_range(data_start, features_end, freq='M') #!!!!! need to think about alignment of starting month on particular dates .... +# features_times = pd.date_range(data_start, features_end, freq='M') #!!!!! need to think about alignment of starting month on particular dates .... period = 12. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year - moy_norm = fff.month.values / period #not sure if by default this starts on Monday vs Sunday + moy_norm = features_times.month.values / period #not sure if by default this starts on Monday vs Sunday moy = np.stack([np.cos(moy_norm), np.sin(moy_norm)], axis=-1) - year_nmumber = features_days.year + year_number = features_times.year @@ -412,9 +415,9 @@ def run(): count_median=count_median, year_autocorr=year_autocorr, quarter_autocorr=quarter_autocorr, - dow=dow,#N x 2 array since encoded week periodicity as complex number + #dow=dow,#N x 2 array since encoded week periodicity as complex number - woy=woy,#!!!!!!!! + #woy=woy,#!!!!!!!! count_pctl_100=percentiles[5],#max #!!!!!!!!!!!!!!!! just to see what happens: apend one of my features. ) @@ -422,7 +425,7 @@ def run(): tensors = dict( counts=df, count_median=count_median,#this is just the median feature, can put in others too - dow=dow, + #dow=dow, ) elif (args.features_set == 'full') or (args.features_set == 'full_w_context'): @@ -453,35 +456,40 @@ def run(): ) - if args.sampling_period=='daily': - tensors['dow']=dow - tensors['woy']=woy #and want want week number too, aggregating last ~10 days into week 52 - elif args.sampling_period=='weekly': - tensors['woy']=woy - elif args.sampling_period=='monthly': - tensors['moy']=moy - else: - raise Exception('Must specify correct sampling period') + else: + raise Exception(f'features_set must be specified\nOne of ["arturius","simple","full","full_w_context"]') + + + + + if args.sampling_period=='daily': + tensors['dow']=dow + tensors['woy']=woy #and want want week number too, aggregating last ~10 days into week 52 + elif args.sampling_period=='weekly': + tensors['woy']=woy + elif args.sampling_period=='monthly': + tensors['moy']=moy + else: + raise Exception('Must specify correct sampling period') - #If provide other info based on e.g. new location (any features that are not derived purely from the time series) - if args.features_set == 'full_w_context': - tensors['country'] = asdasdasd - tensors['region'] = asdasdasd - tensors['city_population'] = asdasdasd - raise Exception('not implemented yet') - #... can write scraper function to get these ... + """#If provide other info based on e.g. new location (any features that are not derived purely from the time series) + if args.features_set == 'full_w_context': + tensors['country'] = asdasdasd + tensors['region'] = asdasdasd + tensors['city_population'] = asdasdasd + raise Exception('not implemented yet') + #... 
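# =============================================================================
# NumPy sketch of the cyclical time encodings built above: a periodic index (day of
# week, week of year, month of year) is mapped to a point on the unit circle so the
# last and first values of each period end up adjacent instead of far apart.
# =============================================================================
import numpy as np

def cyclic_encode(index_values, period):
    """Encode a periodic integer index as (cos, sin) pairs, shape (N, 2)."""
    angle = np.asarray(index_values, dtype=float) / (period / (2 * np.pi))
    return np.stack([np.cos(angle), np.sin(angle)], axis=-1)

# e.g. dow = cyclic_encode(features_times.dayofweek.values, 7)
#      woy = cyclic_encode(week, WEEK_NUMBER_MAX)
#      moy = cyclic_encode(features_times.month.values, 12)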
can write scraper function to get these ...""" - else: - raise Exception(f'features_set must be specified\nOne of ["arturius","simple","full","full_w_context"]') + plain = dict( - features_days=len(features_days), + features_times=len(features_times), data_days=len(df.columns), N_time_series=len(df), data_start=data_start, From 06eacdabed45debdc25c3021387548940e7b8458 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 9 Jul 2018 14:12:01 -0700 Subject: [PATCH 14/42] predict script add arguments needed; trainer minor updates --- PREDICT.py | 8 ++++---- Readme.md | 14 ++++++++++++-- hparams.py | 6 +++--- trainer.py | 21 +++++++++++++++------ 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/PREDICT.py b/PREDICT.py index e8adbb6..ca3c071 100755 --- a/PREDICT.py +++ b/PREDICT.py @@ -26,8 +26,8 @@ FEATURES_SET = 'arturius'# 'arturius' 'simple' 'full' - - +SAMPLING_PERIOD = 'daily' +DATA_TYPE = 'ours' #'kaggle' #'ours' # ============================================================================= @@ -52,7 +52,7 @@ def mean_smape(true, pred): #read_all funcion loads the (hardcoded) file "data/all.pkl", or otherwise train2.csv print('loading data...') from make_features import read_all -df_all = read_all() +df_all = read_all(DATA_TYPE,SAMPLING_PERIOD) print('df_all.columns') print(df_all.columns) @@ -69,7 +69,7 @@ def mean_smape(true, pred): t_preds = [] for tm in range(3): tf.reset_default_graph() - t_preds.append(predict(FEATURES_SET, paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63, + t_preds.append(predict(FEATURES_SET, SAMPLING_PERIOD, paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63, n_models=3, target_model=tm, seed=2, batch_size=2048, asgd=True)) diff --git a/Readme.md b/Readme.md index 1e11cb9..6ccd8e4 100755 --- a/Readme.md +++ b/Readme.md @@ -50,8 +50,19 @@ GK modifications for own data: python3 make_features.py data/vars kaggle daily full --add_days=63 python3 make_features.py data/vars ours daily full --add_days=63 +#Just in case making new features +cd data +rm -R vars/ +rm -R cpt/ +rm -R cpt_tmp/ +rm -R logs/ +rm *.pkl +cd .. +ll data/ + #no reason to expect 10000 to 11500 is good range to save out. View loss along the way python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --name TEST_attn_head --hparam_set=TEST_attn_head --name TEST_stacked --hparam_set=TEST_stacked @@ -65,9 +76,8 @@ python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asg To do: -0. -- got working with few examples of our added features (one static, one time varying 2D), now just organize programmatically 0. save log files to view SMAPE etc metrics during training 1. finish PREPROCESS.py to do better imputation using basic forecasting method [just use STL or Theta to fill in small gaps; otherwise remove blocks] -2. PREPROCESS.py - allow downsample in time to weekly, monthly +2. for weekly. monthly inputs, will there be issue in Kaggle code??? 3. Prediction intervals 4. 
Architecture improvements \ No newline at end of file diff --git a/hparams.py b/hparams.py index 24e66b6..901200c 100755 --- a/hparams.py +++ b/hparams.py @@ -3,10 +3,10 @@ # Manually selected params params_s32 = dict( - batch_size=256, + batch_size=64,#256, #train_window=380, - train_window=283, - #train_window=65,#try 65 w our data to see if allows more samples through filter + #train_window=283, + train_window=30,#try 65 w our data to see if allows more samples through filter train_skip_first=0, rnn_depth=267, use_attn=False, diff --git a/trainer.py b/trainer.py index fae64c1..d98ffd4 100755 --- a/trainer.py +++ b/trainer.py @@ -422,7 +422,7 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model real_train_pages = splitter.splits[0].train_size real_eval_pages = splitter.splits[0].test_size - + items_per_eval = real_eval_pages * eval_pct eval_batches = int(np.ceil(items_per_eval / eval_batch_size)) steps_per_epoch = real_train_pages // batch_size @@ -432,9 +432,17 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model global_step = tf.train.get_or_create_global_step() inc_step = tf.assign_add(global_step, 1) - all_models: List[ModelTrainerV2] = [] + print('real_train_pages', real_train_pages) + print('real_eval_pages', real_eval_pages) + print('batch_size', batch_size) + print('items_per_eval', items_per_eval) + print('eval_batches', eval_batches) + print('steps_per_epoch', steps_per_epoch) + print('eval_every_step', eval_every_step) + + def create_model(features_set, sampling_period, scope, index, prefix, seed): with tf.variable_scope('input') as inp_scope: @@ -448,11 +456,12 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): rand_seed=seed, train_skip_first=hparams.train_skip_first, back_offset=predict_window if forward_split else 0) inp_scope.reuse_variables() + TCT = .3 #0.01 if side_split: side_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, - train_completeness_threshold=0.01, predict_completeness_threshold=0, + train_completeness_threshold=TCT, predict_completeness_threshold=0, train_window=train_window, rand_seed=seed, runs_in_burst=eval_batches, back_offset=predict_window * (2 if forward_split else 1)) else: @@ -461,7 +470,7 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! 
page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, - train_completeness_threshold=0.01, predict_completeness_threshold=0, + train_completeness_threshold=TCT, predict_completeness_threshold=0, train_window=train_window, rand_seed=seed, runs_in_burst=eval_batches, back_offset=predict_window) else: @@ -637,12 +646,12 @@ def ema_vars(model): has_best_indicator = '↑' else: has_best_indicator = ' ' - status = "%2d: Best top SMAPE=%.3f%s (%s)" % ( + status = "%2d: Best top %.3f%s (%s)" % ( epoch + 1, current_top, has_best_indicator, ",".join(["%.3f" % m.top for m in eval_smape.metrics])) if trainer.has_active(): - status += ", frwd/side best MAE=%.3f/%.3f, SMAPE=%.3f/%.3f; avg MAE=%.3f/%.3f, SMAPE=%.3f/%.3f, %d am" % \ + status += ", frwd/side best MAE=%.3f/%.3f, SMAPE=%.3f/%.3f; avg MAE=%.3f/%.3f, SMAPE=%.3f/%.3f, %d active models" % \ (eval_mae.best_epoch, eval_mae_side.best_epoch, eval_smape.best_epoch, eval_smape_side.best_epoch, eval_mae.avg_epoch, eval_mae_side.avg_epoch, eval_smape.avg_epoch, eval_smape_side.avg_epoch, trainer.has_active()) From c1c6f370d8dedc5ce2222993ba77ef4a583308d5 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 9 Jul 2018 17:20:22 -0700 Subject: [PATCH 15/42] -- --- PREDICT.py | 7 ++++--- PREPROCESS.py | 4 +++- Readme.md | 2 +- hparams.py | 2 +- trainer.py | 5 ++++- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/PREDICT.py b/PREDICT.py index ca3c071..28ca514 100755 --- a/PREDICT.py +++ b/PREDICT.py @@ -28,6 +28,7 @@ FEATURES_SET = 'arturius'# 'arturius' 'simple' 'full' SAMPLING_PERIOD = 'daily' DATA_TYPE = 'ours' #'kaggle' #'ours' +Nmodels = 3. # ============================================================================= @@ -67,16 +68,16 @@ def mean_smape(true, pred): #preds = predict(paths, default_hparams(), back_offset=0, # n_models=3, target_model=0, seed=2, batch_size=2048, asgd=True) t_preds = [] -for tm in range(3): +for tm in range(3): #!!!!!!!! Nmodels tf.reset_default_graph() t_preds.append(predict(FEATURES_SET, SAMPLING_PERIOD, paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63, n_models=3, target_model=tm, seed=2, batch_size=2048, asgd=True)) # ============================================================================= -# average the 3 models predictions +# average the N models predictions # ============================================================================= -preds = sum(t_preds)/3. +preds = sum(t_preds)/Nmodels # ============================================================================= diff --git a/PREPROCESS.py b/PREPROCESS.py index 6d4b1cd..bc1c8fb 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -390,6 +390,8 @@ def remove_downsample_columns(df, out_of_range_fill_value): #for now assume we do ipmutation THEN aggregation: #df = aggregate(df,sampling_period) + df*= 0. + df += 237. print(df) #SHould end up with a csv that is rows are series (each id), cols are dates @@ -443,7 +445,7 @@ def make_key_csv(df): START_DATE = '2015-01-01' #None END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful - SAMPLING_PERIOD = 'weekly' #'daily', 'weekly', 'monthly' + SAMPLING_PERIOD = 'daily' #'daily', 'weekly', 'monthly' # ============================================================================= diff --git a/Readme.md b/Readme.md index 6ccd8e4..719bae9 100755 --- a/Readme.md +++ b/Readme.md @@ -62,7 +62,7 @@ ll data/ #no reason to expect 10000 to 11500 is good range to save out. 
View loss along the way python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 -python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 --name TEST_attn_head --hparam_set=TEST_attn_head --name TEST_stacked --hparam_set=TEST_stacked diff --git a/hparams.py b/hparams.py index 901200c..21600cd 100755 --- a/hparams.py +++ b/hparams.py @@ -8,7 +8,7 @@ #train_window=283, train_window=30,#try 65 w our data to see if allows more samples through filter train_skip_first=0, - rnn_depth=267, + rnn_depth=27,#267, use_attn=False, attention_depth=64, attention_heads=1, diff --git a/trainer.py b/trainer.py index d98ffd4..fd5d3d5 100755 --- a/trainer.py +++ b/trainer.py @@ -434,6 +434,9 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model all_models: List[ModelTrainerV2] = [] + print('eval_pct', eval_pct) + print('eval_k', eval_k) + print('eval_batch_size', eval_batch_size) print('real_train_pages', real_train_pages) print('real_eval_pages', real_eval_pages) print('batch_size', batch_size) @@ -646,7 +649,7 @@ def ema_vars(model): has_best_indicator = '↑' else: has_best_indicator = ' ' - status = "%2d: Best top %.3f%s (%s)" % ( + status = "%2d: Best top SMAPE=%.3f%s (%s)" % ( epoch + 1, current_top, has_best_indicator, ",".join(["%.3f" % m.top for m in eval_smape.metrics])) From c68ad1e4989a12ededb7400055353829c7fd0fda Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 12 Jul 2018 15:11:02 -0700 Subject: [PATCH 16/42] debugging, cleanup, adding other optimizers --- Adam_HD_optimizer.py | 92 ++++++++++++++++++++++++++++++ PREPROCESS.py | 8 ++- SGDN_HD_optimizer.py | 76 +++++++++++++++++++++++++ classification_models.py | 120 +++++++++++++++++++++++++++++++++++++++ drnn.py | 101 ++++++++++++++++++++++++++++++++ hparams.py | 15 +++-- input_pipe.py | 67 ++++++++++++++++++---- make_features.py | 17 +++--- model.py | 92 ++++++++++++++++++++++++++---- trainer.py | 4 +- 10 files changed, 550 insertions(+), 42 deletions(-) create mode 100644 Adam_HD_optimizer.py create mode 100644 SGDN_HD_optimizer.py create mode 100644 classification_models.py create mode 100644 drnn.py diff --git a/Adam_HD_optimizer.py b/Adam_HD_optimizer.py new file mode 100644 index 0000000..341f44b --- /dev/null +++ b/Adam_HD_optimizer.py @@ -0,0 +1,92 @@ +#Copy paste from https://github.com/zadaianchuk/HyperGradientDescent/blob/master/Adam_HD_optimizer.py +#Hypergradient Descent Optimizer + + + + +from __future__ import division + +import tensorflow as tf + +class AdamHDOptimizer(tf.train.GradientDescentOptimizer): + + def __init__(self, alpha_0, beta =10**(-7), name="HGD", mu=0.99, eps = 10**(-8),type_of_learning_rate ="global"): + super(AdamHDOptimizer, self).__init__(beta, name=name) + + self._mu = mu + self._alpha_0 = alpha_0 + self._beta = beta + self._eps = eps + self._type = type_of_learning_rate + + + def minimize(self, loss, global_step): + + # Algo params as constant tensors + mu = tf.convert_to_tensor(self._mu, dtype=tf.float32) + alpha_0=tf.convert_to_tensor(self._alpha_0, dtype=tf.float32) + beta=tf.convert_to_tensor(self._beta, dtype=tf.float32) + eps = tf.convert_to_tensor(self._eps, dtype=tf.float32) + + var_list = tf.trainable_variables() + + # create and retrieve slot variables for: + # 
direction of previous step + ds = [self._get_or_make_slot(var, + tf.constant(0.0, tf.float32, var.get_shape()), "direction", "direction") + for var in var_list] + # current learning_rate alpha + if self._type == "global": + alpha = self._get_or_make_slot(alpha_0, alpha_0, "learning_rate", "learning_rate") + else: + alphas = [self._get_or_make_slot(var, + tf.constant(self._alpha_0, tf.float32, var.get_shape()), "learning_rates", "learning_rates") + for var in var_list] + # moving average estimation + ms = [self._get_or_make_slot(var, + tf.constant(0.0, tf.float32, var.get_shape()), "m", "m") + for var in var_list] + vs = [self._get_or_make_slot(var, + tf.constant(0.0, tf.float32, var.get_shape()), "v", "v") + for var in var_list] + # power of mu for bias-corrected first and second moment estimate + mu_power = tf.get_variable("mu_power", shape=(), dtype=tf.float32, trainable=False, initializer=tf.constant_initializer(1.0)) + + # update moving averages of first and second moment: + grads = tf.gradients(loss, var_list) + grads_squared = [tf.square(g) for g in grads] + m_updates = [m.assign(mu*m + (1.0-mu)*g) for m, g in zip(ms, grads)] #new means + v_updates = [v.assign(mu*v + (1.0-mu)*g2) for v, g2 in zip(vs, grads_squared)] + mu_power_update = [tf.assign(mu_power,tf.multiply(mu_power,mu))] + # bais correction of the estimates + with tf.control_dependencies(v_updates+m_updates+mu_power_update): + ms_hat = [tf.divide(m,tf.constant(1.0) - mu_power) for m in ms] + vs_hat = [tf.divide(v,tf.constant(1.0) - mu_power) for v in vs] + + #update of learning rate alpha, main difference between ADAM and ADAM-HD + if self._type == "global": + hypergrad = sum([tf.reduce_sum(tf.multiply(d,g)) for d,g in zip(ds, grads)]) + alphas_update = [alpha.assign(alpha-beta*hypergrad)] + else: + hypergrads = [tf.multiply(d,g) for d,g in zip(ds, grads)] + alphas_update = [alpha.assign(alpha-beta*hypergrad) for alpha,hypergrad in zip(alphas,hypergrads)] + + # update step directions + with tf.control_dependencies(alphas_update): #we want to be sure that alphas calculated using previous step directions + ds_updates=[d.assign(-tf.divide(m, tf.sqrt(v) + self._eps)) for (m,v,d) in zip(ms_hat,vs_hat,ds)] + + # update parameters of the model + with tf.control_dependencies(ds_updates): + if self._type == "global": + dirs = [alpha*d for d in ds] + alpha_norm = alpha + else: + dirs = [alpha*d for d, alpha in zip(ds,alphas)] + alpha_norm = sum([tf.reduce_mean(alpha**2) for alpha in alphas]) + variable_updates = [v.assign_add(d) for v, d in zip(var_list, dirs)] + global_step.assign_add(1) + # add summaries (track alphas changes) + with tf.name_scope("summaries"): + with tf.name_scope("per_iteration"): + alpha_norm_sum=tf.summary.scalar("alpha", alpha_norm, collections=[tf.GraphKeys.SUMMARIES, "per_iteration"]) + return tf.group(*variable_updates) \ No newline at end of file diff --git a/PREPROCESS.py b/PREPROCESS.py index bc1c8fb..0f0c29e 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -352,6 +352,11 @@ def remove_downsample_columns(df, out_of_range_fill_value): dd = pd.DataFrame(dates).T dd['Page'] = u + #Make a good eay cae to overfit + dd*= 0. + dd += u + + #If doing imputation / other #for each series individually #... @@ -390,8 +395,7 @@ def remove_downsample_columns(df, out_of_range_fill_value): #for now assume we do ipmutation THEN aggregation: #df = aggregate(df,sampling_period) - df*= 0. - df += 237. 
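# =============================================================================
# Context for the overfit sanity check above (hedged sketch, not part of the patch):
# turning every series into a constant equal to its own id gives the model a target
# it should fit almost perfectly, which is a quick way to verify the input pipeline
# and loss wiring before training on real data.
# =============================================================================
import numpy as np
import pandas as pd

def make_overfit_frame(ids, dates):
    """One constant series per id; any working model should drive the loss near zero."""
    data = np.tile(np.asarray(ids, dtype=float)[:, None], (1, len(dates)))
    return pd.DataFrame(data, index=ids, columns=dates)

# e.g. make_overfit_frame([1, 2, 3], pd.date_range('2017-01-01', periods=30, freq='D'))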
+ print(df) #SHould end up with a csv that is rows are series (each id), cols are dates diff --git a/SGDN_HD_optimizer.py b/SGDN_HD_optimizer.py new file mode 100644 index 0000000..549e42b --- /dev/null +++ b/SGDN_HD_optimizer.py @@ -0,0 +1,76 @@ +#Copy paste from https://github.com/zadaianchuk/HyperGradientDescent/blob/master/SGDN_HD_optimizer.py +#Hypergradient Descent Optimizer + + +from __future__ import division + +import tensorflow as tf + +class MomentumSGDHDOptimizer(tf.train.GradientDescentOptimizer): + + def __init__(self, alpha_0, beta =10**(-7), name="HGD", mu=0.95, type_of_learning_rate ="global"): + super(MomentumSGDHDOptimizer, self).__init__(beta, name=name) + self._mu = mu + self._alpha_0 = alpha_0 + self._beta = beta + self._type = type_of_learning_rate + + + def minimize(self, loss, global_step): + + # Algo params as constant tensors + mu = tf.convert_to_tensor(self._mu, dtype=tf.float32) + alpha_0=tf.convert_to_tensor(self._alpha_0, dtype=tf.float32) + beta=tf.convert_to_tensor(self._beta, dtype=tf.float32) + + var_list = tf.trainable_variables() + + # create and retrieve slot variables for: + # direction of previous step + ds = [self._get_or_make_slot(var, + tf.constant(0.0, tf.float32, var.get_shape()), "direction", "direction") + for var in var_list] + # current learning_rate alpha + if self._type == "global": + alpha = self._get_or_make_slot(alpha_0, alpha_0, "learning_rate", "learning_rate") + else: + alphas = [self._get_or_make_slot(var, + tf.constant(self._alpha_0, tf.float32, var.get_shape()), "learning_rates", "learning_rates") + for var in var_list] + # moving average estimation + ms = [self._get_or_make_slot(var, + tf.constant(0.0, tf.float32, var.get_shape()), "m", "m") + for var in var_list] + + # update moving averages of the stochastic gradient: + grads = tf.gradients(loss, var_list) + m_updates = [m.assign(mu*m + (1.0-mu)*g) for m, g in zip(ms, grads)] + + #update of learning rate alpha, it is the main difference between SGD with Nesterov momentum + #and its hypergradient version + if self._type == "global": + hypergrad = sum([tf.reduce_sum(tf.multiply(d,g)) for d,g in zip(ds, grads)]) + alphas_update = [alpha.assign(alpha-beta*hypergrad)] + else: + hypergrads = [tf.multiply(d,g) for d,g in zip(ds, grads)] + alphas_update = [alpha.assign(alpha-beta*hypergrad) for alpha,hypergrad in zip(alphas,hypergrads)] + + # update step directions + with tf.control_dependencies(m_updates+alphas_update): #we want to be sure that alphas calculated using previous step directions + ds_updates=[d.assign(-(mu*m + (1.0-mu)*g)) for (m,d,g) in zip(ms,ds,grads)] + + # update parameters of the model + with tf.control_dependencies(ds_updates): + if self._type == "global": + alpha_norm = alpha + variable_updates = [v.assign_add(alpha*d) for v, d in zip(var_list, ds)] + else: + alpha_norm = sum([tf.reduce_mean(alpha**2) for alpha in alphas]) + variable_updates = [v.assign_add(alpha*d) for v,d,alpha in zip(var_list, ds,alphas)] + global_step.assign_add(1) + + #add summuries (track alphas changes) + with tf.name_scope("summaries"): + with tf.name_scope("per_iteration"): + alpha_sum=tf.summary.scalar("alpha", alpha_norm, collections=[tf.GraphKeys.SUMMARIES, "per_iteration"]) + return tf.group(*variable_updates) \ No newline at end of file diff --git a/classification_models.py b/classification_models.py new file mode 100644 index 0000000..e6b633b --- /dev/null +++ b/classification_models.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +""" +Dilated LSTM Cells in Tensorflow + +From github 
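# =============================================================================
# Plain-NumPy sketch of the hypergradient update both HD optimizers above implement
# (global variant): the learning rate itself takes a gradient step, using the inner
# product of the current gradient with the previous update direction, and the
# parameter update then proceeds as usual with the adjusted rate.
# =============================================================================
import numpy as np

def sgd_hd_step(params, grad, prev_direction, alpha, beta=1e-7):
    alpha = alpha - beta * float(np.dot(grad.ravel(), prev_direction.ravel()))
    direction = -grad   # plain SGD direction; SGDN-HD / Adam-HD use momentum / Adam directions
    params = params + alpha * direction
    return params, direction, alpha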
user "code-terminator" +https://github.com/code-terminator/DilatedRNN/blob/master/models/drnn.py + +based on the paper +Dilated Recurrent Neural Networks +Nov 2017 +Chang et al. +https://arxiv.org/pdf/1710.02224.pdf +""" + +import tensorflow as tf +from drnn import multi_dRNN_with_dilations + +def _contruct_cells(hidden_structs, cell_type): + """ + This function contructs a list of cells. + """ + # error checking + if cell_type not in ["RNN", "LSTM", "GRU"]: + raise ValueError("The cell type is not currently supported.") + + # define cells + cells = [] + for hidden_dims in hidden_structs: + if cell_type == "RNN": + cell = tf.contrib.rnn.BasicRNNCell(hidden_dims) + elif cell_type == "LSTM": + cell = tf.contrib.rnn.BasicLSTMCell(hidden_dims) + elif cell_type == "GRU": + cell = tf.contrib.rnn.GRUCell(hidden_dims) + cells.append(cell) + + return cells + + +def _rnn_reformat(x, input_dims, n_steps): + """ + This function reformat input to the shape that standard RNN can take. + + Inputs: + x -- a tensor of shape (batch_size, n_steps, input_dims). + Outputs: + x_reformat -- a list of 'n_steps' tenosrs, each has shape (batch_size, input_dims). + """ + # permute batch_size and n_steps + x_ = tf.transpose(x, [1, 0, 2]) + # reshape to (n_steps*batch_size, input_dims) + x_ = tf.reshape(x_, [-1, input_dims]) + # split to get a list of 'n_steps' tensors of shape (batch_size, input_dims) + x_reformat = tf.split(x_, n_steps, 0) + + return x_reformat + + +def drnn_classification(x, + hidden_structs, + dilations, + n_steps, + n_classes, + input_dims=1, + cell_type="RNN"): + """ + This function construct a multilayer dilated RNN for classifiction. + Inputs: + x -- a tensor of shape (batch_size, n_steps, input_dims). + hidden_structs -- a list, each element indicates the hidden node dimension of each layer. + dilations -- a list, each element indicates the dilation of each layer. + n_steps -- the length of the sequence. + n_classes -- the number of classes for the classification. + input_dims -- the input dimension. + cell_type -- the type of the RNN cell, should be in ["RNN", "LSTM", "GRU"]. + + Outputs: + pred -- the prediction logits at the last timestamp and the last layer of the RNN. + 'pred' does not pass any output activation functions. 
+ """ + # error checking + assert (len(hidden_structs) == len(dilations)) + + # reshape inputs + x_reformat = _rnn_reformat(x, input_dims, n_steps) + + # construct a list of cells + cells = _contruct_cells(hidden_structs, cell_type) + + # define dRNN structures + layer_outputs = multi_dRNN_with_dilations(cells, x_reformat, dilations) + + if dilations[0] == 1: + # dilation starts at 1, no data dependency lost + # define the output layer + weights = tf.Variable(tf.random_normal(shape=[hidden_structs[-1], + n_classes])) + bias = tf.Variable(tf.random_normal(shape=[n_classes])) + # define prediction + pred = tf.add(tf.matmul(layer_outputs[-1], weights), bias) + else: + # dilation starts not at 1, needs to fuse the output + + # define output layer + weights = tf.Variable(tf.random_normal(shape=[hidden_structs[ + -1] * dilations[0], n_classes])) + bias = tf.Variable(tf.random_normal(shape=[n_classes])) + + # concat hidden_outputs + for idx, i in enumerate(range(-dilations[0], 0, 1)): + if idx == 0: + hidden_outputs_ = layer_outputs[i] + else: + hidden_outputs_ = tf.concat( + [hidden_outputs_, layer_outputs[i]], + axis=1) + + pred = tf.add(tf.matmul(hidden_outputs_, weights), bias) + + return pred \ No newline at end of file diff --git a/drnn.py b/drnn.py new file mode 100644 index 0000000..3fb5799 --- /dev/null +++ b/drnn.py @@ -0,0 +1,101 @@ +""" +Dilated LSTM Cells in Tensorflow + +From github user "code-terminator" +https://github.com/code-terminator/DilatedRNN/blob/master/models/drnn.py + +based on the paper +Dilated Recurrent Neural Networks +Nov 2017 +Chang et al. +https://arxiv.org/pdf/1710.02224.pdf +""" + +import copy +import itertools +import numpy as np +import tensorflow as tf + +def dRNN(cell, inputs, rate, scope='default'): + """ + This function constructs a layer of dilated RNN. + Inputs: + cell -- the dilation operations is implemented independent of the RNN cell. + In theory, any valid tensorflow rnn cell should work. + inputs -- the input for the RNN. inputs should be in the form of + a list of 'n_steps' tenosrs. Each has shape (batch_size, input_dims) + rate -- the rate here refers to the 'dilations' in the orginal WaveNet paper. + scope -- variable scope. + Outputs: + outputs -- the outputs from the RNN. + """ + n_steps = len(inputs) + if rate < 0 or rate >= n_steps: + raise ValueError('The \'rate\' variable needs to be adjusted.') + print "Building layer: %s, input length: %d, dilation rate: %d, input dim: %d." % ( + scope, n_steps, rate, inputs[0].get_shape()[1]) + + # make the length of inputs divide 'rate', by using zero-padding + EVEN = (n_steps % rate) == 0 + if not EVEN: + # Create a tensor in shape (batch_size, input_dims), which all elements are zero. + # This is used for zero padding + zero_tensor = tf.zeros_like(inputs[0]) + dialated_n_steps = n_steps // rate + 1 + print "=====> %d time points need to be padded. 
" % ( + dialated_n_steps * rate - n_steps) + print "=====> Input length for sub-RNN: %d" % (dialated_n_steps) + for i_pad in xrange(dialated_n_steps * rate - n_steps): + inputs.append(zero_tensor) + else: + dialated_n_steps = n_steps // rate + print "=====> Input length for sub-RNN: %d" % (dialated_n_steps) + + # now the length of 'inputs' divide rate + # reshape it in the format of a list of tensors + # the length of the list is 'dialated_n_steps' + # the shape of each tensor is [batch_size * rate, input_dims] + # by stacking tensors that "colored" the same + + # Example: + # n_steps is 5, rate is 2, inputs = [x1, x2, x3, x4, x5] + # zero-padding --> [x1, x2, x3, x4, x5, 0] + # we want to have --> [[x1; x2], [x3; x4], [x_5; 0]] + # which the length is the ceiling of n_steps/rate + dilated_inputs = [tf.concat(inputs[i * rate:(i + 1) * rate], + axis=0) for i in range(dialated_n_steps)] + + # building a dialated RNN with reformated (dilated) inputs + dilated_outputs, _ = tf.contrib.rnn.static_rnn( + cell, dilated_inputs, + dtype=tf.float32, scope=scope) + + # reshape output back to the input format as a list of tensors with shape [batch_size, input_dims] + # split each element of the outputs from size [batch_size*rate, input_dims] to + # [[batch_size, input_dims], [batch_size, input_dims], ...] with length = rate + splitted_outputs = [tf.split(output, rate, axis=0) + for output in dilated_outputs] + unrolled_outputs = [output + for sublist in splitted_outputs for output in sublist] + # remove padded zeros + outputs = unrolled_outputs[:n_steps] + + return outputs + + +def multi_dRNN_with_dilations(cells, inputs, dilations): + """ + This function constucts a multi-layer dilated RNN. + Inputs: + cells -- A list of RNN cells. + inputs -- A list of 'n_steps' tensors, each has shape (batch_size, input_dims). + dilations -- A list of integers with the same length of 'cells' indicates the dilations for each layer. + Outputs: + x -- A list of 'n_steps' tensors, as the outputs for the top layer of the multi-dRNN. + """ + assert (len(cells) == len(dilations)) + x = copy.copy(inputs) + for cell, dilation in zip(cells, dilations): + scope_name = "multi_dRNN_dilation_%d" % dilation + x = dRNN(cell, x, dilation, scope=scope_name) + return x \ No newline at end of file diff --git a/hparams.py b/hparams.py index 21600cd..5eaeeef 100755 --- a/hparams.py +++ b/hparams.py @@ -3,12 +3,12 @@ # Manually selected params params_s32 = dict( - batch_size=64,#256, + batch_size=123,#256, #train_window=380, - #train_window=283, - train_window=30,#try 65 w our data to see if allows more samples through filter + train_window=283, + #train_window=30,#try 65 w our data to see if allows more samples through filter train_skip_first=0, - rnn_depth=27,#267, + rnn_depth=267, use_attn=False, attention_depth=64, attention_heads=1, @@ -49,14 +49,13 @@ # Test setting with multiple attention heads #python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --no_eval --no_forward_split --asgd_decay=0.99 --max_steps=11500 --save_from_step=200 params_TEST_attn_head = dict( - batch_size=256, - #train_window=380, + batch_size=64,#256, train_window=283, train_skip_first=0, - rnn_depth=267, + rnn_depth=27,#267, use_attn=True,#!!!!!!!!!!!!!!!! Set True attention_depth=64, - attention_heads=2,#!!!!!!!!!!!!!!!! Set True + attention_heads=1,#!!!!!! 
encoder_readout_dropout=0.4768781146510798, encoder_rnn_layers=1, diff --git a/input_pipe.py b/input_pipe.py index e2b8e5a..4e8b3c2 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -22,6 +22,12 @@ def __init__(self, test_set: List[tf.Tensor], train_set: List[tf.Tensor], test_s class Splitter: + """ + This is the splitter used when side_split + (vs. FakeSplitter when not side_split [when forward_split]) + + Is typical train-test split + """ def cluster_pages(self, cluster_idx: tf.Tensor): """ Shuffles pages so all user_agents of each unique pages stays together in a shuffled list @@ -62,14 +68,42 @@ def prepare_split(i): train_sampled_size = int(round(train_size * train_sampling)) test_idx = splits[i][:test_sampled_size] train_idx = complements[i][:train_sampled_size] + +# print(test_size) +# print(train_size) +# print(test_sampled_size) +# print(train_sampled_size) +# print(test_idx) +# print(train_idx) + #When doing --side_split validation option, was getting a type error + #when creating test_set, tran_set list comprehensions: change dtype here for idx + test_idx = tf.cast(test_idx, tf.int32) + train_idx = tf.cast(train_idx, tf.int32) + + test_idx = tf.Print(test_idx, ['test_idx',tf.shape(test_idx),test_idx]) + train_idx = tf.Print(train_idx, ['train_idx',tf.shape(train_idx),train_idx]) + """48354 + 96709 + 48354 + 96709 + Tensor("strided_slice_1:0", shape=(48354,), dtype=float32, device=/device:CPU:0) + Tensor("strided_slice_2:0", shape=(96709,), dtype=float32, device=/device:CPU:0)""" test_set = [tf.gather(tensor, test_idx, name=mk_name('test', tensor)) for tensor in tensors] tran_set = [tf.gather(tensor, train_idx, name=mk_name('train', tensor)) for tensor in tensors] +# print(test_set) +# print(tran_set) return Split(test_set, tran_set, test_sampled_size, train_sampled_size) self.splits = [prepare_split(i) for i in range(n_splits)] class FakeSplitter: + """ + This is the splitter used when forward_split + (vs. 
Splitter when not forward_split [when side_split]) + + Is typical train-test split + """ def __init__(self, tensors: List[tf.Tensor], n_splits, seed, test_sampling=1.0): total_series = tensors[0].shape[0].value N_time_series = int(round(total_series * test_sampling)) @@ -80,7 +114,7 @@ def mk_name(prefix, tensor): def prepare_split(i): idx = tf.random_shuffle(tf.range(0, N_time_series, dtype=tf.int32), seed + i) train_tensors = [tf.gather(tensor, idx, name=mk_name('shfl', tensor)) for tensor in tensors] - if test_sampling < 1.0: + if test_sampling < 1.0: #Only use subset of time series = test_sampling sampled_idx = idx[:N_time_series] test_tensors = [tf.gather(tensor, sampled_idx, name=mk_name('shfl_test', tensor)) for tensor in tensors] else: @@ -97,7 +131,7 @@ def cut(self, counts, start, end): :param counts: counts timeseries :param start: start index :param end: end index - :return: tuple (train_counts, test_counts, dow, lagged_counts) + :return: tuple (train_counts, test_counts, lagged_counts, [dow,woy,moy]) """ # Pad counts to ensure we have enough array length for prediction counts = tf.concat([counts, tf.fill([self.predict_window], np.NaN)], axis=0) @@ -178,22 +212,24 @@ def cut_train(self, counts, *args): :param args: pass-through data, will be appended to result :return: result of cut() + args """ - n_days = self.predict_window + self.train_window + n_timesteps = self.predict_window + self.train_window # How much free space we have to choose starting day - free_space = self.inp.data_days - n_days - self.back_offset - self.start_offset + free_space = self.inp.data_days - n_timesteps - self.back_offset - self.start_offset if self.verbose: + #!!!!!! doesn't really matter since this is just printout, but would need to change for WEEKLY / MONTHLY lower_train_start = self.inp.data_start + pd.Timedelta(self.start_offset, 'D') - lower_test_end = lower_train_start + pd.Timedelta(n_days, 'D') + lower_test_end = lower_train_start + pd.Timedelta(n_timesteps, 'D') lower_test_start = lower_test_end - pd.Timedelta(self.predict_window, 'D') upper_train_start = self.inp.data_start + pd.Timedelta(free_space - 1, 'D') - upper_test_end = upper_train_start + pd.Timedelta(n_days, 'D') + upper_test_end = upper_train_start + pd.Timedelta(n_timesteps, 'D') upper_test_start = upper_test_end - pd.Timedelta(self.predict_window, 'D') print(f"Free space for training: {free_space} days.") print(f" Lower train {lower_train_start}, prediction {lower_test_start}..{lower_test_end}") print(f" Upper train {upper_train_start}, prediction {upper_test_start}..{upper_test_end}") # Random starting point offset = tf.random_uniform((), self.start_offset, free_space, dtype=tf.int32, seed=self.rand_seed) - end = offset + n_days +# offset = tf.Print(offset,['offset',tf.shape(offset),offset]) + end = offset + n_timesteps # Cut all the things return self.cut(counts, offset, end) + args @@ -211,6 +247,7 @@ def cut_eval(self, counts, *args): def reject_filter(self, x_counts, y_counts, *args): """ Rejects timeseries having too many zero datapoints (more than self.max_train_empty) + [by this point, NANs would have already been converted to 0's, this is is NAN's U 0's] """ if self.verbose: print("max empty %d train %d predict" % (self.max_train_empty, self.max_predict_empty)) @@ -270,7 +307,7 @@ def make_features(self, *args): std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_counts, mean))) norm_x_counts = (x_counts - mean) / std norm_y_counts = (y_counts - mean) / std - norm_lagged_counts = (lagged_counts - mean) / std #!!!!!! 
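# =============================================================================
# Rough sketch of the check reject_filter performs (inferred from its docstring; the
# real implementation is in input_pipe.py): keep a cropped window only if the number
# of empty (zero) timesteps in its train part stays within max_train_empty.
# =============================================================================
import tensorflow as tf

def keep_window(x_counts, max_train_empty):
    n_empty = tf.reduce_sum(tf.cast(tf.equal(x_counts, 0.0), tf.int32))
    return n_empty <= max_train_empty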
seems like there is some leakage in time here??? The y lagged are normalized in a way that is a function of the y data ?? + norm_lagged_counts = (lagged_counts - mean) / std # Split lagged counts to train and test x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) @@ -308,7 +345,7 @@ def make_features(self, *args): #the time INdependent features [constant per fixture] will just be tiled same way either way except diff lengths # Train features, depending on measurement frequency - x_features = tf.expand_dims(norm_x_counts, -1) # [n_days] -> [n_days, 1] + x_features = tf.expand_dims(norm_x_counts, -1) # [n_timesteps] -> [n_timesteps, 1] if self.sampling_period == 'daily': x_features = tf.concat([x_features, x_dow, x_woy], axis=1) elif self.sampling_period == 'weekly': @@ -318,7 +355,7 @@ def make_features(self, *args): #Regardess of period/frequency will have below features: x_features = tf.concat([x_features, x_lagged, # Stretch series_features to all training days - # [1, features] -> [n_days, features] + # [1, features] -> [n_timesteps, features] tf.tile(series_features, [self.train_window, 1])], axis=1) # Test features @@ -331,7 +368,7 @@ def make_features(self, *args): #Regardess of period/frequency will have below features: y_features = tf.concat([y_features, y_lagged, # Stretch series_features to all testing days - # [1, features] -> [n_days, features] + # [1, features] -> [n_timesteps, features] tf.tile(series_features, [self.predict_window, 1]) ], axis=1) @@ -407,6 +444,14 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter self.mode = mode self.verbose = verbose + + print('max_train_empty',self.max_train_empty) + print('max_predict_empty',self.max_predict_empty) + print('train_window',self.train_window) + print('predict_window',self.predict_window) + print('attn_window',self.attn_window) + + # Reserve more processing threads for eval/predict because of larger batches num_threads = 3 if mode == ModelMode.TRAIN else 6 diff --git a/make_features.py b/make_features.py index 3845f34..5eb8642 100755 --- a/make_features.py +++ b/make_features.py @@ -113,7 +113,7 @@ def single_autocorr(series, lag): def batch_autocorr(data, lag, starts, ends, threshold, backoffset=0): """ Calculate autocorrelation for batch (many time series at once) - :param data: Time series, shape [N_time_series, n_days] + :param data: Time series, shape [N_time_series, n_timesteps] :param lag: Autocorrelation lag :param starts: Start index for each series :param ends: End index for each series @@ -123,8 +123,8 @@ def batch_autocorr(data, lag, starts, ends, threshold, backoffset=0): autocorrelation value is NaN """ n_series = data.shape[0] - n_days = data.shape[1] - max_end = n_days - backoffset + n_timesteps = data.shape[1] + max_end = n_timesteps - backoffset corr = np.empty(n_series, dtype=np.float64) support = np.empty(n_series, dtype=np.float64) for i in range(n_series): @@ -149,21 +149,21 @@ def find_start_end(data: np.ndarray): """ Calculates start and end of real traffic data. 
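# =============================================================================
# Equivalent vectorized form of find_start_end above (NumPy sketch), as a reference
# for what the per-page loops compute: the index of the first and last timestep that
# is neither NaN nor zero, or -1 when a series has no such timestep. Expects a 2-D
# float array of shape [N_time_series, n_timesteps].
# =============================================================================
import numpy as np

def find_start_end_vectorized(data):
    valid = np.where(np.isnan(data), 0.0, data) > 0
    has_any = valid.any(axis=1)
    start_idx = np.where(has_any, valid.argmax(axis=1), -1).astype(np.int32)
    end_idx = np.where(has_any,
                       data.shape[1] - 1 - valid[:, ::-1].argmax(axis=1),
                       -1).astype(np.int32)
    return start_idx, end_idx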
Start is an index of first non-zero, non-NaN value, end is index of last non-zero, non-NaN value - :param data: Time series, shape [N_time_series, n_days] + :param data: Time series, shape [N_time_series, n_timesteps] :return: """ N_time_series = data.shape[0] - n_days = data.shape[1] + n_timesteps = data.shape[1] start_idx = np.full(N_time_series, -1, dtype=np.int32) end_idx = np.full(N_time_series, -1, dtype=np.int32) for page in range(N_time_series): # scan from start to the end - for day in range(n_days): + for day in range(n_timesteps): if not np.isnan(data[page, day]) and data[page, day] > 0: start_idx[page] = day break # reverse scan, from end to start - for day in range(n_days - 1, -1, -1): + for day in range(n_timesteps - 1, -1, -1): if not np.isnan(data[page, day]) and data[page, day] > 0: end_idx[page] = day break @@ -284,7 +284,6 @@ def run(): # Get the data df, nans, starts, ends = prepare_data(args.start, args.end, args.valid_threshold, args.data_type, args.sampling_period) - # ============================================================================= # STATIC FEATURES @@ -294,7 +293,7 @@ def run(): data_start, data_end = df.columns[0], df.columns[-1] # We have to project some date-dependent features (day of week, etc) to the future dates for prediction - features_end = data_end + pd.Timedelta(args.add_days, unit='D') + features_end = data_end + pd.Timedelta(args.add_days, unit='D') #!!!!!!!!!!! will need to change for WEEKLY MONTHLY sampled print(f"start: {data_start}, end:{data_end}, features_end:{features_end}") # Group unique pages by agents diff --git a/model.py b/model.py index 6145305..faf9fae 100755 --- a/model.py +++ b/model.py @@ -15,6 +15,17 @@ # RNN = tf.contrib.cudnn_rnn.CudnnRNNRelu + +def debug_tensor_print(tensor): + """ + Debugging mode: + Print info about a tensor in realtime + """ + tensor_list = [tensor.name, tf.shape(tensor), tensor] + tensor = tf.Print(tensor, tensor_list) + return tensor + + def default_init(seed): # replica of tf.glorot_uniform_initializer(seed=seed) return layers.variance_scaling_initializer(factor=1.0, @@ -237,7 +248,10 @@ def decode_predictions(decoder_readout, inp: InputPipe): batch_readout = tf.transpose(decoder_readout) batch_std = tf.expand_dims(inp.norm_std, -1) batch_mean = tf.expand_dims(inp.norm_mean, -1) - return batch_readout * batch_std + batch_mean + + ret = batch_readout * batch_std + batch_mean +# ret = tf.Print(ret, ['ret:',tf.shape(ret),ret, 'batch_readout:',batch_readout, 'batch_std:',batch_std, 'batch_mean',batch_mean]) + return ret def calc_loss(predictions, true_y, additional_mask=None): @@ -263,7 +277,11 @@ def calc_loss(predictions, true_y, additional_mask=None): def make_train_op(loss, ema_decay=None, prefix=None): - optimizer = COCOB() + #optimizer = COCOB() + ##train.AdamOptimizer train.GradientDescentOptimizer + optimizer = tf.train.AdamOptimizer() #!!!!!try simpler optimizer on our data. +# optimizer = tf.train.GradientDescentOptimizer(1e-9) #!!!!!try simpler optimizer on our data. + glob_step = tf.train.get_global_step() # Add regularization losses @@ -391,6 +409,9 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a fingerprint, seed=seed) # Run decoder + #... 
= decoder(encoder_state, attn_features, prediction_inputs, previous_y) + print('inp.norm_x[:, -1]',inp.norm_x[:, -1]) + print('inp.time_y',inp.time_y) decoder_targets, decoder_outputs = self.decoder(encoder_state, attn_features if hparams.use_attn else None, inp.time_y, inp.norm_x[:, -1]) @@ -399,7 +420,13 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a dec_activation_loss = rnn_activation_loss(decoder_outputs, hparams.decoder_activation_loss / inp.predict_window) # Get final denormalized predictions - self.predictions = decode_predictions(decoder_targets, inp) + vv = decode_predictions(decoder_targets, inp) + vv = tf.Print(vv, ['decode_predictions',vv,tf.shape(vv)]) + self.predictions = vv +# print('self.predictions (still log1p(counts))') +# print(self.predictions) + + # Calculate losses and build training op if inp.mode == ModelMode.PREDICT: @@ -416,9 +443,12 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a else: self.mae, smape_loss, self.smape, self.loss_item_count = calc_loss(self.predictions, inp.true_y, additional_mask=loss_mask) + #from calc_loss: + #mae_loss, smape_loss(true_y, predictions, weights), calc_smape_rounded(true_y, predictions, weights), tf.size(true_y) + if is_train: # Sum all losses - total_loss = smape_loss + enc_stab_loss + dec_stab_loss + enc_activation_loss + dec_activation_loss + total_loss = smape_loss + enc_stab_loss + dec_stab_loss + enc_activation_loss + dec_activation_loss #!!!!!!!! put in pinball loss instead of SMAPE when doing quantiles self.train_op, self.glob_norm, self.ema = make_train_op(total_loss, asgd_decay, prefix=graph_prefix) @@ -458,6 +488,20 @@ def build_cell(idx): else: cell = build_cell(0) + + #!!!!!! on our data, when doing side_split, encoder_state is fine [no NANs], + #but when doing walk_forward, some rows (instances) are all NANs (and the others all defined), + #then eventually every instance becomes NANs + N_nans = tf.reduce_sum(tf.cast(tf.is_nan(encoder_state), tf.float32)) + tt = tf.cast(tf.is_nan(encoder_state), tf.float32) + ff = tf.reduce_sum(tt,axis=1) + ggg = tf.cast(tf.equal(ff, ff*0.+267.), tf.float32) + N_all_NAN_encoder_states = tf.reduce_sum(ggg) + total = tf.reduce_prod(tf.shape(encoder_state)) + encoder_state = tf.Print(encoder_state,['encoder_state', tf.shape(encoder_state), encoder_state, 'N_nans', N_nans, 'total', total, 'N_all_NAN_encoder_states', N_all_NAN_encoder_states]) + + + nest.assert_same_structure(encoder_state, cell.state_size) predict_days = self.inp.predict_window assert prediction_inputs.shape[1] == predict_days @@ -470,11 +514,13 @@ def build_cell(idx): # Stop condition for decoding loop def cond_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): + #!!!!! ???? Need to change when doing as weekly data??? return time < predict_days # FC projecting layer to get single predicted value from RNN output def project_output(tensor): - return tf.layers.dense(tensor, 1, name='decoder_output_proj', kernel_initializer=self.default_init()) + N_pctls=1 #!!!!!!!!!! 
quantiles + return tf.layers.dense(tensor, N_pctls, name='decoder_output_proj', kernel_initializer=self.default_init()) def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): """ @@ -496,13 +542,15 @@ def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_ # Append previous predicted value + attention vector to input features next_input = tf.concat([prev_output, features, attn], axis=1) else: - # Append previous predicted value to input features next_input = tf.concat([prev_output, features], axis=1) + # Append previous predicted value to input features # Run RNN cell output, state = cell(next_input, prev_state) # Make prediction from RNN outputs - projected_output = project_output(output) + projected_output = project_output(output) #!!!!!!!!!! quantiles + projected_output = tf.Print(projected_output, ['time',time,'projected_output',projected_output,tf.shape(projected_output),'output',output,tf.shape(output),'state',state,tf.shape(state) ,'prev_output',prev_output,tf.shape(prev_output) ,'features',features,tf.shape(features),features[1,:18]]) + # Append step results to the buffer arrays if return_raw_outputs: array_outputs = array_outputs.write(time, output) @@ -515,13 +563,37 @@ def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_ tf.expand_dims(previous_y, -1), encoder_state, tf.TensorArray(dtype=tf.float32, size=predict_days), - tf.TensorArray(dtype=tf.float32, size=predict_days) if return_raw_outputs else tf.constant(0)] + tf.TensorArray(dtype=tf.float32, size=predict_days) if return_raw_outputs else tf.constant(0)] #!!!!!!! size= ... x N_pctls # Run the loop - _, _, _, targets_ta, outputs_ta = tf.while_loop(cond_fn, loop_fn, loop_init) - + _time, _projected_output, _state, targets_ta, outputs_ta = tf.while_loop(cond_fn, loop_fn, loop_init) + + + print('decoder') +# print('_time',_time) +# _time = debug_tensor_print(_time) +# print('_projected_output',_projected_output) +# _projected_output = debug_tensor_print(_projected_output) +# print('_state',_state) +# _state = debug_tensor_print(_state) + + +# targets_ta_tensor = tf.convert_to_tensor(targets_ta) +# targets_ta_tensor = tf.Print(targets_ta_tensor,[targets_ta_tensor]) +# print('targets_ta',targets_ta) +# print('outputs_ta',outputs_ta) # Get final tensors from buffer arrays targets = targets_ta.stack() # [time, batch_size, 1] -> [time, batch_size] targets = tf.squeeze(targets, axis=-1) raw_outputs = outputs_ta.stack() if return_raw_outputs else None + +# print('targets',targets) + #!!!!!!!!!!! why targets becomes NANs ????? +# why targets NANs? 
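One way to chase the "why targets NaNs" question above (an editorial debugging sketch, not part of the patch): in TF 1.x, the version noted in the Readme, tf.check_numerics raises InvalidArgumentError at the first evaluated tensor containing NaN/Inf, which usually localizes the source faster than tf.Print on downstream values such as the stacked targets. A minimal, self-contained illustration:

import tensorflow as tf

def guard(tensor, label):
    # Hypothetical helper: fail fast if `tensor` ever evaluates to NaN/Inf.
    return tf.check_numerics(tensor, message='NaN/Inf in ' + label)

x = tf.constant([1.0, float('nan')])
with tf.Session() as sess:
    try:
        sess.run(guard(x, 'example tensor'))
    except tf.errors.InvalidArgumentError:
        print('caught NaN/Inf')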
+ targets = debug_tensor_print(targets) #63 x 245, except for first 2 prints for each new iteration it is 63 x 64 +# raw_outputs = debug_tensor_print(raw_outputs) #is 63 x 64 x 267 + +# print_list = ['_time', _time.name, tf.shape(_time), _time] +# raw_outputs = tf.Print(raw_outputs, print_list) + return targets, raw_outputs diff --git a/trainer.py b/trainer.py index fd5d3d5..470bb50 100755 --- a/trainer.py +++ b/trainer.py @@ -459,7 +459,7 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): rand_seed=seed, train_skip_first=hparams.train_skip_first, back_offset=predict_window if forward_split else 0) inp_scope.reuse_variables() - TCT = .3 #0.01 + TCT = 0.01 if side_split: side_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, @@ -740,7 +740,7 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, else: predictions += cp_predictions predictions /= len(checkpoints) - offset = pd.Timedelta(back_offset, 'D') + offset = pd.Timedelta(back_offset, 'D') #!!!!!!!!!!!! need to change these lines when sampling WEEKLY MONTHLY start_prediction = inp.data_end + pd.Timedelta('1D') - offset end_prediction = start_prediction + pd.Timedelta(predict_window - 1, 'D') predictions.columns = pd.date_range(start_prediction, end_prediction) From e33979ec1d0a534b9c33d8c99416bdb1a01b224e Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 16 Jul 2018 10:51:08 -0700 Subject: [PATCH 17/42] doing median of last 4 weeks imputation for daily sampled data --- PREPROCESS.py | 113 ++++++++++++++++++++++++++++++++++++++++++++++---- Readme.md | 29 +++++++++---- 2 files changed, 128 insertions(+), 14 deletions(-) diff --git a/PREPROCESS.py b/PREPROCESS.py index 0f0c29e..0388596 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -206,12 +206,22 @@ def imputation__simple(df,imputation_method): cols = new_df.columns.tolist() new_df = new_df[cols[-1:]+cols[:-1]] new_df.reset_index(drop=True,inplace=True) - return new_df + return new_df + + + if (imputation_method == 'median') or (imputation_method == 'mean'): df = imputation__simple(df,imputation_method) - + +# if imputation_method == 'lagKmedian': +# #First get rid of the big blocks of mising values [more than 1 seasonality long] +## df = imputation_big_gaps(df) +# #Then deal with the short missing holes +# N_seasons = 4 +# df = imputation_lagKmedian(df,N_seasons) + else: raise Exception('not implemented other methods yet') @@ -233,6 +243,79 @@ def imputation__simple(df,imputation_method): +def imputation_lagKmedian_single_series(df,seasonality,N_seasons): + """ + Fill in short missing gaps by replacing missing value with: + median over last K weeks for that day. + E.g. Monday is missing, so use median count over 4 previous Mondays + + Intended for short holes. Remove longer ones in chunks of length seasonality. + + For now assuming that big chunk removal is done AFTER this step. 
+ """ + #If the whole series is empty (all -1): + if np.alltrue(df.drop(columns='Page').values==-1): + return df + + max_block_length = seasonality - 1 + offsets = np.arange(-max_block_length,1,1) + + cols = list(df.columns) + cols = cols[:-1]#only the date cols., not the "Page" col +# N_timesteps = len(cols) +# print(cols) +# print(N_timesteps) + c = df['Page'].values + _ = df.drop(columns=['Page']) +# print(_.values) + missing_inds = np.where(_<0.)[1] + + + if missing_inds.size > 0: + #Means there are some missing values + #So scan through the data and fill in bad values, + #starting after the first real data [ignore all -1's that occur before + #time series starts for real] + first_real_ind = np.where(_>=0.)[1][0] + missing_inds = missing_inds[missing_inds>first_real_ind] +# print(missing_inds) + + for mi in missing_inds: + #Only fill in those gaps that are small holes (less than 1 seasonality) + #Check that this particular missing val is not in a missing block + #that has >= 1 seasonality size: +# print(mi) + in_block = False + + for off in offsets: +# print(_.values) + block_inds = np.arange(mi+off,mi+off+seasonality,1) +# print(block_inds) +# print(block_inds, [i in missing_inds for i in block_inds]) + if np.alltrue([i in missing_inds for i in block_inds]): + in_block = True + break +# x = _.values[0][mi+off : mi+off+seasonality] +# if np.alltrue(x==-1): + if in_block: + continue + #If it is not in a completely missing block [at least 1 value is recorded], then do lag K median: + prev_K_inds = np.arange(mi-seasonality, max(0, mi - N_seasons*(seasonality+1)), -seasonality).tolist() + t = _[_.columns[prev_K_inds]].values + t = t[t>=0] + imputed_val = np.median(t) + #If all K previous timesteps were -1, then would give nan, so set manually to -1: + if np.isnan(imputed_val):# == np.nan: + imputed_val = -1 + _[_.columns[mi]] = imputed_val + +# g = np.where(_<0.)[1] +# g = g[g>first_real_ind] +# print(g) +# print('\n'*3) + _['Page'] = c + return _ + def format_like_Kaggle(df, myDataDir, imputation_method, sampling_period, start_date=None, end_date=None): @@ -352,9 +435,23 @@ def remove_downsample_columns(df, out_of_range_fill_value): dd = pd.DataFrame(dates).T dd['Page'] = u - #Make a good eay cae to overfit - dd*= 0. - dd += u + print(i,u) + if imputation_method=='lagKmedian': + if sampling_period=='daily': + N_seasons = 4 + seasonality = 7 + elif sampling_period=='weekly': + N_seasons = 4 + seasonality = 1 + dd = imputation_lagKmedian_single_series(dd,seasonality,N_seasons) + +# if i == 58: +# v=eeeeee + #Make a good easy case to overfit + DEBUG = False + if DEBUG: + dd*= 0. + dd += u #If doing imputation / other @@ -369,6 +466,8 @@ def remove_downsample_columns(df, out_of_range_fill_value): df.reset_index(drop=True,inplace=True) + + #If we did aggregation, then above reogranization will have many of the columns Nan / -1, #since e.g. went from daily to weekly, then 6 days of the week will look empty. So remove them. if sampling_period=='weekly': @@ -389,7 +488,7 @@ def remove_downsample_columns(df, out_of_range_fill_value): #Imputation, dealing with missing seasonality blocks / out of phase - if imputation_method: + if imputation_method=='median' or imputation_method=='mean': df = do_imputation(df,imputation_method) #Could do impoutation then downsampling, vs. downsampling then imputation ... unclear which is better here in general. 
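For reference, a minimal self-contained sketch of the lag-K median idea on a plain 1-D numpy series (an illustration under simplifying assumptions: NaN marks missing values, and the Page column, block-gap detection, and -1 bookkeeping of the real function are ignored):

import numpy as np

def lag_k_median_impute(series, seasonality=7, n_seasons=4):
    # Replace each NaN with the median of up to n_seasons earlier values
    # spaced one seasonality apart (e.g. the previous 4 same weekdays).
    out = np.array(series, dtype=float)
    for i in np.where(np.isnan(out))[0]:
        lags = [i - k * seasonality for k in range(1, n_seasons + 1) if i - k * seasonality >= 0]
        vals = out[lags]
        vals = vals[~np.isnan(vals)]
        if vals.size:
            out[i] = np.median(vals)
    return out

x = np.array([5, 6, 7, 5, 6, 7, 8,
              5, np.nan, 7, 5, 6, np.nan, 8,
              5, 6, 7, 5, 6, 7, 8], dtype=float)
print(lag_k_median_impute(x))  # the two NaNs become 6.0 and 7.0 here (medians of earlier same-weekday values)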
#for now assume we do ipmutation THEN aggregation: @@ -445,7 +544,7 @@ def make_key_csv(df): # ============================================================================= # TOTAL COMPLETED TRIPS: myDataDir = r"/Users/kocher/Desktop/forecasting/exData/totalCompletedTripsDaily" - IMPUTATION_METHOD = None #'median' #'STL' #None + IMPUTATION_METHOD = 'lagKmedian' #'median' #'STL' #'lagKmedian' #None START_DATE = '2015-01-01' #None END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful diff --git a/Readme.md b/Readme.md index 719bae9..b7fef7d 100755 --- a/Readme.md +++ b/Readme.md @@ -43,12 +43,11 @@ See also [detailed model description](how_it_works.md) GK modifications for own data: 1. PREPROCESS.py - Maximize reuse of existing architecture: just put my data in exact same format as Kaggle competition csv's -2. $source activate gktf +2. $source activate gktf. #previously set up a conda environment w/ Python 3.6, tensorflow 1.4.0, to match same versions as Kaggle solution 3. $cd ..../kaggle-web-traffic 4. $python3 PREPROCESS.py 5. $python3 make_features.py data/vars kaggle daily arturius --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full, full_w_context} depending on using default Arturius kaggle vs. own custom for this application; and specify sampling period -python3 make_features.py data/vars kaggle daily full --add_days=63 -python3 make_features.py data/vars ours daily full --add_days=63 +python3 make_features.py data/vars kaggle daily full --add_days=85 #Just in case making new features cd data @@ -60,12 +59,26 @@ rm *.pkl cd .. ll data/ +python3 make_features.py data/vars ours daily full --add_days=63 +python3 make_features.py data/vars kaggle daily full --add_days=63 + + + + + #no reason to expect 10000 to 11500 is good range to save out. View loss along the way python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 ---name TEST_attn_head --hparam_set=TEST_attn_head +python3 trainer.py full daily --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 --name TEST_stacked --hparam_set=TEST_stacked +--no_eval +--side_split + +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --side_split + + + 7. $python3 PREDICT.py @@ -76,8 +89,10 @@ python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_de To do: -0. save log files to view SMAPE etc metrics during training -1. finish PREPROCESS.py to do better imputation using basic forecasting method [just use STL or Theta to fill in small gaps; otherwise remove blocks] -2. for weekly. monthly inputs, will there be issue in Kaggle code??? +0. print out the SMAPE for the actual data [current is doing SMAPE of the unrounded log1p(data) which will likely be much smaller than for real] +1. Visualizations of predictions on our own data +1. why encoder_state NANs in it [is it train predict window completeness thresholds?] + +2. for weekly. monthly inputs, need to change few places in tensorflow code 3. Prediction intervals 4. 
Architecture improvements \ No newline at end of file From e555ee19e3acfb12c5dc5882c83b48c3dd61814d Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 16 Jul 2018 13:19:30 -0700 Subject: [PATCH 18/42] changed preprocessing to NANs instead of -1 which fixed the SMAPE 2 issue --- .gitignore | 6 ++++-- PREPROCESS.py | 45 +++++++++++++++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index fc3b504..8be5e29 100644 --- a/.gitignore +++ b/.gitignore @@ -14,5 +14,7 @@ data/submission.csv.gz data/* */.DS_STORE .DS_STORE -images/* -ex_figs/* +images/ +ex_figs/ + +*.png diff --git a/PREPROCESS.py b/PREPROCESS.py index 0388596..f61b34a 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -85,7 +85,7 @@ def get_earliest_latest_dates(df): -def __missing_vals_distribution(df): +def __missing_vals_distribution(df,out_of_range_fill_value): """ Look at two things: - What fraction of our time series are desne vs. have >= 1 missing value? @@ -96,7 +96,7 @@ def __missing_vals_distribution(df): start/end missing, nd intermedite gaps have been filled with -1 """ - def make_cdf(v): + def make_cdf(v,out_of_range_fill_value): c = Counter(v) x = list(c.keys()) x = np.array(x) -1 #-1 to go from diff in days from present data -> gap length @@ -113,7 +113,7 @@ def make_cdf(v): #get fraction dense vs sparse: dd = df.values[:,1:] - sparse = (dd==-1).sum(axis=1) + sparse = (dd==out_of_range_fill_value).sum(axis=1) Nsparse = float((sparse>0).sum()) print(Nsparse) Ntotal = float(dd.shape[0]) @@ -127,12 +127,12 @@ def make_cdf(v): #not officially starting yet, or it got closed out. all_gaps = [] for row in dd: - inds = np.where(row!=-1)[0] + inds = np.where(row!=out_of_range_fill_value)[0] x = np.diff(inds) t = list(x[x>1]) if len(t)>0: all_gaps.extend(t) - make_cdf(all_gaps) + make_cdf(all_gaps,out_of_range_fill_value) @@ -243,7 +243,7 @@ def imputation__simple(df,imputation_method): -def imputation_lagKmedian_single_series(df,seasonality,N_seasons): +def imputation_lagKmedian_single_series(df,seasonality,N_seasons,out_of_range_fill_value): """ Fill in short missing gaps by replacing missing value with: median over last K weeks for that day. @@ -253,8 +253,8 @@ def imputation_lagKmedian_single_series(df,seasonality,N_seasons): For now assuming that big chunk removal is done AFTER this step. 
""" - #If the whole series is empty (all -1): - if np.alltrue(df.drop(columns='Page').values==-1): + #If the whole series is empty (all -1/NAN): + if np.alltrue(df.drop(columns='Page').values==out_of_range_fill_value): return df max_block_length = seasonality - 1 @@ -295,8 +295,6 @@ def imputation_lagKmedian_single_series(df,seasonality,N_seasons): if np.alltrue([i in missing_inds for i in block_inds]): in_block = True break -# x = _.values[0][mi+off : mi+off+seasonality] -# if np.alltrue(x==-1): if in_block: continue #If it is not in a completely missing block [at least 1 value is recorded], then do lag K median: @@ -306,7 +304,7 @@ def imputation_lagKmedian_single_series(df,seasonality,N_seasons): imputed_val = np.median(t) #If all K previous timesteps were -1, then would give nan, so set manually to -1: if np.isnan(imputed_val):# == np.nan: - imputed_val = -1 + imputed_val = out_of_range_fill_value _[_.columns[mi]] = imputed_val # g = np.where(_<0.)[1] @@ -399,6 +397,17 @@ def remove_downsample_columns(df, out_of_range_fill_value): df.drop(columns=bad_cols,inplace=True) return df + def make_index_col_left(df): + """ + Make sure order as expected by putting page col left + """ + id_col_name = 'Page' + cols = df.columns.tolist() + cols.remove(id_col_name) + + df = df[ [id_col_name] + cols] + return df + #Rename columns to be as in Kaggle data: df.rename(columns={'id':'Page'},inplace=True) @@ -413,7 +422,7 @@ def remove_downsample_columns(df, out_of_range_fill_value): latest = min(latest,end_date) idx = pd.date_range(earliest,latest) #!!!!!! fro now doing daily. When doing weekly also keep with default freq='D' . If change to 'W' alignment gets messed up. Just do daily 'D', then later can correct easily. - OUT_OF_RANGE_FILL_VALUE = -1. #np.NaN #0 #puttign as nan casts to float and cannot convert to int + OUT_OF_RANGE_FILL_VALUE = np.NaN #0 #-1 #puttign as nan casts to float and cannot convert to int #Do aggregation from DAILY --> WEEKLY before doing any kind of imputation @@ -422,6 +431,12 @@ def remove_downsample_columns(df, out_of_range_fill_value): df = aggregate_to_weekly(df, AGGREGATION_TYPE) + #Some id's [15,16] have their missing values recorded as "-1" + #vs. 
later id's have their missing values simply missing from the original csv + #So for those id's that actually have -1, convert to NAN first: + df.replace(-1.,np.nan,inplace=True) + + #Reorganize data for each id (->"Page") unique_ids = pd.unique(df['Page']) df_list = [] @@ -443,7 +458,7 @@ def remove_downsample_columns(df, out_of_range_fill_value): elif sampling_period=='weekly': N_seasons = 4 seasonality = 1 - dd = imputation_lagKmedian_single_series(dd,seasonality,N_seasons) + dd = imputation_lagKmedian_single_series(dd,seasonality,N_seasons,OUT_OF_RANGE_FILL_VALUE) # if i == 58: # v=eeeeee @@ -482,7 +497,7 @@ def remove_downsample_columns(df, out_of_range_fill_value): # ============================================================================= #VERBOSE = False #if VERBOSE: - # __missing_vals_distribution(df) + # __missing_vals_distribution(df,OUT_OF_RANGE_FILL_VALUE) @@ -495,6 +510,8 @@ def remove_downsample_columns(df, out_of_range_fill_value): #df = aggregate(df,sampling_period) + #Reorder some things just in case + df = make_index_col_left(df) print(df) #SHould end up with a csv that is rows are series (each id), cols are dates From ea234ba48b6a8d7dc360b3c4642eb89c0e6a3711 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 16 Jul 2018 16:05:24 -0700 Subject: [PATCH 19/42] future predictions on our data working --- PREDICT.py | 148 ++++++++++++++++++++++++++++++++--------------------- model.py | 6 +-- trainer.py | 12 ++++- 3 files changed, 102 insertions(+), 64 deletions(-) diff --git a/PREDICT.py b/PREDICT.py index 28ca514..67ac93c 100755 --- a/PREDICT.py +++ b/PREDICT.py @@ -24,11 +24,20 @@ - -FEATURES_SET = 'arturius'# 'arturius' 'simple' 'full' +# ============================================================================= +# PARAMETRS +# ============================================================================= +FEATURES_SET = 'full'# 'arturius' 'simple' 'full' SAMPLING_PERIOD = 'daily' DATA_TYPE = 'ours' #'kaggle' #'ours' -Nmodels = 3. +Nmodels = 3 +PARAM_SETTING = 's32' #Which of the parameter settings to use [s32 is the default Kaggle one, with a few thigns modified as I want] +PARAM_SETTING_FULL_NAME = hparams.params_s32 #Which of the parameter settings to use corresponding to the PARAM_SETTING. The mapping is defined in hparams.py at the end in "sets = {'s32':params_s32,..." + + + + + # ============================================================================= @@ -62,33 +71,36 @@ def mean_smape(true, pred): # # ============================================================================= prev = df_all#.loc[:,:'2017-07-08'] -paths = [p for p in tf.train.get_checkpoint_state('data/cpt/s32').all_model_checkpoint_paths] +paths = [p for p in tf.train.get_checkpoint_state(f'data/cpt/{PARAM_SETTING}').all_model_checkpoint_paths] #tf.reset_default_graph() #preds = predict(paths, default_hparams(), back_offset=0, # n_models=3, target_model=0, seed=2, batch_size=2048, asgd=True) t_preds = [] -for tm in range(3): #!!!!!!!! 
Nmodels +for tm in range(Nmodels): tf.reset_default_graph() - t_preds.append(predict(FEATURES_SET, SAMPLING_PERIOD, paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63, - n_models=3, target_model=tm, seed=2, batch_size=2048, asgd=True)) + t_preds.append(predict(FEATURES_SET, SAMPLING_PERIOD, paths, build_hparams(PARAM_SETTING_FULL_NAME), back_offset=0, predict_window=63, + n_models=Nmodels, target_model=tm, seed=2, batch_size=2048, asgd=True)) # ============================================================================= # average the N models predictions # ============================================================================= -preds = sum(t_preds)/Nmodels +preds = sum(t_preds)/float(Nmodels) # ============================================================================= # look at missing # ============================================================================= missing_pages = prev.index.difference(preds.index) +print('missing_pages',missing_pages) # Use zeros for missing pages rmdf = pd.DataFrame(index=missing_pages, - data=np.tile(0, (len(preds.columns),len(missing_pages))).T, columns=preds.columns) -f_preds = preds.append(rmdf).sort_index() - + data=np.tile(0, (len(preds.columns),len(missing_pages))).T, columns=preds.columns) +if DATA_TYPE=='kaggle': + f_preds = preds.append(rmdf).sort_index() +elif DATA_TYPE=='ours': + f_preds = preds # Use zero for negative predictions f_preds[f_preds < 0.5] = 0 # Rouns predictions to nearest int @@ -96,14 +108,16 @@ def mean_smape(true, pred): + +print(f_preds) + # ============================================================================= # save out all predictions all days (for our stuff will be relevant, for his Kaggle maybe just needed one day) # ============================================================================= -firstK = 1000 #for size issues, for now while dev, just a few to look at -ggg = f_preds.iloc[:firstK] -ggg.to_csv('data/all_days_submission.csv.gz', compression='gzip', index=False, header=True) - - +#firstK = 1000 #for size issues, for now while dev, just a few to look at +#ggg = f_preds.iloc[:firstK] +#ggg.to_csv('data/all_days_submission.csv.gz', compression='gzip', index=False, header=True) +f_preds.to_csv('data/all_predictions_ours.csv.gz', compression='gzip', index=False, header=True) @@ -122,60 +136,76 @@ def mean_smape(true, pred): randomK = 1000 print('Saving figs of {} time series as checks'.format(randomK)) pagenames = list(f_preds.index) -pages = np.random.choice(pagenames, size=randomK, replace=False) +pages = np.random.choice(pagenames, size=min(randomK,len(pagenames)), replace=False) +N = pages.size for jj, page in enumerate(pages): + print(f"{jj} of {N}") plt.figure() - #prev.loc[page].fillna(0).plot(logy=True) - f_preds.loc[page].fillna(0).plot(logy=True) - #gt.loc[page].fillna(0).plot(logy=True) - f_preds.loc[page].plot(logy=True) + if DATA_TYPE=='kaggle': + prev.loc[page].fillna(0).plot()#logy=True) + f_preds.loc[page].fillna(0).plot(logy=True) + elif DATA_TYPE=='ours': + prev.loc[int(page)].plot() + f_preds.loc[page].plot() plt.title(page) + if not os.path.exists('ex_figs'): + os.mkdir('ex_figs') pathname = os.path.join('ex_figs', 'fig_{}.png'.format(jj)) plt.savefig(pathname) + plt.close() +#Cannot view on the AWS so move to local: +#zip -r ex_figs.zip ex_figs +#cp ex_figs.zip /home/...../sync -# ============================================================================= -# load, maniupalte test data -# 
============================================================================= -def read_keys(): - import os.path - key_file = 'data/keys2.pkl' - if os.path.exists(key_file): - return pd.read_pickle(key_file) - else: - print('Reading keys...') - raw_keys = pd.read_csv('data/key_2.csv.zip') - print('Processing keys...') - pagedate = raw_keys.Page.str.rsplit('_', expand=True, n=1).rename(columns={0:'page',1:'date_str'}) - keys = raw_keys.drop('Page', axis=1).assign(page=pagedate.page, date=pd.to_datetime(pagedate.date_str)) - del raw_keys, pagedate - print('Pivoting keys...') - pkeys = keys.pivot(index='page', columns='date', values='Id') - print('Storing keys...') - pkeys.to_pickle(key_file) - return pkeys -keys = read_keys() - -# ============================================================================= -# -# ============================================================================= -subm_preds = f_preds.loc[:, '2017-09-13':] -assert np.all(subm_preds.index == keys.index) -assert np.all(subm_preds.columns == keys.columns) -answers = pd.DataFrame({'Id':keys.values.flatten(), 'Visits':np.round(subm_preds).astype(np.int64).values.flatten()}) -answers.to_csv('data/submission.csv.gz', compression='gzip', index=False, header=True) - - - -print('f_preds') -print(f_preds) - -print('missing') -print(prev.loc[missing_pages, '2016-12-15':]) \ No newline at end of file + + + + +#For the Kaggle data, can also output compeition submission format: +if DATA_TYPE=='kaggle': + # ============================================================================= + # load, maniupalte test data + # ============================================================================= + def read_keys(): + import os.path + key_file = 'data/keys2.pkl' + if os.path.exists(key_file): + return pd.read_pickle(key_file) + else: + print('Reading keys...') + raw_keys = pd.read_csv('data/key_2.csv.zip') + print('Processing keys...') + pagedate = raw_keys.Page.str.rsplit('_', expand=True, n=1).rename(columns={0:'page',1:'date_str'}) + keys = raw_keys.drop('Page', axis=1).assign(page=pagedate.page, date=pd.to_datetime(pagedate.date_str)) + del raw_keys, pagedate + print('Pivoting keys...') + pkeys = keys.pivot(index='page', columns='date', values='Id') + print('Storing keys...') + pkeys.to_pickle(key_file) + return pkeys + keys = read_keys() + + # ============================================================================= + # + # ============================================================================= + subm_preds = f_preds.loc[:, '2017-09-13':] + assert np.all(subm_preds.index == keys.index) + assert np.all(subm_preds.columns == keys.columns) + answers = pd.DataFrame({'Id':keys.values.flatten(), 'Visits':np.round(subm_preds).astype(np.int64).values.flatten()}) + answers.to_csv('data/submission.csv.gz', compression='gzip', index=False, header=True) + + + + print('f_preds') + print(f_preds) + + print('missing') + print(prev.loc[missing_pages, '2016-12-15':]) \ No newline at end of file diff --git a/model.py b/model.py index faf9fae..bb7b831 100755 --- a/model.py +++ b/model.py @@ -421,7 +421,7 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a # Get final denormalized predictions vv = decode_predictions(decoder_targets, inp) - vv = tf.Print(vv, ['decode_predictions',vv,tf.shape(vv)]) +# vv = tf.Print(vv, ['decode_predictions',vv,tf.shape(vv)]) self.predictions = vv # print('self.predictions (still log1p(counts))') # print(self.predictions) @@ -498,7 +498,7 @@ def build_cell(idx): ggg = 
tf.cast(tf.equal(ff, ff*0.+267.), tf.float32) N_all_NAN_encoder_states = tf.reduce_sum(ggg) total = tf.reduce_prod(tf.shape(encoder_state)) - encoder_state = tf.Print(encoder_state,['encoder_state', tf.shape(encoder_state), encoder_state, 'N_nans', N_nans, 'total', total, 'N_all_NAN_encoder_states', N_all_NAN_encoder_states]) +# encoder_state = tf.Print(encoder_state,['encoder_state', tf.shape(encoder_state), encoder_state, 'N_nans', N_nans, 'total', total, 'N_all_NAN_encoder_states', N_all_NAN_encoder_states]) @@ -549,7 +549,7 @@ def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_ output, state = cell(next_input, prev_state) # Make prediction from RNN outputs projected_output = project_output(output) #!!!!!!!!!! quantiles - projected_output = tf.Print(projected_output, ['time',time,'projected_output',projected_output,tf.shape(projected_output),'output',output,tf.shape(output),'state',state,tf.shape(state) ,'prev_output',prev_output,tf.shape(prev_output) ,'features',features,tf.shape(features),features[1,:18]]) +# projected_output = tf.Print(projected_output, ['time',time,'projected_output',projected_output,tf.shape(projected_output),'output',output,tf.shape(output),'state',state,tf.shape(state) ,'prev_output',prev_output,tf.shape(prev_output) ,'features',features,tf.shape(features),features[1,:18]]) # Append step results to the buffer arrays if return_raw_outputs: diff --git a/trainer.py b/trainer.py index 470bb50..eea2c9f 100755 --- a/trainer.py +++ b/trainer.py @@ -718,8 +718,16 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, pred, x, pname = sess.run([model.predictions, model.inp.true_x, model.inp.page_ix]) else: pred, pname = sess.run([model.predictions, model.inp.page_ix]) - utf_names = [str(name, 'utf-8') for name in pname] + + #Our data already has page names (id's) as ints, so this decoding won't work, so just do str(id) + try: + utf_names = [str(name, 'utf-8') for name in pname] + except UnicodeDecodeError: + utf_names = [str(name) for name in pname] + pred_df = pd.DataFrame(index=utf_names, data=np.expm1(pred)) + print(pred_df) + pred_buffer.append(pred_df) if return_x: # noinspection PyUnboundLocalVariable @@ -739,7 +747,7 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, predictions = cp_predictions else: predictions += cp_predictions - predictions /= len(checkpoints) + predictions /= len(checkpoints) #Since it is averaging predictions over the chckpoints offset = pd.Timedelta(back_offset, 'D') #!!!!!!!!!!!! 
need to change these lines when sampling WEEKLY MONTHLY start_prediction = inp.data_end + pd.Timedelta('1D') - offset end_prediction = start_prediction + pd.Timedelta(predict_window - 1, 'D') From 3b53e6a4223e038f6f80b18ae085186a0d3ad078 Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 19 Jul 2018 01:06:10 -0700 Subject: [PATCH 20/42] multirun train-val bash script --- PREDICT.py | 16 ++--- PREPROCESS.py | 109 ++++++++++++++++++++++++++++------ RUN_MANY_TRAIN_VAL_WINDOWS.sh | 52 ++++++++++++++++ Readme.md | 20 +++++-- hparams.py | 2 +- model.py | 60 +++++++++---------- trainer.py | 15 ++--- 7 files changed, 206 insertions(+), 68 deletions(-) create mode 100644 RUN_MANY_TRAIN_VAL_WINDOWS.sh diff --git a/PREDICT.py b/PREDICT.py index 67ac93c..a3ea201 100755 --- a/PREDICT.py +++ b/PREDICT.py @@ -33,7 +33,7 @@ Nmodels = 3 PARAM_SETTING = 's32' #Which of the parameter settings to use [s32 is the default Kaggle one, with a few thigns modified as I want] PARAM_SETTING_FULL_NAME = hparams.params_s32 #Which of the parameter settings to use corresponding to the PARAM_SETTING. The mapping is defined in hparams.py at the end in "sets = {'s32':params_s32,..." - +OUTPUT_DIR = 'output' @@ -117,7 +117,7 @@ def mean_smape(true, pred): #firstK = 1000 #for size issues, for now while dev, just a few to look at #ggg = f_preds.iloc[:firstK] #ggg.to_csv('data/all_days_submission.csv.gz', compression='gzip', index=False, header=True) -f_preds.to_csv('data/all_predictions_ours.csv.gz', compression='gzip', index=False, header=True) +f_preds.to_csv(f'{OUTPUT_DIR}/all_predictions_ours.csv.gz', compression='gzip', index=False, header=True) @@ -148,17 +148,17 @@ def mean_smape(true, pred): prev.loc[int(page)].plot() f_preds.loc[page].plot() plt.title(page) - if not os.path.exists('ex_figs'): - os.mkdir('ex_figs') - pathname = os.path.join('ex_figs', 'fig_{}.png'.format(jj)) + if not os.path.exists(OUTPUT_DIR): + os.mkdir(OUTPUT_DIR) + pathname = os.path.join(OUTPUT_DIR, 'fig_{}.png'.format(jj)) plt.savefig(pathname) plt.close() #Cannot view on the AWS so move to local: -#zip -r ex_figs.zip ex_figs -#cp ex_figs.zip /home/...../sync +#zip -r output.zip output +#cp output.zip /home/...../sync @@ -200,7 +200,7 @@ def read_keys(): assert np.all(subm_preds.index == keys.index) assert np.all(subm_preds.columns == keys.columns) answers = pd.DataFrame({'Id':keys.values.flatten(), 'Visits':np.round(subm_preds).astype(np.int64).values.flatten()}) - answers.to_csv('data/submission.csv.gz', compression='gzip', index=False, header=True) + answers.to_csv(f'{OUTPUT_DIR}/submission.csv.gz', compression='gzip', index=False, header=True) diff --git a/PREPROCESS.py b/PREPROCESS.py index f61b34a..00c4695 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -23,6 +23,7 @@ from collections import Counter from copy import deepcopy +from scipy.signal import medfilt def load_my_data(myDataDir): @@ -205,7 +206,7 @@ def imputation__simple(df,imputation_method): #Put "Page" at left cols = new_df.columns.tolist() new_df = new_df[cols[-1:]+cols[:-1]] - new_df.reset_index(drop=True,inplace=True) + new_df.reset_index(drop=True,inplace=True) return new_df @@ -316,6 +317,89 @@ def imputation_lagKmedian_single_series(df,seasonality,N_seasons,out_of_range_fi + +def data_augmentation(df, jitter_pcts_list=[.05,.10,.15], do_low_pass_filter=True, additive_trend=False): + """ + Do some basic data augmentation with a few different options. + Then output Cartesian product of all these variations as the final set. 
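A compact sketch of the percent-jitter variant described here, applied to a single numpy series (illustrative only; the real function also generates new Page ids per perturbation and offers median-filter and trend options):

import numpy as np

def jitter_series(x, pct=0.10, rng=np.random):
    # Add uniform noise in [-pct*v, +pct*v] to each observed value; NaNs stay NaN.
    noise = rng.uniform(-pct, pct, size=x.shape) * x
    return np.where(np.isnan(x), np.nan, x + noise)

x = np.array([10.0, 12.0, np.nan, 9.0])
print(jitter_series(x))  # e.g. [10.7, 11.3, nan, 8.6]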
+ + Any missing point (NAN) will be left as NAN, but others will be modified in some way + """ + + def jitter__uniform_pcts(df, jitter_pcts_list, N_perturbations): + """ + On each observed value (non-NAN), add +/- jitter up to some + percent of the observed value. Either positive or negative. + If the count is small, then just leave it, otherwise perturb + (always leaving counts positive). + """ + page = df['Page'].values[0] + cols = df.columns + x = df.drop(columns=['Page']).values[0] + dflist = [] + for uniform_jitter in jitter_pcts_list: + ids = [str(page) + '__unijit{}_'.format(str(uniform_jitter)) + str(kk+1) for kk in range(N_perturbations)] + _ = np.zeros((N_perturbations,x.size)) + f = lambda i: np.random.uniform(-uniform_jitter*i,uniform_jitter*i) if not np.isnan(i) else np.nan + for kk in range(N_perturbations): + _[kk] = [i + f(i) for i in x]# ).reshape(1,x.size) + d = {cols[i]:_[:,i] for i in range(x.size)} + df = pd.DataFrame(data=d) + df['Page'] = ids + dflist += [df] + df = pd.concat(dflist,axis=0) + df.reset_index(drop=True,inplace=True) + return df + + + def add_trend(df, slopes_list): + """ + On each observed value (non-NAN), add +/- X_t, where X_t is from a + linear trend with given slope, applied across whole series. + + Could change the character of the time series a lot so maybe not so good? + """ + return df + + def low_pass_filter(df, filter_type, kernel_size): + """ + Low-pass filter the data with some kind of kernel, with some kernel size. + + Is going to smooth out the data a lot, not sure if this will change the + time series too much to be good?? + """ + page = df['Page'].values[0] + cols = df.columns + x = df.drop(columns=['Page']).values[0] + ids = [str(page) + '__{0}{1}'.format(filter_type.func_name,kernel_size)] + y = filter_type(x,kernel_size=kernel_size) + _ = np.where(np.invert(np.isnan(x)),y,np.nan) + d = {cols[i]:_[i] for i in range(x.size)} + df = pd.DataFrame(data=d,index=[0]) + df['Page'] = ids + return df + + + #For each method, do 5x random + N_perturbations = 5 + dflist = [df] + if jitter_pcts_list: + dflist += [jitter__uniform_pcts(df, jitter_pcts_list, N_perturbations)] + if do_low_pass_filter: + filter_type = medfilt + kernel_size = 7 + dflist += [low_pass_filter(df, filter_type, kernel_size)] + if additive_trend: + slopes_list = [-1.1, 1.1] + dflist += [add_trend(df, slopes_list)] +# if autoencoder: +# #Run through autoencoder, do VAE, get resulting series + + df = pd.concat(dflist,axis=0) + return df + + + def format_like_Kaggle(df, myDataDir, imputation_method, sampling_period, start_date=None, end_date=None): """ Take my data and format it exactly as needed to use for the Kaggle seq2seq @@ -450,7 +534,7 @@ def make_index_col_left(df): dd = pd.DataFrame(dates).T dd['Page'] = u - print(i,u) + print(i,u, 'of {}'.format(unique_ids[-1])) if imputation_method=='lagKmedian': if sampling_period=='daily': N_seasons = 4 @@ -460,18 +544,8 @@ def make_index_col_left(df): seasonality = 1 dd = imputation_lagKmedian_single_series(dd,seasonality,N_seasons,OUT_OF_RANGE_FILL_VALUE) -# if i == 58: -# v=eeeeee - #Make a good easy case to overfit - DEBUG = False - if DEBUG: - dd*= 0. - dd += u - - - #If doing imputation / other - #for each series individually - #... + #Data augmentation + dd = data_augmentation(dd) df_list.append(dd) @@ -481,8 +555,6 @@ def make_index_col_left(df): df.reset_index(drop=True,inplace=True) - - #If we did aggregation, then above reogranization will have many of the columns Nan / -1, #since e.g. 
went from daily to weekly, then 6 days of the week will look empty. So remove them. if sampling_period=='weekly': @@ -566,7 +638,7 @@ def make_key_csv(df): END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful SAMPLING_PERIOD = 'daily' #'daily', 'weekly', 'monthly' - + RANDOM_SEED = None # ============================================================================= # MAIN @@ -578,6 +650,9 @@ def make_key_csv(df): print('myDataDir',myDataDir) print('SAMPLING_PERIOD',SAMPLING_PERIOD) + #Seed random number generator in case of doing data augmentation: + np.random.seed(RANDOM_SEED) + #Load df = load_my_data(myDataDir) diff --git a/RUN_MANY_TRAIN_VAL_WINDOWS.sh b/RUN_MANY_TRAIN_VAL_WINDOWS.sh new file mode 100644 index 0000000..6a21d8d --- /dev/null +++ b/RUN_MANY_TRAIN_VAL_WINDOWS.sh @@ -0,0 +1,52 @@ +#chmod 777 RUN_MANY_TRAIN_VAL_WINDOWS.sh +#./RUN_MANY_TRAIN_VAL_WINDOWS.sh +#Run over many train_window - predict_window length pairs +#Compile results, analyze performance as (2D) heatmap + + +#TRAIN_WINDOWS="1 2 5 10 20 50 100 150 200 250 300" +#VALIDATION_WINDOWS="1 2 5 10 20 50 100" +#e.g. TRAIN_WINDOWS has NAN SMAPE -> 2 problem with as big as size 50 + +TRAIN_WINDOWS="100 150" +VALIDATION_WINDOWS="33 66" +#just to test... +MAX_EPOCH=2 + + +#One time clean up +cd data +rm -R vars/ +rm -R cpt/ +rm -R cpt_tmp/ +rm -R logs/ +rm *.pkl +cd .. +#ls -l data/ + + +for v in $VALIDATION_WINDOWS; do + #Clea up between feature sets + cd data + rm -R vars/ + rm -R cpt_tmp/ + rm *.pkl + cd .. + #Create the features for our data + echo 'running make_features.py with --add_days='$v + python3 make_features.py data/vars ours daily full --add_days=$v + for t in $TRAIN_WINDOWS; do + echo 'validation window = '$v 'validation window = '$t + echo 'running trainer.py' + NAME="val$v-train$t" + echo 'NAAME='$NAME + python3 trainer.py full daily --name $NAME --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10 --max_epoch=$MAX_EPOCH --patience=5 --verbose --predict_window=$v --train_window=$t + done +done + + + +#Now that all training is done, can run predictions +#python3 PREDICT.py !!!!!make window sizes as params + +#now make heatmaps of performance: \ No newline at end of file diff --git a/Readme.md b/Readme.md index b7fef7d..240a090 100755 --- a/Readme.md +++ b/Readme.md @@ -47,7 +47,7 @@ GK modifications for own data: 3. $cd ..../kaggle-web-traffic 4. $python3 PREPROCESS.py 5. $python3 make_features.py data/vars kaggle daily arturius --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full, full_w_context} depending on using default Arturius kaggle vs. 
own custom for this application; and specify sampling period -python3 make_features.py data/vars kaggle daily full --add_days=85 +python3 make_features.py data/vars kaggle daily full --add_days=63 #Just in case making new features cd data @@ -74,8 +74,14 @@ python3 trainer.py full daily --name TEST_attn_head --hparam_set=TEST_attn_head --no_eval --side_split +--max_epoch=1000 +--save_from_step=1 +--verbose + + +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 --train_window=100 -python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --side_split +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10 --max_epoch=1000 --patience=50 --verbose --side_split @@ -90,9 +96,13 @@ python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_de To do: 0. print out the SMAPE for the actual data [current is doing SMAPE of the unrounded log1p(data) which will likely be much smaller than for real] -1. Visualizations of predictions on our own data -1. why encoder_state NANs in it [is it train predict window completeness thresholds?] +0. SMAPEs on ground truth 2018 +1. why encoder_state NANs in it for small train window lengths [is it train/predict window completeness thresholds?] +1. performance heatmaps 2. for weekly. monthly inputs, need to change few places in tensorflow code 3. Prediction intervals -4. Architecture improvements \ No newline at end of file +4. Architecture improvements: his is not the usual encoder-decoder: add C context vector to every decoder step +4. bi, di, MH +5. custom attention +6. VAE aug \ No newline at end of file diff --git a/hparams.py b/hparams.py index 5eaeeef..0bfd3e6 100755 --- a/hparams.py +++ b/hparams.py @@ -5,7 +5,7 @@ params_s32 = dict( batch_size=123,#256, #train_window=380, - train_window=283, +# train_window=283,#now make this a bash input to do train-validation window size performance heatmaps #train_window=30,#try 65 w our data to see if allows more samples through filter train_skip_first=0, rnn_depth=267, diff --git a/model.py b/model.py index bb7b831..c2b228d 100755 --- a/model.py +++ b/model.py @@ -414,15 +414,16 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a print('inp.time_y',inp.time_y) decoder_targets, decoder_outputs = self.decoder(encoder_state, attn_features if hparams.use_attn else None, - inp.time_y, inp.norm_x[:, -1]) + inp.time_y, inp.norm_x[:, -1]) #in decoder function def: inp.time_y = "prediction_inputs"; inp.norm_x[:, -1] = "previous_y" (i.e. the final x normalizd)) # Decoder activation losses dec_stab_loss = rnn_stability_loss(decoder_outputs, hparams.decoder_stability_loss / inp.predict_window) dec_activation_loss = rnn_activation_loss(decoder_outputs, hparams.decoder_activation_loss / inp.predict_window) # Get final denormalized predictions - vv = decode_predictions(decoder_targets, inp) + self.predictions = decode_predictions(decoder_targets, inp) +# vv = decode_predictions(decoder_targets, inp) # vv = tf.Print(vv, ['decode_predictions',vv,tf.shape(vv)]) - self.predictions = vv +# self.predictions = vv # print('self.predictions (still log1p(counts))') # print(self.predictions) @@ -492,19 +493,19 @@ def build_cell(idx): #!!!!!! 
on our data, when doing side_split, encoder_state is fine [no NANs], #but when doing walk_forward, some rows (instances) are all NANs (and the others all defined), #then eventually every instance becomes NANs - N_nans = tf.reduce_sum(tf.cast(tf.is_nan(encoder_state), tf.float32)) - tt = tf.cast(tf.is_nan(encoder_state), tf.float32) - ff = tf.reduce_sum(tt,axis=1) - ggg = tf.cast(tf.equal(ff, ff*0.+267.), tf.float32) - N_all_NAN_encoder_states = tf.reduce_sum(ggg) - total = tf.reduce_prod(tf.shape(encoder_state)) +# N_nans = tf.reduce_sum(tf.cast(tf.is_nan(encoder_state), tf.float32)) +# tt = tf.cast(tf.is_nan(encoder_state), tf.float32) +# ff = tf.reduce_sum(tt,axis=1) +# ggg = tf.cast(tf.equal(ff, ff*0.+267.), tf.float32) +# N_all_NAN_encoder_states = tf.reduce_sum(ggg) +# total = tf.reduce_prod(tf.shape(encoder_state)) # encoder_state = tf.Print(encoder_state,['encoder_state', tf.shape(encoder_state), encoder_state, 'N_nans', N_nans, 'total', total, 'N_all_NAN_encoder_states', N_all_NAN_encoder_states]) nest.assert_same_structure(encoder_state, cell.state_size) - predict_days = self.inp.predict_window - assert prediction_inputs.shape[1] == predict_days + predict_timesteps = self.inp.predict_window + assert prediction_inputs.shape[1] == predict_timesteps # [batch_size, time, input_depth] -> [time, batch_size, input_depth] inputs_by_time = tf.transpose(prediction_inputs, [1, 0, 2]) @@ -513,19 +514,18 @@ def build_cell(idx): return_raw_outputs = self.hparams.decoder_stability_loss > 0.0 or self.hparams.decoder_activation_loss > 0.0 # Stop condition for decoding loop - def cond_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): - #!!!!! ???? Need to change when doing as weekly data??? - return time < predict_days + def cond_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): + return timestep < predict_timesteps # FC projecting layer to get single predicted value from RNN output def project_output(tensor): N_pctls=1 #!!!!!!!!!! quantiles return tf.layers.dense(tensor, N_pctls, name='decoder_output_proj', kernel_initializer=self.default_init()) - def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): + def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): """ Main decoder loop - :param time: Day number + :param timestep: timestep number :param prev_output: Output(prediction) from previous step :param prev_state: RNN state tensor from previous step :param array_targets: Predictions, each step will append new value to this array @@ -533,12 +533,12 @@ def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_ :return: """ # RNN inputs for current step - features = inputs_by_time[time] + features = inputs_by_time[timestep] # [batch, predict_window, readout_depth * n_heads] -> [batch, readout_depth * n_heads] if attn_features is not None: # [batch_size, 1] + [batch_size, input_depth] - attn = attn_features[:, time, :] + attn = attn_features[:, timestep, :] # Append previous predicted value + attention vector to input features next_input = tf.concat([prev_output, features, attn], axis=1) else: @@ -549,28 +549,28 @@ def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_ output, state = cell(next_input, prev_state) # Make prediction from RNN outputs projected_output = project_output(output) #!!!!!!!!!! 
quantiles -# projected_output = tf.Print(projected_output, ['time',time,'projected_output',projected_output,tf.shape(projected_output),'output',output,tf.shape(output),'state',state,tf.shape(state) ,'prev_output',prev_output,tf.shape(prev_output) ,'features',features,tf.shape(features),features[1,:18]]) +# projected_output = tf.Print(projected_output, ['timestep',timestep,'projected_output',projected_output,tf.shape(projected_output),'output',output,tf.shape(output),'state',state,tf.shape(state) ,'prev_output',prev_output,tf.shape(prev_output) ,'features',features,tf.shape(features),features[1,:18]]) # Append step results to the buffer arrays if return_raw_outputs: - array_outputs = array_outputs.write(time, output) - array_targets = array_targets.write(time, projected_output) - # Increment time and return - return time + 1, projected_output, state, array_targets, array_outputs + array_outputs = array_outputs.write(timestep, output) + array_targets = array_targets.write(timestep, projected_output) + # Increment timestep and return + return timestep + 1, projected_output, state, array_targets, array_outputs #!!!!!! quantiles: projected_output will be diff dims # Initial values for loop loop_init = [tf.constant(0, dtype=tf.int32), tf.expand_dims(previous_y, -1), encoder_state, - tf.TensorArray(dtype=tf.float32, size=predict_days), - tf.TensorArray(dtype=tf.float32, size=predict_days) if return_raw_outputs else tf.constant(0)] #!!!!!!! size= ... x N_pctls + tf.TensorArray(dtype=tf.float32, size=predict_timesteps), + tf.TensorArray(dtype=tf.float32, size=predict_timesteps) if return_raw_outputs else tf.constant(0)] #!!!!!!! size= ... x N_pctls # Run the loop - _time, _projected_output, _state, targets_ta, outputs_ta = tf.while_loop(cond_fn, loop_fn, loop_init) + _timestep, _projected_output, _state, targets_ta, outputs_ta = tf.while_loop(cond_fn, loop_fn, loop_init) print('decoder') -# print('_time',_time) -# _time = debug_tensor_print(_time) +# print('_timestep',_timestep) +# _timestep = debug_tensor_print(_timestep) # print('_projected_output',_projected_output) # _projected_output = debug_tensor_print(_projected_output) # print('_state',_state) @@ -590,10 +590,10 @@ def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_ # print('targets',targets) #!!!!!!!!!!! why targets becomes NANs ????? # why targets NANs? 
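The quantile notes in this patch series (the N_pctls comments here, and the earlier "pinball loss instead of SMAPE" remark) point toward a quantile loss; as a reference only, a minimal TF 1.x pinball-loss sketch — the function name, tau value, and placement are assumptions, not code from the patch:

import tensorflow as tf

def pinball_loss(y_true, y_pred, tau):
    # Quantile (pinball) loss: under-prediction is penalized by tau,
    # over-prediction by (1 - tau).
    err = y_true - y_pred
    return tf.reduce_mean(tf.maximum(tau * err, (tau - 1.0) * err))

y_true = tf.constant([10.0, 20.0, 30.0])
y_pred = tf.constant([12.0, 18.0, 30.0])
with tf.Session() as sess:
    print(sess.run(pinball_loss(y_true, y_pred, tau=0.9)))  # ~0.667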
- targets = debug_tensor_print(targets) #63 x 245, except for first 2 prints for each new iteration it is 63 x 64 +# targets = debug_tensor_print(targets) #63 x 245, except for first 2 prints for each new iteration it is 63 x 64 # raw_outputs = debug_tensor_print(raw_outputs) #is 63 x 64 x 267 -# print_list = ['_time', _time.name, tf.shape(_time), _time] +# print_list = ['_timestep', _timestep.name, tf.shape(_timestep), _timestep] # raw_outputs = tf.Print(raw_outputs, print_list) return targets, raw_outputs diff --git a/trainer.py b/trainer.py index eea2c9f..d43c846 100755 --- a/trainer.py +++ b/trainer.py @@ -400,14 +400,14 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model seed=None, logdir='data/logs', max_epoch=100, patience=2, train_sampling=1.0, eval_sampling=1.0, eval_memsize=5, gpu=0, gpu_allow_growth=False, save_best_model=False, forward_split=False, write_summaries=False, verbose=False, asgd_decay=None, tqdm=True, - side_split=True, max_steps=None, save_from_step=None, do_eval=True, predict_window=63): + side_split=True, max_steps=None, save_from_step=None, do_eval=True, predict_window=63, train_window=283): eval_k = int(round(26214 * eval_memsize / n_models)) eval_batch_size = int( eval_k / (hparams.rnn_depth * hparams.encoder_rnn_layers)) # 128 -> 1024, 256->512, 512->256 eval_pct = 0.1 batch_size = hparams.batch_size - train_window = hparams.train_window +# train_window = hparams.train_window tf.reset_default_graph() if seed: tf.set_random_seed(seed) @@ -673,15 +673,14 @@ def ema_vars(model): def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, verbose=False, predict_window=6, back_offset=0, n_models=1, - target_model=0, asgd=False, seed=1, batch_size=1024): + target_model=0, asgd=False, seed=1, batch_size=1024, train_window=283): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") - pipe = InputPipe(features_set, sampling_period, inp, page_features(inp, features_set), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, #!!!!!!!!!!!!!!!! 
page_features - n_epoch=1, verbose=verbose, + pipe = InputPipe(features_set, sampling_period, inp, page_features(inp, features_set), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, train_completeness_threshold=0.01, predict_window=predict_window, - predict_completeness_threshold=0.0, train_window=hparams.train_window, + predict_completeness_threshold=0.0, train_window=train_window,#hparams.train_window, back_offset=back_offset) asgd_decay = 0.99 if asgd else None if n_models == 1: @@ -754,7 +753,8 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, predictions.columns = pd.date_range(start_prediction, end_prediction) if return_x: x = pd.concat(x_buffer) - start_data = inp.data_end - pd.Timedelta(hparams.train_window - 1, 'D') - back_offset + #start_data = inp.data_end - pd.Timedelta(hparams.train_window - 1, 'D') - back_offset + start_data = inp.data_end - pd.Timedelta(train_window - 1, 'D') - back_offset #!!!!!now for heatmaps end_data = inp.data_end - back_offset x.columns = pd.date_range(start_data, end_data) return predictions, x @@ -790,6 +790,7 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, parser.add_argument('--max_steps', type=int, help="Stop training after max steps") parser.add_argument('--save_from_step', type=int, help="Save model on each evaluation (10 evals per epoch), starting from this step") parser.add_argument('--predict_window', default=63, type=int, help="Number of days to predict") + parser.add_argument('--train_window', default=283, type=int, help="Train window chunk size")#Now that we want to do train size - val size performance heatmaps args = parser.parse_args() param_dict = dict(vars(args)) From 96132942d4d9ee686d8b6aee2a7e4b44d64c4c74 Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 19 Jul 2018 01:50:12 -0700 Subject: [PATCH 21/42] added year-2010 / (2020-2010) as feature --- input_pipe.py | 49 +++++++++++++++++++++++++++--------------------- make_features.py | 34 +++++++++++++++++---------------- 2 files changed, 46 insertions(+), 37 deletions(-) diff --git a/input_pipe.py b/input_pipe.py index 4e8b3c2..69d1d52 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -131,7 +131,7 @@ def cut(self, counts, start, end): :param counts: counts timeseries :param start: start index :param end: end index - :return: tuple (train_counts, test_counts, lagged_counts, [dow,woy,moy]) + :return: tuple (train_counts, test_counts, lagged_counts, [dow,woy,moy,year]) """ # Pad counts to ensure we have enough array length for prediction counts = tf.concat([counts, tf.fill([self.predict_window], np.NaN)], axis=0) @@ -155,6 +155,8 @@ def cut(self, counts, start, end): # cropped_dow = 0*cropped_moy # cropped_woy = 0*cropped_moy + #ANd use year as a feature to get long term trend + cropped_year = self.inp.year[start:end] # ============================================================================= @@ -192,11 +194,11 @@ def cut(self, counts, start, end): if self.features_set=='arturius' or self.features_set=='full':#for now, for full just do sam [include lagged] if self.sampling_period=='daily': - return x_counts, y_counts, lagged_count, cropped_dow, cropped_woy + return x_counts, y_counts, lagged_count, cropped_dow, cropped_woy, cropped_year if self.sampling_period=='weekly': - return x_counts, y_counts, lagged_count, cropped_woy + return x_counts, y_counts, lagged_count, cropped_woy, cropped_year if self.sampling_period=='monthly': - return x_counts, y_counts, lagged_count, cropped_moy + return x_counts, 
y_counts, lagged_count, cropped_moy, cropped_year # elif self.features_set=='full': # return aaaaaaaaaaa #can drop lagged @@ -273,22 +275,22 @@ def make_features(self, *args): print(args) if self.features_set == 'arturius': if self.sampling_period == 'daily': - x_counts, y_counts, lagged_counts, dow, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, dow, woy, year, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args elif self.sampling_period == 'weekly': - x_counts, y_counts, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, woy, year, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args elif self.sampling_period == 'monthly': - x_counts, y_counts, lagged_counts, moy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, moy, year, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args #For now just use the same ... # count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance) elif self.features_set == 'full': if self.sampling_period == 'daily': - x_counts, y_counts, lagged_counts, dow, woy, page_ix, count_median, year_autocorr, quarter_autocorr,\ + x_counts, y_counts, lagged_counts, dow, woy, year, page_ix, count_median, year_autocorr, quarter_autocorr,\ count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args elif self.sampling_period == 'weekly': - x_counts, y_counts, lagged_counts, woy, page_ix, count_median, year_autocorr, quarter_autocorr,\ + x_counts, y_counts, lagged_counts, woy, year, page_ix, count_median, year_autocorr, quarter_autocorr,\ count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args elif self.sampling_period == 'monthly': - x_counts, y_counts, lagged_counts, moy, page_ix, count_median, year_autocorr, quarter_autocorr,\ + x_counts, y_counts, lagged_counts, moy, year, page_ix, count_median, year_autocorr, quarter_autocorr,\ count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args # ============================================================================= @@ -302,6 +304,11 @@ def make_features(self, *args): elif self.sampling_period == 'monthly': x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) + #Already did a manual kind of scaling for year in make_features.py so don't need to normalize here... 
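The "manual kind of scaling" referred to in this comment is the fixed mapping added in make_features.py further down, (year - 2010) / (2020 - 2010), chosen so the year feature does not need per-window normalization. A standalone sketch of that mapping, using the same reference constants:

import pandas as pd

REFERENCE_FIRST_YEAR = 2010
REFERENCE_LAST_YEAR = 2020

def scale_year(features_times):
    # Map the calendar year onto roughly [0, 1]; a fixed reference range avoids
    # the zero-variance problem when a sampled window falls entirely inside one year.
    return (features_times.year - REFERENCE_FIRST_YEAR) / float(REFERENCE_LAST_YEAR - REFERENCE_FIRST_YEAR)

features_times = pd.date_range('2015-01-01', '2017-12-31', freq='D')
year_feature = scale_year(features_times)   # 2015 -> 0.5, 2017 -> 0.7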
+ x_year, y_year = tf.split(year, [self.train_window, self.predict_window], axis=0) + x_year = tf.expand_dims(x_year,axis=1) + y_year = tf.expand_dims(y_year,axis=1) + # Normalize counts mean = tf.reduce_mean(x_counts) std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_counts, mean))) @@ -347,11 +354,11 @@ def make_features(self, *args): # Train features, depending on measurement frequency x_features = tf.expand_dims(norm_x_counts, -1) # [n_timesteps] -> [n_timesteps, 1] if self.sampling_period == 'daily': - x_features = tf.concat([x_features, x_dow, x_woy], axis=1) + x_features = tf.concat([x_features, x_dow, x_woy, x_year], axis=1) elif self.sampling_period == 'weekly': - x_features = tf.concat([x_features, x_woy], axis=1) + x_features = tf.concat([x_features, x_woy, x_year], axis=1) elif self.sampling_period == 'monthly': - x_features = tf.concat([x_features, x_moy], axis=1) + x_features = tf.concat([x_features, x_moy, x_year], axis=1) #Regardess of period/frequency will have below features: x_features = tf.concat([x_features, x_lagged, # Stretch series_features to all training days @@ -360,11 +367,11 @@ def make_features(self, *args): # Test features if self.sampling_period == 'daily': - y_features = tf.concat([y_dow, y_woy], axis=1) + y_features = tf.concat([y_dow, y_woy, y_year], axis=1) elif self.sampling_period == 'weekly': - y_features = y_woy + 0 + y_features = tf.concat([y_woy, y_year], axis=1) elif self.sampling_period == 'monthly': - y_features = y_moy + 0 + y_features = tf.concat([y_moy, y_year], axis=1) #Regardess of period/frequency will have below features: y_features = tf.concat([y_features, y_lagged, # Stretch series_features to all testing days @@ -500,7 +507,7 @@ def page_features(inp: VarFeeder, features_set): So do not need to pass in here the time-varying ones like day of week, month of year, lagged, etc. - DO NOT return dow, woy, moy + DO NOT return dow, woy, moy, year """ if features_set=='arturius': @@ -509,8 +516,8 @@ def page_features(inp: VarFeeder, features_set): inp.count_pctl_100 ) - elif features_set=='simple': - raise Exception('not ready yet') +# elif features_set=='simple': +# raise Exception('not ready yet') elif features_set=='full': d = (inp.counts, @@ -525,7 +532,7 @@ def page_features(inp: VarFeeder, features_set): inp.count_pctl_100, inp.count_variance) - elif features_set=='full_w_context': - raise Exception('not ready yet') +# elif features_set=='full_w_context': +# raise Exception('not ready yet') return d \ No newline at end of file diff --git a/make_features.py b/make_features.py index 5eb8642..d1061db 100755 --- a/make_features.py +++ b/make_features.py @@ -346,6 +346,11 @@ def run(): WEEK_NUMBER_METHOD = 'floor7'#'pandas' #'floor7' WEEK_NUMBER_MAX = 53. #52. + REFERENCE_FIRST_YEAR = 2010 #Use the year number as a feature, calculated as (year-REF_1)/(REF_2 - REF_1) to put on smaller scale + #(must be careful about normalizing on the fly within window, where depending on window size, most observations will have same year number, and 0 variance) + #so do this manual scaling instead of standard mean-var scaling + REFERENCE_LAST_YEAR = 2020 + features_times = pd.date_range(data_start, features_end, freq='D') @@ -364,9 +369,6 @@ def run(): woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) - #To catch longer term trending data, can also include year number. 
[depending on size of train / prediction windows and random sampling boundaries could be same value over whole series] - year_number = features_times.year - if args.sampling_period=='weekly': #index of week number, when sampling at WEEKLY level (this is different than above) @@ -379,7 +381,6 @@ def run(): year_period = WEEK_NUMBER_MAX / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) - year_number = features_times.year if args.sampling_period=='monthly': #month index (only used if sampling monthly) @@ -387,8 +388,9 @@ def run(): period = 12. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year moy_norm = features_times.month.values / period #not sure if by default this starts on Monday vs Sunday moy = np.stack([np.cos(moy_norm), np.sin(moy_norm)], axis=-1) - year_number = features_times.year + #To catch longer term trending data, can also include year number. [depending on size of train / prediction windows and random sampling boundaries could be same value over whole series] + year = (features_times.year - REFERENCE_FIRST_YEAR)/float(REFERENCE_LAST_YEAR-REFERENCE_FIRST_YEAR) # Assemble indices for quarterly lagged data @@ -420,12 +422,12 @@ def run(): count_pctl_100=percentiles[5],#max #!!!!!!!!!!!!!!!! just to see what happens: apend one of my features. ) - elif args.features_set == 'simple': - tensors = dict( - counts=df, - count_median=count_median,#this is just the median feature, can put in others too - #dow=dow, - ) +# elif args.features_set == 'simple': +# tensors = dict( +# counts=df, +# count_median=count_median,#this is just the median feature, can put in others too +# #dow=dow, +# ) elif (args.features_set == 'full') or (args.features_set == 'full_w_context'): tensors = dict( @@ -450,16 +452,13 @@ def run(): # series_length=series_length,#length of series [number of samples] to get idea of how much history a series has #number nonzero #Other time-frequency/scale features + #tsfresh features #... - - ) else: raise Exception(f'features_set must be specified\nOne of ["arturius","simple","full","full_w_context"]') - - if args.sampling_period=='daily': tensors['dow']=dow @@ -470,8 +469,11 @@ def run(): tensors['moy']=moy else: raise Exception('Must specify correct sampling period') + + #Also use year number as feature + tensors['year']=year - + """#If provide other info based on e.g. 
new location (any features that are not derived purely from the time series) if args.features_set == 'full_w_context': tensors['country'] = asdasdasd From a6ededdb106a0fa3df9cd25faa93dc645d4c3416 Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 19 Jul 2018 03:21:12 -0700 Subject: [PATCH 22/42] added encoder-decoder context to every decoder timestep --- hparams.py | 7 +++++++ model.py | 25 +++++++++++++++++++------ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/hparams.py b/hparams.py index 0bfd3e6..9b694b3 100755 --- a/hparams.py +++ b/hparams.py @@ -40,6 +40,13 @@ encoder_activation_loss=1e-06, # max 0.001 decoder_stability_loss=0.0, # max 100 decoder_activation_loss=5e-06, # max 0.001 + + #Kaggle model architecture is more like a basic many-to-many RNN, not really a + #usual encoder-decoder architecture since computational graph does not have + #connections from encoded representation to each decoder time step (only to 1st + #decoder timestep). Set below to True to use encoder-decoder; set False to use + #Kaggle architecture not really true encoder-decoder + RECURSIVE_W_ENCODER_CONTEXT=True, ) diff --git a/model.py b/model.py index c2b228d..b315e13 100755 --- a/model.py +++ b/model.py @@ -16,6 +16,8 @@ + + def debug_tensor_print(tensor): """ Debugging mode: @@ -237,7 +239,7 @@ def smape_loss(true, predicted, weights): return tf.losses.compute_weighted_loss(smape, weights, loss_collection=None) -def decode_predictions(decoder_readout, inp: InputPipe): +def decode_predictions(decoder_readout, inp: InputPipe):#!!!!!quantiles """ Converts normalized prediction values to log1p(pageviews), e.g. reverts normalization :param decoder_readout: Decoder output, shape [n_days, batch] @@ -245,7 +247,7 @@ def decode_predictions(decoder_readout, inp: InputPipe): :return: """ # [n_days, batch] -> [batch, n_days] - batch_readout = tf.transpose(decoder_readout) + batch_readout = tf.transpose(decoder_readout) #!!!!!quantiles batch_std = tf.expand_dims(inp.norm_std, -1) batch_mean = tf.expand_dims(inp.norm_mean, -1) @@ -254,7 +256,7 @@ def decode_predictions(decoder_readout, inp: InputPipe): return ret -def calc_loss(predictions, true_y, additional_mask=None): +def calc_loss(predictions, true_y, additional_mask=None):#!!!!!quantiles """ Calculates losses, ignoring NaN true values (assigning zero loss to them) :param predictions: Predicted values @@ -276,7 +278,7 @@ def calc_loss(predictions, true_y, additional_mask=None): weights), tf.size(true_y) -def make_train_op(loss, ema_decay=None, prefix=None): +def make_train_op(loss, ema_decay=None, prefix=None):#!!!!!quantiles #optimizer = COCOB() ##train.AdamOptimizer train.GradientDescentOptimizer optimizer = tf.train.AdamOptimizer() #!!!!!try simpler optimizer on our data. @@ -473,9 +475,13 @@ def build_cell(idx): has_dropout = hparams.decoder_input_dropout[idx] < 1 \ or hparams.decoder_state_dropout[idx] < 1 or hparams.decoder_output_dropout[idx] < 1 + #context size alone may be as big as decoder state size?? Then input-> hidden would be a down projection... + #so maybe do a projection down, on the encoder side first [e.g. encoder output??] then better here... 
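The RECURSIVE_W_ENCODER_CONTEXT flag described above only changes what gets concatenated onto each decoder step's input. The shape bookkeeping, sketched with numpy outside the tf.while_loop for clarity (illustrative shapes, not the real graph):

import numpy as np

batch, feat_depth, context_depth = 4, 7, 16
features_t = np.random.randn(batch, feat_depth)     # decoder features for one timestep
prev_output = np.random.randn(batch, 1)             # previous predicted value
summary_z = np.random.randn(batch, context_depth)   # fixed encoder summary (top-layer state)

# Kaggle-style input: previous prediction + per-step features only
next_input = np.concatenate([prev_output, features_t], axis=1)      # [batch, feat_depth + 1]
# Encoder-decoder style: additionally append the same context at every step
next_input_ctx = np.concatenate([next_input, summary_z], axis=1)    # [batch, feat_depth + 1 + context_depth]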
if self.is_train and has_dropout: attn_depth = attn_features.shape[-1].value if attn_features is not None else 0 - input_size = attn_depth + prediction_inputs.shape[-1].value + 1 if idx == 0 else self.hparams.rnn_depth + context_depth = encoder_state.shape[-1].value if self.hparams.RECURSIVE_W_ENCODER_CONTEXT is not None else 0 + input_size = attn_depth + context_depth + prediction_inputs.shape[-1].value + 1 if idx == 0 else self.hparams.rnn_depth + input_size = tf.Print(input_size, ['attn_depth',tf.shape(attn_depth),attn_depth, 'context_depth',tf.shape(context_depth),context_depth, 'input_size',tf.shape(input_size),input_size])#!!!!!!!!!! cell = rnn.DropoutWrapper(cell, dtype=tf.float32, input_size=input_size, variational_recurrent=hparams.decoder_variational_dropout[idx], input_keep_prob=hparams.decoder_input_dropout[idx], @@ -505,7 +511,7 @@ def build_cell(idx): nest.assert_same_structure(encoder_state, cell.state_size) predict_timesteps = self.inp.predict_window - assert prediction_inputs.shape[1] == predict_timesteps + assert prediction_inputs.shape[1] == predict_timesteps #!!!!!!!quantiles # [batch_size, time, input_depth] -> [time, batch_size, input_depth] inputs_by_time = tf.transpose(prediction_inputs, [1, 0, 2]) @@ -545,6 +551,13 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar next_input = tf.concat([prev_output, features], axis=1) # Append previous predicted value to input features + #If using more of a typical encoder-decoder, also have encoder context each time: + if self.hparams.RECURSIVE_W_ENCODER_CONTEXT: +# encoder_state = tf.Print(next_input,['encoder_state',tf.shape(encoder_state),encoder_state]) + next_input = tf.concat([next_input, encoder_state], axis=1) +# next_input = tf.Print(next_input,['next_input',tf.shape(next_input),next_input]) + + # Run RNN cell output, state = cell(next_input, prev_state) # Make prediction from RNN outputs From d5277fd7f06ad6bf59e87a8f57253763d7926f7e Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 23 Jul 2018 14:54:16 -0700 Subject: [PATCH 23/42] misc. 
and starting on random series sizes --- PREDICT.py | 2 +- PREPROCESS.py | 5 +- RUN_MANY_TRAIN_VAL_WINDOWS.sh | 6 +-- Readme.md | 18 +++++-- __init__.py | 0 hparams.py | 9 +++- input_pipe.py | 98 +++++++++++++++++++++-------------- make_features.py | 55 ++++++++++++++++++++ model.py | 90 +++++++++++++++++++++----------- trainer.py | 75 +++++++++++++++++++-------- 10 files changed, 257 insertions(+), 101 deletions(-) create mode 100644 __init__.py diff --git a/PREDICT.py b/PREDICT.py index a3ea201..befdad6 100755 --- a/PREDICT.py +++ b/PREDICT.py @@ -79,7 +79,7 @@ def mean_smape(true, pred): t_preds = [] for tm in range(Nmodels): tf.reset_default_graph() - t_preds.append(predict(FEATURES_SET, SAMPLING_PERIOD, paths, build_hparams(PARAM_SETTING_FULL_NAME), back_offset=0, predict_window=63, + t_preds.append(predict(FEATURES_SET, SAMPLING_PERIOD, paths, build_hparams(PARAM_SETTING_FULL_NAME), back_offset=0, history_window_size=283, horizon_window_size=63, n_models=Nmodels, target_model=tm, seed=2, batch_size=2048, asgd=True)) diff --git a/PREPROCESS.py b/PREPROCESS.py index 00c4695..a103c04 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -318,7 +318,7 @@ def imputation_lagKmedian_single_series(df,seasonality,N_seasons,out_of_range_fi -def data_augmentation(df, jitter_pcts_list=[.05,.10,.15], do_low_pass_filter=True, additive_trend=False): +def data_augmentation(df, jitter_pcts_list=[.05,.01], do_low_pass_filter=True, additive_trend=False): """ Do some basic data augmentation with a few different options. Then output Cartesian product of all these variations as the final set. @@ -332,6 +332,9 @@ def jitter__uniform_pcts(df, jitter_pcts_list, N_perturbations): percent of the observed value. Either positive or negative. If the count is small, then just leave it, otherwise perturb (always leaving counts positive). + + Just do at most 1 or 2 percent jitter to not corrupt to much, + ~ magnitude of measurement noise. """ page = df['Page'].values[0] cols = df.columns diff --git a/RUN_MANY_TRAIN_VAL_WINDOWS.sh b/RUN_MANY_TRAIN_VAL_WINDOWS.sh index 6a21d8d..fd21a5f 100644 --- a/RUN_MANY_TRAIN_VAL_WINDOWS.sh +++ b/RUN_MANY_TRAIN_VAL_WINDOWS.sh @@ -1,6 +1,6 @@ #chmod 777 RUN_MANY_TRAIN_VAL_WINDOWS.sh #./RUN_MANY_TRAIN_VAL_WINDOWS.sh -#Run over many train_window - predict_window length pairs +#Run over many history_window_size - horizon_window_size length pairs #Compile results, analyze performance as (2D) heatmap @@ -36,11 +36,11 @@ for v in $VALIDATION_WINDOWS; do echo 'running make_features.py with --add_days='$v python3 make_features.py data/vars ours daily full --add_days=$v for t in $TRAIN_WINDOWS; do - echo 'validation window = '$v 'validation window = '$t + echo 'history window = '$t 'horizon window = '$v echo 'running trainer.py' NAME="val$v-train$t" echo 'NAAME='$NAME - python3 trainer.py full daily --name $NAME --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10 --max_epoch=$MAX_EPOCH --patience=5 --verbose --predict_window=$v --train_window=$t + python3 trainer.py full daily --name $NAME --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10 --max_epoch=$MAX_EPOCH --patience=5 --verbose --horizon_window_size=$v --history_window_size=$t done done diff --git a/Readme.md b/Readme.md index 240a090..ca66990 100755 --- a/Readme.md +++ b/Readme.md @@ -59,17 +59,22 @@ rm *.pkl cd .. 
ll data/ -python3 make_features.py data/vars ours daily full --add_days=63 -python3 make_features.py data/vars kaggle daily full --add_days=63 +python3 make_features.py data/vars ours daily full --add_days=50 +#python3 make_features.py data/vars kaggle daily full --add_days=63 + +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=5 --horizon_window_size=50 --history_window_size=100 --max_epoch=10 + +---------------------------------------------------------------------------------------------------------------------------------------------------------- + #no reason to expect 10000 to 11500 is good range to save out. View loss along the way python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 -python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 -python3 trainer.py full daily --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --horizon_window_size=50 +python3 trainer.py full daily --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --horizon_window_size=50 --name TEST_stacked --hparam_set=TEST_stacked --no_eval @@ -79,7 +84,10 @@ python3 trainer.py full daily --name TEST_attn_head --hparam_set=TEST_attn_head --verbose -python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 --train_window=100 +python3 trainer.py full daily --name wEncDec --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --horizon_window_size=63 --history_window_size=100 --max_epoch=10 + +python3 trainer.py full daily --name noEncDec --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --horizon_window_size=63 --history_window_size=100 --max_epoch=10 + python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10 --max_epoch=1000 --patience=50 --verbose --side_split diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/hparams.py b/hparams.py index 9b694b3..4b28e7e 100755 --- a/hparams.py +++ b/hparams.py @@ -46,7 +46,14 @@ #connections from encoded representation to each decoder time step (only to 1st #decoder timestep). Set below to True to use encoder-decoder; set False to use #Kaggle architecture not really true encoder-decoder - RECURSIVE_W_ENCODER_CONTEXT=True, + RECURSIVE_W_ENCODER_CONTEXT=True, + + #Instead of fixed size windows, do training phase over range of window sizes + #drawn uniformly from [a,b]. 
Another form of randomization/regularization, + #but more importantly this way model can generalize to different lengths so + #we can more fairly assess performance over range of history/horizon windows: + history_window_size_minmax=[7,365], + horizon_window_size_minmax=[7,60], ) diff --git a/input_pipe.py b/input_pipe.py index 69d1d52..351bb3e 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -134,10 +134,11 @@ def cut(self, counts, start, end): :return: tuple (train_counts, test_counts, lagged_counts, [dow,woy,moy,year]) """ # Pad counts to ensure we have enough array length for prediction - counts = tf.concat([counts, tf.fill([self.predict_window], np.NaN)], axis=0) + counts = tf.concat([counts, tf.fill([self.horizon_window_size], np.NaN)], axis=0) cropped_count = counts[start:end] - - +# cropped_count = tf.Print(cropped_count,['INPUT PIPE > CUT > cropped_count',tf.shape(cropped_count), 'start', start, 'end', end]) +# cropped_count = tf.Print(cropped_count,['self.history_window_size', self.history_window_size, 'self.horizon_window_size', self.horizon_window_size]) + # ============================================================================= # Ordinal periodic variables # which features are here depends on what the sampling period is for the data @@ -185,7 +186,7 @@ def cut(self, counts, start, end): #Will always have the count series (the series we predict on): # Split for train and test - x_counts, y_counts = tf.split(cropped_count, [self.train_window, self.predict_window], axis=0) + x_counts, y_counts = tf.split(cropped_count, [self.history_window_size, self.horizon_window_size], axis=0) # Convert NaN to zero in for train data x_counts = tf.where(tf.is_nan(x_counts), tf.zeros_like(x_counts), x_counts) @@ -214,17 +215,17 @@ def cut_train(self, counts, *args): :param args: pass-through data, will be appended to result :return: result of cut() + args """ - n_timesteps = self.predict_window + self.train_window + n_timesteps = self.horizon_window_size + self.history_window_size # How much free space we have to choose starting day free_space = self.inp.data_days - n_timesteps - self.back_offset - self.start_offset if self.verbose: #!!!!!! doesn't really matter since this is just printout, but would need to change for WEEKLY / MONTHLY lower_train_start = self.inp.data_start + pd.Timedelta(self.start_offset, 'D') lower_test_end = lower_train_start + pd.Timedelta(n_timesteps, 'D') - lower_test_start = lower_test_end - pd.Timedelta(self.predict_window, 'D') + lower_test_start = lower_test_end - pd.Timedelta(self.horizon_window_size, 'D') upper_train_start = self.inp.data_start + pd.Timedelta(free_space - 1, 'D') upper_test_end = upper_train_start + pd.Timedelta(n_timesteps, 'D') - upper_test_start = upper_test_end - pd.Timedelta(self.predict_window, 'D') + upper_test_start = upper_test_end - pd.Timedelta(self.horizon_window_size, 'D') print(f"Free space for training: {free_space} days.") print(f" Lower train {lower_train_start}, prediction {lower_test_start}..{lower_test_end}") print(f" Upper train {upper_train_start}, prediction {upper_test_start}..{upper_test_end}") @@ -238,12 +239,12 @@ def cut_train(self, counts, *args): def cut_eval(self, counts, *args): """ Cuts segment of time series for evaluation. 
- Always cuts train_window + predict_window length segment beginning at start_offset point + Always cuts history_window_size + horizon_window_size length segment beginning at start_offset point :param counts: counts timeseries :param args: pass-through data, will be appended to result :return: result of cut() + args """ - end = self.start_offset + self.train_window + self.predict_window + end = self.start_offset + self.history_window_size + self.horizon_window_size return self.cut(counts, self.start_offset, end) + args def reject_filter(self, x_counts, y_counts, *args): @@ -297,15 +298,15 @@ def make_features(self, *args): # Do train - predict splits # ============================================================================= if self.sampling_period == 'daily': - x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) - x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func + x_dow, y_dow = tf.split(dow, [self.history_window_size, self.horizon_window_size], axis=0) + x_woy, y_woy = tf.split(woy, [self.history_window_size, self.horizon_window_size], axis=0) #need to see how to fit in woy into inputs to this func elif self.sampling_period == 'weekly': - x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) + x_woy, y_woy = tf.split(woy, [self.history_window_size, self.horizon_window_size], axis=0) elif self.sampling_period == 'monthly': - x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) + x_moy, y_moy = tf.split(moy, [self.history_window_size, self.horizon_window_size], axis=0) #Already did a manual kind of scaling for year in make_features.py so don't need to normalize here... - x_year, y_year = tf.split(year, [self.train_window, self.predict_window], axis=0) + x_year, y_year = tf.split(year, [self.history_window_size, self.horizon_window_size], axis=0) x_year = tf.expand_dims(x_year,axis=1) y_year = tf.expand_dims(y_year,axis=1) @@ -317,7 +318,7 @@ def make_features(self, *args): norm_lagged_counts = (lagged_counts - mean) / std # Split lagged counts to train and test - x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) + x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.history_window_size, self.horizon_window_size], axis=0) # Combine all page features into single tensor @@ -363,7 +364,7 @@ def make_features(self, *args): x_features = tf.concat([x_features, x_lagged, # Stretch series_features to all training days # [1, features] -> [n_timesteps, features] - tf.tile(series_features, [self.train_window, 1])], axis=1) + tf.tile(series_features, [self.history_window_size, 1])], axis=1) # Test features if self.sampling_period == 'daily': @@ -376,7 +377,7 @@ def make_features(self, *args): y_features = tf.concat([y_features, y_lagged, # Stretch series_features to all testing days # [1, features] -> [n_timesteps, features] - tf.tile(series_features, [self.predict_window, 1]) + tf.tile(series_features, [self.horizon_window_size, 1]) ], axis=1) # print(x_features) @@ -386,13 +387,14 @@ def make_features(self, *args): print('x_features') print(x_features) + print(x_features.shape) return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_features, page_ix #Must match up with setting self.XYZ = it_tensors below in __init__. 
def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: int, mode: ModelMode, n_epoch=None, - batch_size=127, runs_in_burst=1, verbose=True, predict_window=60, train_window=500, + batch_size=127, runs_in_burst=1, verbose=True, horizon_window_size=60, history_window_size=500, train_completeness_threshold=1, predict_completeness_threshold=1, back_offset=0, train_skip_first=0, rand_seed=None): """ @@ -407,8 +409,8 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter :param batch_size: :param runs_in_burst: How many batches can be consumed at short time interval (burst). Multiplicator for prefetch() :param verbose: Print additional information during graph construction - :param predict_window: Number of days to predict - :param train_window: Use train_window days for traning + :param horizon_window_size: Number of days to predict + :param history_window_size: Use history_window_size days for traning :param train_completeness_threshold: Percent of zero datapoints allowed in train timeseries. :param predict_completeness_threshold: Percent of zero datapoints allowed in test/predict timeseries. :param back_offset: Don't use back_offset days at the end of timeseries @@ -430,32 +432,35 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter mode, inp.data_days, inp.data_start, inp.data_end, inp.features_end)) if mode == ModelMode.TRAIN: - # reserve predict_window at the end for validation - assert inp.data_days - predict_window > predict_window + train_window, \ + # reserve horizon_window_size at the end for validation + assert inp.data_days - horizon_window_size > horizon_window_size + history_window_size, \ "Predict+train window length (+predict window for validation) is larger than total number of days in dataset" self.start_offset = train_skip_first elif mode == ModelMode.EVAL or mode == ModelMode.PREDICT: - self.start_offset = inp.data_days - train_window - back_offset + self.start_offset = inp.data_days - history_window_size - back_offset if verbose: train_start = inp.data_start + pd.Timedelta(self.start_offset, 'D') - eval_start = train_start + pd.Timedelta(train_window, 'D') - end = eval_start + pd.Timedelta(predict_window - 1, 'D') + eval_start = train_start + pd.Timedelta(history_window_size, 'D') + end = eval_start + pd.Timedelta(horizon_window_size - 1, 'D') print("Train start %s, predict start %s, end %s" % (train_start, eval_start, end)) assert self.start_offset >= 0 - self.train_window = train_window - self.predict_window = predict_window - self.attn_window = train_window - predict_window + 1 - self.max_train_empty = int(round(train_window * (1 - train_completeness_threshold))) - self.max_predict_empty = int(round(predict_window * (1 - predict_completeness_threshold))) + self.history_window_size = history_window_size #!!!!!!!!!!!random resize + self.horizon_window_size = horizon_window_size#!!!!!!!!!!!random resize + self.attn_window = history_window_size - horizon_window_size + 1#!!!!!!!!!!!random resize + self.max_train_empty = int(round(history_window_size * (1 - train_completeness_threshold)))#!!!!!!!!!!!random resize + self.max_predict_empty = int(round(horizon_window_size * (1 - predict_completeness_threshold)))#!!!!!!!!!!!random resize self.mode = mode self.verbose = verbose + + self.train_completeness_threshold = train_completeness_threshold + self.predict_completeness_threshold = predict_completeness_threshold print('max_train_empty',self.max_train_empty) 
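The two "max empty" limits printed here are derived directly from the completeness thresholds and the current window sizes, which is why they (and attn_window) must be recomputed whenever the history/horizon sizes are redrawn for variable-size training. A sketch of that bookkeeping, using the [7, 365] / [7, 60] ranges from hparams as example bounds:

import numpy as np

def window_limits(history, horizon, train_completeness, predict_completeness):
    max_train_empty = int(round(history * (1 - train_completeness)))
    max_predict_empty = int(round(horizon * (1 - predict_completeness)))
    attn_window = history - horizon + 1
    return max_train_empty, max_predict_empty, attn_window

# Example redraw, as done per training step for variable-size windows
# (np.random.randint's upper bound is exclusive, hence the +1):
history = np.random.randint(7, 365 + 1)
horizon = np.random.randint(7, 60 + 1)
print(window_limits(history, horizon, train_completeness=0.01, predict_completeness=0.0))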
print('max_predict_empty',self.max_predict_empty) - print('train_window',self.train_window) - print('predict_window',self.predict_window) + print('history_window_size',self.history_window_size) + print('horizon_window_size',self.horizon_window_size) print('attn_window',self.attn_window) @@ -465,14 +470,29 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter # Choose right cutter function for current ModelMode cutter = {ModelMode.TRAIN: self.cut_train, ModelMode.EVAL: self.cut_eval, ModelMode.PREDICT: self.cut_eval} # Create dataset, transform features and assemble batches + #features is a list of tensors (one tensor per feature: counts, page_ix, ..., count_variance) + print('features',features) +# features = tf.Print(features,['features',tf.shape(features),features]) root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) - batch = (root_ds - .map(cutter[mode]) - .filter(self.reject_filter) - .map(self.make_features, num_parallel_calls=num_threads) - .batch(batch_size) - .prefetch(runs_in_burst * 2) - ) +# print(root_ds.output_classes, root_ds.output_shapes, root_ds.output_types,) + print(root_ds.output_shapes) +# batch = (root_ds +# .map(cutter[mode]) +# .filter(self.reject_filter) +# .map(self.make_features, num_parallel_calls=num_threads) +# .batch(batch_size) +# .prefetch(runs_in_burst * 2) +# ) + batch = root_ds.map(cutter[mode]).filter(self.reject_filter).map(self.make_features, num_parallel_calls=num_threads) + print('batch MFM', batch) + + batch = batch.batch(batch_size) + print('batch B', batch) + + batch = batch.prefetch(runs_in_burst * 2) + print('batch P', batch) + batch = (batch) + print('---------------- Done batching ----------------') print(batch) self.iterator = batch.make_initializable_iterator() diff --git a/make_features.py b/make_features.py index d1061db..170d183 100755 --- a/make_features.py +++ b/make_features.py @@ -265,6 +265,61 @@ def normalize(values: np.ndarray): return (values - values.mean()) / np.std(values) + + + + + +def encode_fixed_date_holidays__daily(dates_series): + """ + Encode holidays and shoulder days, for holidays that occur yearly on fixed + dates. + For daily sampled data only. + + In USA: + Christmas, New Year, 4th of July, Halloween, Cinco de Mayo + Valentine's Day, Veteran's Day + + other international: + ... + """ + return dates_series + + +# ============================================================================= +# MOVING holidays [variable date] +# ============================================================================= +def encode_thanksgiving__daily(dates_series): + """ + Encode Thanksgiving holiday and shoulder days. + For daily sampled data only. + """ +# 4th Thurs of Novmber... +# if (month==11) and (dayofweek=='Thurs') and (22<=dayofmonth<=28) + return dates_series + +def encode_easter__daily(dates_series): + """ + Encode Easter holiday and shoulder days. + For daily sampled data only. + """ + return dates_series + +#Labor Day, Memorial Day, President's Day, MLK Day, Columbus Day, Tax Day + +def encode_custom_dates__daily(dates_series,dates_list): + """ + Encode custom days and optionally shoulder days. + For daily sampled data only. + + E.g. Superbowl Sunday + suberbowl_dates = ['2014-2-2','2015-2-1','2016-2-7','2017-2-5','2018-2-4','2019-2-3'] + shoulders = [...] 
+ """ + return dates_series + + + def run(): parser = argparse.ArgumentParser(description='Prepare data') parser.add_argument('data_dir') diff --git a/model.py b/model.py index b315e13..81cc5d8 100755 --- a/model.py +++ b/model.py @@ -7,8 +7,11 @@ from tensorflow.python.util import nest from cocob import COCOB +from Adam_HD_optimizer import AdamHDOptimizer +from SGDN_HD_optimizer import MomentumSGDHDOptimizer from input_pipe import InputPipe, ModelMode + GRAD_CLIP_THRESHOLD = 10 RNN = cudnn_rnn.CudnnGRU # RNN = tf.contrib.cudnn_rnn.CudnnLSTM @@ -173,7 +176,7 @@ def attn_readout_v3(readout, attn_window, attn_heads, page_features, seed): # [batch(readout_depth), width, channels] -> [batch, height=1, width, channels] inp = readout[:, tf.newaxis, :, :] - # attn_window = train_window - predict_window + 1 + # attn_window = history_window_size - horizon_window_size + 1 # [batch, attn_window * n_heads] filter_logits = tf.layers.dense(page_features, attn_window * attn_heads, name="attn_focus", kernel_initializer=default_init(seed) @@ -191,15 +194,15 @@ def attn_readout_v3(readout, attn_window, attn_heads, page_features, seed): # [width(attn_window), channels(batch), n_heads] -> [height(1), width(attn_window), channels(batch), multiplier(n_heads)] attn_filter = attns_max[tf.newaxis, :, :, :] - # [batch(readout_depth), height=1, width=n_days, channels=batch] -> [batch(readout_depth), height=1, width=predict_window, channels=batch*n_heads] + # [batch(readout_depth), height=1, width=n_days, channels=batch] -> [batch(readout_depth), height=1, width=horizon_window_size, channels=batch*n_heads] averaged = tf.nn.depthwise_conv2d_native(inp, attn_filter, [1, 1, 1, 1], 'VALID') - # [batch, height=1, width=predict_window, channels=readout_depth*n_neads] -> [batch(depth), predict_window, batch*n_heads] + # [batch, height=1, width=horizon_window_size, channels=readout_depth*n_neads] -> [batch(depth), horizon_window_size, batch*n_heads] attn_features = tf.squeeze(averaged, 1) - # [batch(depth), predict_window, batch*n_heads] -> [batch*n_heads, predict_window, depth] + # [batch(depth), horizon_window_size, batch*n_heads] -> [batch*n_heads, horizon_window_size, depth] attn_features = tf.transpose(attn_features, [2, 1, 0]) - # [batch * n_heads, predict_window, depth] -> n_heads * [batch, predict_window, depth] + # [batch * n_heads, horizon_window_size, depth] -> n_heads * [batch, horizon_window_size, depth] heads = [attn_features[head_no::attn_heads] for head_no in range(attn_heads)] - # n_heads * [batch, predict_window, depth] -> [batch, predict_window, depth*n_heads] + # n_heads * [batch, horizon_window_size, depth] -> [batch, horizon_window_size, depth*n_heads] result = tf.concat(heads, axis=-1) # attn_diag = tf.unstack(attns_max, axis=-1) return result, None @@ -215,11 +218,14 @@ def calc_smape_rounded(true, predicted, weights): """ n_valid = tf.reduce_sum(weights) true_o = tf.round(tf.expm1(true)) - pred_o = tf.maximum(tf.round(tf.expm1(predicted)), 0.0) + pred_o = tf.maximum(tf.round(tf.expm1(predicted)), 0.0) #!!!!!!! for us we could even clip at 1, since 0 means measurement was missing summ = tf.abs(true_o) + tf.abs(pred_o) zeros = summ < 0.01 raw_smape = tf.abs(pred_o - true_o) / summ * 2.0 smape = tf.where(zeros, tf.zeros_like(summ, dtype=tf.float32), raw_smape) + #!!!!!!!!!!! since summ is sum of absolute values of 2 rounded things, is only < .01 if is exactly = 0. 
For our data, this should NEVER happen, would mean unmeasured NAN, so actually this is exactly the SMAPE we want + +# smape = tf.Print(smape, ['pred_o',tf.shape(pred_o),pred_o, 'pred_o not round clip',tf.expm1(predicted), 'true_o',tf.shape(true_o),true_o, 'smape', smape, 'raw_smape', raw_smape]) return tf.reduce_sum(smape * weights) / n_valid @@ -279,10 +285,19 @@ def calc_loss(predictions, true_y, additional_mask=None):#!!!!!quantiles def make_train_op(loss, ema_decay=None, prefix=None):#!!!!!quantiles - #optimizer = COCOB() - ##train.AdamOptimizer train.GradientDescentOptimizer - optimizer = tf.train.AdamOptimizer() #!!!!!try simpler optimizer on our data. -# optimizer = tf.train.GradientDescentOptimizer(1e-9) #!!!!!try simpler optimizer on our data. +# OPTIMIZER=#'SGDN-HD',#'COCOB',#'ADAM',#'SGDN-HD',#'ADAM-HD' +# if OPTIMIZER=='COCOB': +# optimizer = COCOB() +# if OPTIMIZER=='ADAM': +# optimizer = tf.train.AdamOptimizer() +# if OPTIMIZER=='SGD': +# optimizer = tf.train.GradientDescentOptimizer(1e-9) +# if OPTIMIZER=='SGDN-HD': +# optimizer = MomentumSGDHDOptimizer() +# if OPTIMIZER=='ADAM-HD': +# optimizer = AdamHDOptimizer() +# optimizer=MomentumSGDHDOptimizer(alpha_0=1e-1)#bad SMAPEs for various orders of magnitude alpha_0 + optimizer = tf.train.AdamOptimizer() glob_step = tf.train.get_global_step() @@ -313,7 +328,7 @@ def make_train_op(loss, ema_decay=None, prefix=None):#!!!!!quantiles return training_op, glob_norm, ema -def convert_cudnn_state_v2(h_state, hparams, seed, c_state=None, dropout=1.0): +def convert_cudnn_state_v3(h_state, hparams, seed, c_state=None, dropout=1.0): """ Converts RNN state tensor from cuDNN representation to TF RNNCell compatible representation. :param h_state: tensor [num_layers, batch_size, depth] @@ -335,13 +350,23 @@ def wrap_dropout(structure): # encoder_layers > decoder_layers: get outputs of upper encoder layers # encoder_layers < decoder_layers: feed encoder outputs to lower decoder layers, feed zeros to top layers h_layers = tf.unstack(h_state) + + #Regardless of relative number of layers in encoder vs. decoder, simple approach is + #use topmost encoder layer hidden state as the (fixed) context + encoded_representation = wrap_dropout(h_layers[-1]) + #above uses a different random dropout for the "encoded representaiton" than the actual top level output. + #This is possibly a good regularization thing since we dont expect the final hidden state to be perfect summar/context vector, + #so a little randomness is probably good here. + #vs. 
below using topmost level exactly same dropout mask: _[-1] if hparams.encoder_rnn_layers >= hparams.decoder_rnn_layers: - return squeeze(wrap_dropout(h_layers[hparams.encoder_rnn_layers - hparams.decoder_rnn_layers:])) + _ = wrap_dropout(h_layers[hparams.encoder_rnn_layers - hparams.decoder_rnn_layers:]) + return squeeze(_), _[-1] #Use the topmost hidden state of the encoder as the encoded representaiton +# return squeeze(_), encoded_representation #Use the topmost hidden state of the encoder as the encoded representaiton else: lower_inputs = wrap_dropout(h_layers) upper_inputs = [tf.zeros_like(h_layers[0]) for _ in range(hparams.decoder_rnn_layers - hparams.encoder_rnn_layers)] - return squeeze(lower_inputs + upper_inputs) + return squeeze(lower_inputs + upper_inputs), lower_inputs[-1] #Use the topmost hidden state of the encoder as the encoded representaiton def rnn_stability_loss(rnn_output, beta): @@ -381,7 +406,7 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a :param seed: :param graph_prefix: Subgraph prefix for multi-model graph :param asgd_decay: Decay for SGD averaging - :param loss_mask: Additional mask for losses calculation (one value for each prediction day), shape=[predict_window] + :param loss_mask: Additional mask for losses calculation (one value for each prediction day), shape=[horizon_window_size] """ self.is_train = is_train self.inp = inp @@ -392,13 +417,15 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a encoder_output, h_state, c_state = make_encoder(inp.time_x, inp.encoder_features_depth, is_train, hparams, seed, transpose_output=False) # Encoder activation losses - enc_stab_loss = rnn_stability_loss(encoder_output, hparams.encoder_stability_loss / inp.train_window) - enc_activation_loss = rnn_activation_loss(encoder_output, hparams.encoder_activation_loss / inp.train_window) + enc_stab_loss = rnn_stability_loss(encoder_output, hparams.encoder_stability_loss / inp.history_window_size) + enc_activation_loss = rnn_activation_loss(encoder_output, hparams.encoder_activation_loss / inp.history_window_size) # Convert state from cuDNN representation to TF RNNCell-compatible representation - encoder_state = convert_cudnn_state_v2(h_state, hparams, c_state, + encoder_state, summary_z = convert_cudnn_state_v3(h_state, hparams, c_state, dropout=hparams.gate_dropout if is_train else 1.0) - +# encoder_state = tf.Print(encoder_state, ['encoder_state',tf.shape(encoder_state),encoder_state]) +# summary_z = tf.Print(summary_z, ['summary_z',tf.shape(summary_z),summary_z]) + # Attention calculations # Compress encoder outputs enc_readout = compressed_readout(encoder_output, hparams, @@ -416,10 +443,11 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a print('inp.time_y',inp.time_y) decoder_targets, decoder_outputs = self.decoder(encoder_state, attn_features if hparams.use_attn else None, + summary_z if hparams.RECURSIVE_W_ENCODER_CONTEXT else None, inp.time_y, inp.norm_x[:, -1]) #in decoder function def: inp.time_y = "prediction_inputs"; inp.norm_x[:, -1] = "previous_y" (i.e. 
the final x normalizd)) # Decoder activation losses - dec_stab_loss = rnn_stability_loss(decoder_outputs, hparams.decoder_stability_loss / inp.predict_window) - dec_activation_loss = rnn_activation_loss(decoder_outputs, hparams.decoder_activation_loss / inp.predict_window) + dec_stab_loss = rnn_stability_loss(decoder_outputs, hparams.decoder_stability_loss / inp.horizon_window_size) + dec_activation_loss = rnn_activation_loss(decoder_outputs, hparams.decoder_activation_loss / inp.horizon_window_size) # Get final denormalized predictions self.predictions = decode_predictions(decoder_targets, inp) @@ -459,12 +487,12 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a def default_init(self, seed_add=0): return default_init(self.seed + seed_add) - def decoder(self, encoder_state, attn_features, prediction_inputs, previous_y): + def decoder(self, encoder_state, attn_features, summary_z, prediction_inputs, previous_y): """ :param encoder_state: shape [batch_size, encoder_rnn_depth] :param prediction_inputs: features for prediction days, tensor[batch_size, time, input_depth] :param previous_y: Last day pageviews, shape [batch_size] - :param attn_features: Additional features from attention layer, shape [batch, predict_window, readout_depth*n_heads] + :param attn_features: Additional features from attention layer, shape [batch, horizon_window_size, readout_depth*n_heads] :return: decoder rnn output """ hparams = self.hparams @@ -479,7 +507,8 @@ def build_cell(idx): #so maybe do a projection down, on the encoder side first [e.g. encoder output??] then better here... if self.is_train and has_dropout: attn_depth = attn_features.shape[-1].value if attn_features is not None else 0 - context_depth = encoder_state.shape[-1].value if self.hparams.RECURSIVE_W_ENCODER_CONTEXT is not None else 0 + context_depth = summary_z.shape[-1].value if self.hparams.RECURSIVE_W_ENCODER_CONTEXT is not None else 0 #Should just be the encoder RNN depth + print('attn_depth',attn_depth, 'context_depth',context_depth) input_size = attn_depth + context_depth + prediction_inputs.shape[-1].value + 1 if idx == 0 else self.hparams.rnn_depth input_size = tf.Print(input_size, ['attn_depth',tf.shape(attn_depth),attn_depth, 'context_depth',tf.shape(context_depth),context_depth, 'input_size',tf.shape(input_size),input_size])#!!!!!!!!!! 
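Written out without the inline conditional (which groups over the whole sum, since `x if cond else y` binds more loosely than `+`), the per-layer input size passed to DropoutWrapper is just the following; a sketch with illustrative parameter names:

def decoder_cell_input_size(idx, attn_depth, context_depth, feature_depth, rnn_depth):
    # Layer 0 sees [previous prediction | per-step features | attention readout | encoder context];
    # deeper stacked layers only see the lower layer's output of width rnn_depth.
    if idx == 0:
        return attn_depth + context_depth + feature_depth + 1  # +1 for the previous predicted value
    return rnn_depth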
cell = rnn.DropoutWrapper(cell, dtype=tf.float32, input_size=input_size, @@ -510,7 +539,7 @@ def build_cell(idx): nest.assert_same_structure(encoder_state, cell.state_size) - predict_timesteps = self.inp.predict_window + predict_timesteps = self.inp.horizon_window_size assert prediction_inputs.shape[1] == predict_timesteps #!!!!!!!quantiles # [batch_size, time, input_depth] -> [time, batch_size, input_depth] @@ -541,7 +570,7 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar # RNN inputs for current step features = inputs_by_time[timestep] - # [batch, predict_window, readout_depth * n_heads] -> [batch, readout_depth * n_heads] + # [batch, horizon_window_size, readout_depth * n_heads] -> [batch, readout_depth * n_heads] if attn_features is not None: # [batch_size, 1] + [batch_size, input_depth] attn = attn_features[:, timestep, :] @@ -553,11 +582,14 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar #If using more of a typical encoder-decoder, also have encoder context each time: if self.hparams.RECURSIVE_W_ENCODER_CONTEXT: -# encoder_state = tf.Print(next_input,['encoder_state',tf.shape(encoder_state),encoder_state]) - next_input = tf.concat([next_input, encoder_state], axis=1) + next_input = tf.concat([next_input, summary_z], axis=1) #!!!!!!!!summary_z[-1] +# if self.hparams.encoder_rnn_layers == 1: +# next_input = tf.concat([next_input, summary_z], axis=1) #!!!!!!!!summary_z[-1] +# elif self.hparams.encoder_rnn_layers > 1: +# next_input = tf.concat([next_input, summary_z[-1]], axis=1) #!!!!!!!!summary_z[-1] # next_input = tf.Print(next_input,['next_input',tf.shape(next_input),next_input]) - + # Run RNN cell output, state = cell(next_input, prev_state) # Make prediction from RNN outputs diff --git a/trainer.py b/trainer.py index d43c846..b2b2749 100755 --- a/trainer.py +++ b/trainer.py @@ -400,14 +400,14 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model seed=None, logdir='data/logs', max_epoch=100, patience=2, train_sampling=1.0, eval_sampling=1.0, eval_memsize=5, gpu=0, gpu_allow_growth=False, save_best_model=False, forward_split=False, write_summaries=False, verbose=False, asgd_decay=None, tqdm=True, - side_split=True, max_steps=None, save_from_step=None, do_eval=True, predict_window=63, train_window=283): + side_split=True, max_steps=None, save_from_step=None, do_eval=True):#, horizon_window_size=63, history_window_size=283): eval_k = int(round(26214 * eval_memsize / n_models)) eval_batch_size = int( eval_k / (hparams.rnn_depth * hparams.encoder_rnn_layers)) # 128 -> 1024, 256->512, 512->256 eval_pct = 0.1 batch_size = hparams.batch_size -# train_window = hparams.train_window +# history_window_size = hparams.history_window_size tf.reset_default_graph() if seed: tf.set_random_seed(seed) @@ -446,36 +446,58 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model print('eval_every_step', eval_every_step) + def random_draw_history_and_horizon_window_sizes(trainer): + """ + Want to not only have random start end, but also variable size chunks for + history and horizon sizes in TRAINING phase. + (in prediction phase, use fixed sizes, and then for different sizes see how performance is.) 
+ """ + history = np.random.randint(low=hparams.history_window_size_minmax[0],high=hparams.history_window_size_minmax[1]+1) + horizon = np.random.randint(low=hparams.horizon_window_size_minmax[0],high=hparams.horizon_window_size_minmax[1]+1) + for TT in trainer.trainers: + TT.train_model.inp.history_window_size = history + TT.train_model.inp.horizon_window_size = horizon + TT.train_model.inp.attn_window = history - horizon + 1 + TT.train_model.inp.max_train_empty = int(round(history * (1 - TT.train_model.inp.train_completeness_threshold))) + TT.train_model.inp.max_predict_empty = int(round(horizon * (1 - TT.train_model.inp.predict_completeness_threshold))) + return trainer + + + def create_model(features_set, sampling_period, scope, index, prefix, seed): + #Just dummy filler, not important what value + history_dummy = 111 + horizon_dummy = 42 + with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): split = splitter.splits[index] pipe = InputPipe(features_set, sampling_period, inp, features=split.train_set, N_time_series=split.train_size, mode=ModelMode.TRAIN, batch_size=batch_size, n_epoch=None, verbose=verbose, train_completeness_threshold=train_completeness_threshold, - predict_completeness_threshold=train_completeness_threshold, train_window=train_window, - predict_window=predict_window, + predict_completeness_threshold=train_completeness_threshold, history_window_size=history_dummy, + horizon_window_size=horizon_dummy, rand_seed=seed, train_skip_first=hparams.train_skip_first, - back_offset=predict_window if forward_split else 0) + back_offset=horizon_dummy if forward_split else 0) inp_scope.reuse_variables() TCT = 0.01 if side_split: side_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, - verbose=verbose, predict_window=predict_window, + verbose=verbose, horizon_window_size=horizon_dummy, train_completeness_threshold=TCT, predict_completeness_threshold=0, - train_window=train_window, rand_seed=seed, runs_in_burst=eval_batches, - back_offset=predict_window * (2 if forward_split else 1)) + history_window_size=history_dummy, rand_seed=seed, runs_in_burst=eval_batches, + back_offset=horizon_dummy * (2 if forward_split else 1)) else: side_eval_pipe = None if forward_split: - forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, - verbose=verbose, predict_window=predict_window, + verbose=verbose, horizon_window_size=horizon_dummy, train_completeness_threshold=TCT, predict_completeness_threshold=0, - train_window=train_window, rand_seed=seed, runs_in_burst=eval_batches, - back_offset=predict_window) + history_window_size=history_dummy, rand_seed=seed, runs_in_burst=eval_batches, + back_offset=horizon_dummy) else: forward_eval_pipe = None avg_sgd = asgd_decay is not None @@ -600,8 +622,17 @@ def ema_vars(model): tqr = range(steps_per_epoch) for _ in tqr: + #!!!!!!!!!! 
Variable random length train predict windows + #Random draw the train, predict window lengths + print(_) + trainer = random_draw_history_and_horizon_window_sizes(trainer) +# print('+++++++++++++++', [(TT.train_model.inp.history_window_size,TT.train_model.inp.horizon_window_size) for TT in trainer.trainers]) +# print('--------', [(TT.train_model.inp.max_train_empty,TT.train_model.inp.max_predict_empty) for TT in trainer.trainers]) + try: step = trainer.train_step(sess, epoch) +# print('+-+-+-+-+-+-+-', [(TT.train_model.inp.history_window_size,TT.train_model.inp.horizon_window_size) for TT in trainer.trainers]) +# print('0000000000000', [(TT.train_model.inp.max_train_empty,TT.train_model.inp.max_predict_empty) for TT in trainer.trainers]) except tf.errors.OutOfRangeError: break # if beholder: @@ -672,15 +703,15 @@ def ema_vars(model): return np.mean(best_epoch_smape, dtype=np.float64) -def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, verbose=False, predict_window=6, back_offset=0, n_models=1, - target_model=0, asgd=False, seed=1, batch_size=1024, train_window=283): +def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, verbose=False, horizon_window_size=6, back_offset=0, n_models=1, + target_model=0, asgd=False, seed=1, batch_size=1024, history_window_size=283): #For predict: allow horizon_window_size to be fixed with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") pipe = InputPipe(features_set, sampling_period, inp, page_features(inp, features_set), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, train_completeness_threshold=0.01, - predict_window=predict_window, - predict_completeness_threshold=0.0, train_window=train_window,#hparams.train_window, + horizon_window_size=horizon_window_size, + predict_completeness_threshold=0.0, history_window_size=history_window_size,#hparams.history_window_size, back_offset=back_offset) asgd_decay = 0.99 if asgd else None if n_models == 1: @@ -749,12 +780,12 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, predictions /= len(checkpoints) #Since it is averaging predictions over the chckpoints offset = pd.Timedelta(back_offset, 'D') #!!!!!!!!!!!! 
need to change these lines when sampling WEEKLY MONTHLY start_prediction = inp.data_end + pd.Timedelta('1D') - offset - end_prediction = start_prediction + pd.Timedelta(predict_window - 1, 'D') + end_prediction = start_prediction + pd.Timedelta(horizon_window_size - 1, 'D') predictions.columns = pd.date_range(start_prediction, end_prediction) if return_x: x = pd.concat(x_buffer) - #start_data = inp.data_end - pd.Timedelta(hparams.train_window - 1, 'D') - back_offset - start_data = inp.data_end - pd.Timedelta(train_window - 1, 'D') - back_offset #!!!!!now for heatmaps + #start_data = inp.data_end - pd.Timedelta(hparams.history_window_size - 1, 'D') - back_offset + start_data = inp.data_end - pd.Timedelta(history_window_size - 1, 'D') - back_offset #!!!!!now for heatmaps end_data = inp.data_end - back_offset x.columns = pd.date_range(start_data, end_data) return predictions, x @@ -789,8 +820,8 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, parser.add_argument('--no_tqdm', default=True, dest='tqdm', action='store_false', help="Don't use tqdm for status display during training") parser.add_argument('--max_steps', type=int, help="Stop training after max steps") parser.add_argument('--save_from_step', type=int, help="Save model on each evaluation (10 evals per epoch), starting from this step") - parser.add_argument('--predict_window', default=63, type=int, help="Number of days to predict") - parser.add_argument('--train_window', default=283, type=int, help="Train window chunk size")#Now that we want to do train size - val size performance heatmaps +# parser.add_argument('--horizon_window_size', default=63, type=int, help="Number of days to predict") +# parser.add_argument('--history_window_size', default=283, type=int, help="Train window chunk size")#Now that we want to do train size - val size performance heatmaps args = parser.parse_args() param_dict = dict(vars(args)) @@ -801,7 +832,7 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, # hparams = build_hparams() # result = train("definc_attn", hparams, n_models=1, train_sampling=1.0, eval_sampling=1.0, patience=5, multi_gpu=True, # save_best_model=False, gpu=0, eval_memsize=15, seed=5, verbose=True, forward_split=False, - # write_summaries=True, side_split=True, do_eval=False, predict_window=63, asgd_decay=None, max_steps=11500, + # write_summaries=True, side_split=True, do_eval=False, horizon_window_size=63, asgd_decay=None, max_steps=11500, # save_from_step=10500) # print("Training result:", result) From 0a67384deca6968cbddb6b7cfe7d0a8850b3fb57 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 23 Jul 2018 19:13:41 -0700 Subject: [PATCH 24/42] starting on holidays --- input_pipe.py | 72 ++++++++++---- make_features.py | 97 ++++++++++++++++-- model.py | 10 +- trainer.py | 254 +++++++++++++++++++++++------------------------ 4 files changed, 273 insertions(+), 160 deletions(-) diff --git a/input_pipe.py b/input_pipe.py index 351bb3e..a1a7d0e 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -464,6 +464,17 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter print('attn_window',self.attn_window) + def random_draw_new_window_sizes(): + history = np.random.randint(low=7,high=120+1) + horizon = np.random.randint(low=7,high=60+1) + self.history_window_size = history + self.horizon_window_size = horizon + self.attn_window = history - horizon + 1 + self.max_train_empty = int(round(history * (1 - self.train_completeness_threshold))) + self.max_predict_empty 
= int(round(horizon * (1 - self.predict_completeness_threshold))) + + + # Reserve more processing threads for eval/predict because of larger batches num_threads = 3 if mode == ModelMode.TRAIN else 6 @@ -472,26 +483,47 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter # Create dataset, transform features and assemble batches #features is a list of tensors (one tensor per feature: counts, page_ix, ..., count_variance) print('features',features) -# features = tf.Print(features,['features',tf.shape(features),features]) - root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) -# print(root_ds.output_classes, root_ds.output_shapes, root_ds.output_types,) - print(root_ds.output_shapes) -# batch = (root_ds -# .map(cutter[mode]) -# .filter(self.reject_filter) -# .map(self.make_features, num_parallel_calls=num_threads) -# .batch(batch_size) -# .prefetch(runs_in_burst * 2) -# ) - batch = root_ds.map(cutter[mode]).filter(self.reject_filter).map(self.make_features, num_parallel_calls=num_threads) - print('batch MFM', batch) + + +# for _ in range(max(n_epoch,20)): +## random_draw_new_window_sizes() +# print('max_train_empty',self.max_train_empty) +# print('max_predict_empty',self.max_predict_empty) +# print('history_window_size',self.history_window_size) +# print('horizon_window_size',self.horizon_window_size) +# print('attn_window',self.attn_window) +# +# root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) +# # print(root_ds.output_classes, root_ds.output_shapes, root_ds.output_types,) +# print('root_ds.output_shapes',root_ds.output_shapes) +# print('root_ds.output_types',root_ds.output_types) +# # batch = (root_ds +# # .map(cutter[mode]) +# # .filter(self.reject_filter) +# # .map(self.make_features, num_parallel_calls=num_threads) +# # .batch(batch_size) +# # .prefetch(runs_in_burst * 2) +# # ) +# +# #TEST:change horisoron jiostory +# batch = root_ds.map(cutter[mode]).filter(self.reject_filter).map(self.make_features, num_parallel_calls=num_threads) +# print('batch MFM', batch) +# +# batch = batch.batch(batch_size) +# print('batch B', batch) +# +# batch = batch.prefetch(runs_in_burst * 2) +# print('batch P', batch) +# batch = (batch) - batch = batch.batch(batch_size) - print('batch B', batch) - - batch = batch.prefetch(runs_in_burst * 2) - print('batch P', batch) - batch = (batch) + root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) + batch = (root_ds + .map(cutter[mode]) + .filter(self.reject_filter) + .map(self.make_features, num_parallel_calls=num_threads) + .batch(batch_size) + .prefetch(runs_in_burst * 2) + ) print('---------------- Done batching ----------------') print(batch) @@ -504,6 +536,7 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter #But if not doing lagged then can return None for that ??? self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ self.norm_std, self.series_features, self.page_ix = it_tensors #!!!!!!!!!!!!! 
names hardcoded ned to change to my fgeatures + print(self.true_x) """if self.features_set=='simple': pass # if self.features_set=='full': @@ -513,6 +546,7 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter self.encoder_features_depth = self.time_x.shape[2].value print('self.encoder_features_depth',self.encoder_features_depth) + print('self.time_x.shape',self.time_x.shape) def load_vars(self, session): self.inp.restore(session) diff --git a/make_features.py b/make_features.py index 170d183..f8db776 100755 --- a/make_features.py +++ b/make_features.py @@ -270,7 +270,10 @@ def normalize(values: np.ndarray): -def encode_fixed_date_holidays__daily(dates_series): + + + +def get_fixed_date_holidays__daily(dates_series, month_day): """ Encode holidays and shoulder days, for holidays that occur yearly on fixed dates. @@ -289,23 +292,25 @@ def encode_fixed_date_holidays__daily(dates_series): # ============================================================================= # MOVING holidays [variable date] # ============================================================================= -def encode_thanksgiving__daily(dates_series): +def get_thanksgivings__daily(dates_series): """ - Encode Thanksgiving holiday and shoulder days. - For daily sampled data only. + Get Thanksgiving holiday dates within the few years time range """ # 4th Thurs of Novmber... # if (month==11) and (dayofweek=='Thurs') and (22<=dayofmonth<=28) - return dates_series + thanksgiving_dates = [] + #... + return thanksgiving_dates -def encode_easter__daily(dates_series): +def get_Easters__daily(dates_series): """ - Encode Easter holiday and shoulder days. - For daily sampled data only. + Get Easter holiday dates within the few years time range """ - return dates_series + easter_dates = [] + #... + return easter_dates + -#Labor Day, Memorial Day, President's Day, MLK Day, Columbus Day, Tax Day def encode_custom_dates__daily(dates_series,dates_list): """ @@ -319,6 +324,78 @@ def encode_custom_dates__daily(dates_series,dates_list): return dates_series +def encode_all_holidays__daily(dates_series): + """ + Encode all fixed and moving holidays, and corresponding holiday shoulders. + Intended for daily sampled data only. + """ + + def spiral_encoding(dates_series, holiday_date, shoulder): + """ + Encode holiday and shoulders as a spiral: + Rotation over 2pi, with radius goes from 0 to 1 [on holiday] back to 0 + """ + Ndays = len(dates_series) + r = np.zeros(Ndays) + r[holiday_date] = 1. + r[holiday_date-shoulder:holiday_date] = np.linspace(0., 1., shoulder) #!!!!!!! + r[holiday_date+1:holiday_date+shoulder+1] = np.linspace(1., 0., shoulder)#!!!!!!! + theta = np.zeros(Ndays) + theta[holiday_date-shoulder:holiday_date+shoulder+1] = (np.pi/(2.*shoulder + 1))*np.linspace(0., 1., 2*shoulder+1) #!!!!!!! 
+ holiday_encoding = np.vstack((r*np.cos(theta), r*np.sin(theta))) + return holiday_encoding + + Ndays = len(dates_series) + + #Fixed Holidays [add other international ones as needed]: + xmas_dates = get_fixed_date_holidays__daily(dates_series, '12-25') + new_years_dates = get_fixed_date_holidays__daily(dates_series, '01-01') + july4_dates = get_fixed_date_holidays__daily(dates_series, '07-04') + halloween_dates = get_fixed_date_holidays__daily(dates_series, '10-31') + cincodemayo_dates = get_fixed_date_holidays__daily(dates_series, '05-05') + valentines_dates = get_fixed_date_holidays__daily(dates_series, '02-14') + veterans_dates = get_fixed_date_holidays__daily(dates_series, '11-11') + #taxday_dates = get_fixed_date_holidays__daily(dates_series, '04-15') + + + #Rule Based Moving Holidays + thanksgiving_dates = get_thanksgivings__daily(dates_series) + easter_dates = get_Easters__daily(dates_series) + #... Labor Day, Memorial Day, President's Day, MLK Day, Columbus Day, Tax Day + #Custom / Single Event moving Holidays + suberbowl_dates = ['2014-2-2','2015-2-1','2016-2-7','2017-2-5','2018-2-4','2019-2-3'] + + #Dict of holiday dates: shoulder halfwidth [-S, -S+1, ..., holiday, holiday+1, ..., holiday+S] + #for now just use 3 as the shoulder width for all "major" holidays, 0 or 1 for "minor" holidays + #Use ODD numbers for shoulder sizes + holidays = {xmas_dates:3, + new_years_dates:3, + july4_dates:1, + halloween_dates:1, + cincodemayo_dates:1, + valentines_dates:1, + veterans_dates:1, + + thanksgiving_dates:3, + easter_dates:1, + + suberbowl_dates:1, + } + + #Assume additive holiday effects: (which should almost never matter anyway + #for small shoulders unless there is overlap beteen some holidays. E.g. with shoulder=3, + #Christmas and New Year's do NOT overlap.) + _ = np.zeros((2,Ndays)) + encoded_holidays = pd.DataFrame(_,index=date_series) + #Iterate through each holiday, accumulating the effect: + for hd, shoulder in holidays.items(): + #Since date series is potentially over few years, could have e.g. 
several Christmas furing that time range + for holiday_date in hd: + holiday_encoding = spiral_encoding(dates_series, holiday_date, shoulder) + xxxxx += holiday_encoding + return encoded_holidays + + def run(): parser = argparse.ArgumentParser(description='Prepare data') diff --git a/model.py b/model.py index 81cc5d8..6856988 100755 --- a/model.py +++ b/model.py @@ -430,11 +430,13 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a # Compress encoder outputs enc_readout = compressed_readout(encoder_output, hparams, dropout=hparams.encoder_readout_dropout if is_train else 1.0, seed=seed) + # Calculate fingerprint from input features - fingerprint_inp = tf.concat([inp.lagged_x, tf.expand_dims(inp.norm_x, -1)], axis=-1) - fingerprint = make_fingerprint(fingerprint_inp, is_train, hparams.fingerprint_fc_dropout, seed) - # Calculate attention vector - attn_features, attn_weights = attn_readout_v3(enc_readout, inp.attn_window, hparams.attention_heads, + if hparams.use_attn: + fingerprint_inp = tf.concat([inp.lagged_x, tf.expand_dims(inp.norm_x, -1)], axis=-1) + fingerprint = make_fingerprint(fingerprint_inp, is_train, hparams.fingerprint_fc_dropout, seed) + # Calculate attention vector + attn_features, attn_weights = attn_readout_v3(enc_readout, inp.attn_window, hparams.attention_heads, fingerprint, seed=seed) # Run decoder diff --git a/trainer.py b/trainer.py index b2b2749..8ed1e28 100755 --- a/trainer.py +++ b/trainer.py @@ -267,133 +267,133 @@ def has_active(self): return len(self.active()) -class ModelTrainer: - def __init__(self, train_model, eval_model, model_no=0, summary_writer=None, keep_best=5, patience=None): - self.train_model = train_model - self.eval_model = eval_model - self.stopped = False - self.smooth_train_mae = Ema() - self.smooth_train_smape = Ema() - self.smooth_eval_mae = Ema(0.5) - self.smooth_eval_smape = Ema(0.5) - self.smooth_grad = Ema(0.9) - self.summary_writer = summary_writer - self.model_no = model_no - self.best_top_n_loss = [] - self.keep_best = keep_best - self.best_step = 0 - self.patience = patience - self.train_pipe = train_model.inp - self.eval_pipe = eval_model.inp - self.epoch_mae = [] - self.epoch_smape = [] - self.last_epoch = -1 - - @property - def train_ops(self): - model = self.train_model - return [model.train_op, model.update_ema, model.summaries, model.mae, model.smape, model.glob_norm] - - def process_train_results(self, run_results, offset, global_step, write_summary): - offset += 2 - summaries, mae, smape, glob_norm = run_results[offset:offset + 4] - results = self.smooth_train_mae(mae), self.smooth_train_smape(smape), self.smooth_grad(glob_norm) - if self.summary_writer and write_summary: - self.summary_writer.add_summary(summaries, global_step=global_step) - return np.array(results) - - @property - def eval_ops(self): - model = self.eval_model - return [model.mae, model.smape] - - @property - def eval_len(self): - return len(self.eval_ops) - - @property - def train_len(self): - return len(self.train_ops) - - @property - def best_top_loss(self): - return -np.array(self.best_top_n_loss).mean() - - @property - def best_epoch_mae(self): - return min(self.epoch_mae) if self.epoch_mae else np.NaN - - @property - def mean_epoch_mae(self): - return np.mean(self.epoch_mae) if self.epoch_mae else np.NaN - - @property - def mean_epoch_smape(self): - return np.mean(self.epoch_smape) if self.epoch_smape else np.NaN - - @property - def best_epoch_smape(self): - return min(self.epoch_smape) if self.epoch_smape else np.NaN - - 
def remember_for_epoch(self, epoch, mae, smape): - if epoch > self.last_epoch: - self.last_epoch = epoch - self.epoch_mae = [] - self.epoch_smape = [] - self.epoch_mae.append(mae) - self.epoch_smape.append(smape) - - @property - def best_epoch_metrics(self): - return np.array([self.best_epoch_mae, self.best_epoch_smape]) - - @property - def mean_epoch_metrics(self): - return np.array([self.mean_epoch_mae, self.mean_epoch_smape]) - - def process_eval_results(self, run_results, offset, global_step, epoch): - totals = np.zeros(self.eval_len, np.float) - for result in run_results: - items = np.array(result[offset:offset + self.eval_len]) - totals += items - results = totals / len(run_results) - mae, smape = results - if self.summary_writer and global_step > 200: - summary = tf.Summary(value=[ - tf.Summary.Value(tag=f"test/MAE_{self.model_no}", simple_value=mae), - tf.Summary.Value(tag=f"test/SMAPE_{self.model_no}", simple_value=smape), - ]) - self.summary_writer.add_summary(summary, global_step=global_step) - smooth_mae = self.smooth_eval_mae(mae) - smooth_smape = self.smooth_eval_smape(smape) - self.remember_for_epoch(epoch, mae, smape) - - current_loss = -smooth_smape - - prev_best_n = np.mean(self.best_top_n_loss) if self.best_top_n_loss else -np.inf - if self.best_top_n_loss: - log.debug("Current loss=%.3f, old best=%.3f, wait steps=%d", -current_loss, - -max(self.best_top_n_loss), global_step - self.best_step) - - if len(self.best_top_n_loss) >= self.keep_best: - heapq.heappushpop(self.best_top_n_loss, current_loss) - else: - heapq.heappush(self.best_top_n_loss, current_loss) - log.debug("Best loss=%.3f, top_5 avg loss=%.3f, top_5=%s", - -max(self.best_top_n_loss), -np.mean(self.best_top_n_loss), - ",".join(["%.3f" % -mae for mae in self.best_top_n_loss])) - new_best_n = np.mean(self.best_top_n_loss) - - new_best = new_best_n > prev_best_n - if new_best: - self.best_step = global_step - log.debug("New best step %d, current loss=%.3f", global_step, -current_loss) - else: - step_count = global_step - self.best_step - if step_count > self.patience: - self.stopped = True - - return mae, smape, new_best, smooth_mae, smooth_smape +#class ModelTrainer: +# def __init__(self, train_model, eval_model, model_no=0, summary_writer=None, keep_best=5, patience=None): +# self.train_model = train_model +# self.eval_model = eval_model +# self.stopped = False +# self.smooth_train_mae = Ema() +# self.smooth_train_smape = Ema() +# self.smooth_eval_mae = Ema(0.5) +# self.smooth_eval_smape = Ema(0.5) +# self.smooth_grad = Ema(0.9) +# self.summary_writer = summary_writer +# self.model_no = model_no +# self.best_top_n_loss = [] +# self.keep_best = keep_best +# self.best_step = 0 +# self.patience = patience +# self.train_pipe = train_model.inp +# self.eval_pipe = eval_model.inp +# self.epoch_mae = [] +# self.epoch_smape = [] +# self.last_epoch = -1 +# +# @property +# def train_ops(self): +# model = self.train_model +# return [model.train_op, model.update_ema, model.summaries, model.mae, model.smape, model.glob_norm] +# +# def process_train_results(self, run_results, offset, global_step, write_summary): +# offset += 2 +# summaries, mae, smape, glob_norm = run_results[offset:offset + 4] +# results = self.smooth_train_mae(mae), self.smooth_train_smape(smape), self.smooth_grad(glob_norm) +# if self.summary_writer and write_summary: +# self.summary_writer.add_summary(summaries, global_step=global_step) +# return np.array(results) +# +# @property +# def eval_ops(self): +# model = self.eval_model +# return [model.mae, 
model.smape] +# +# @property +# def eval_len(self): +# return len(self.eval_ops) +# +# @property +# def train_len(self): +# return len(self.train_ops) +# +# @property +# def best_top_loss(self): +# return -np.array(self.best_top_n_loss).mean() +# +# @property +# def best_epoch_mae(self): +# return min(self.epoch_mae) if self.epoch_mae else np.NaN +# +# @property +# def mean_epoch_mae(self): +# return np.mean(self.epoch_mae) if self.epoch_mae else np.NaN +# +# @property +# def mean_epoch_smape(self): +# return np.mean(self.epoch_smape) if self.epoch_smape else np.NaN +# +# @property +# def best_epoch_smape(self): +# return min(self.epoch_smape) if self.epoch_smape else np.NaN +# +# def remember_for_epoch(self, epoch, mae, smape): +# if epoch > self.last_epoch: +# self.last_epoch = epoch +# self.epoch_mae = [] +# self.epoch_smape = [] +# self.epoch_mae.append(mae) +# self.epoch_smape.append(smape) +# +# @property +# def best_epoch_metrics(self): +# return np.array([self.best_epoch_mae, self.best_epoch_smape]) +# +# @property +# def mean_epoch_metrics(self): +# return np.array([self.mean_epoch_mae, self.mean_epoch_smape]) +# +# def process_eval_results(self, run_results, offset, global_step, epoch): +# totals = np.zeros(self.eval_len, np.float) +# for result in run_results: +# items = np.array(result[offset:offset + self.eval_len]) +# totals += items +# results = totals / len(run_results) +# mae, smape = results +# if self.summary_writer and global_step > 200: +# summary = tf.Summary(value=[ +# tf.Summary.Value(tag=f"test/MAE_{self.model_no}", simple_value=mae), +# tf.Summary.Value(tag=f"test/SMAPE_{self.model_no}", simple_value=smape), +# ]) +# self.summary_writer.add_summary(summary, global_step=global_step) +# smooth_mae = self.smooth_eval_mae(mae) +# smooth_smape = self.smooth_eval_smape(smape) +# self.remember_for_epoch(epoch, mae, smape) +# +# current_loss = -smooth_smape +# +# prev_best_n = np.mean(self.best_top_n_loss) if self.best_top_n_loss else -np.inf +# if self.best_top_n_loss: +# log.debug("Current loss=%.3f, old best=%.3f, wait steps=%d", -current_loss, +# -max(self.best_top_n_loss), global_step - self.best_step) +# +# if len(self.best_top_n_loss) >= self.keep_best: +# heapq.heappushpop(self.best_top_n_loss, current_loss) +# else: +# heapq.heappush(self.best_top_n_loss, current_loss) +# log.debug("Best loss=%.3f, top_5 avg loss=%.3f, top_5=%s", +# -max(self.best_top_n_loss), -np.mean(self.best_top_n_loss), +# ",".join(["%.3f" % -mae for mae in self.best_top_n_loss])) +# new_best_n = np.mean(self.best_top_n_loss) +# +# new_best = new_best_n > prev_best_n +# if new_best: +# self.best_step = global_step +# log.debug("New best step %d, current loss=%.3f", global_step, -current_loss) +# else: +# step_count = global_step - self.best_step +# if step_count > self.patience: +# self.stopped = True +# +# return mae, smape, new_best, smooth_mae, smooth_smape def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_models=1, train_completeness_threshold=0.01, From 984a84815f300444e17a6c685935c1cb3b8a27fd Mon Sep 17 00:00:00 2001 From: gk Date: Tue, 24 Jul 2018 16:46:18 -0700 Subject: [PATCH 25/42] finished basic holiday encoding except thxgiving, easter --- Readme.md | 12 ++-- holiday_features.py | 163 ++++++++++++++++++++++++++++++++++++++++++++ input_pipe.py | 20 +++--- make_features.py | 148 ++++++---------------------------------- 4 files changed, 200 insertions(+), 143 deletions(-) create mode 100644 holiday_features.py diff --git a/Readme.md b/Readme.md index 
ca66990..412d7d0 100755 --- a/Readme.md +++ b/Readme.md @@ -62,9 +62,9 @@ ll data/ python3 make_features.py data/vars ours daily full --add_days=50 #python3 make_features.py data/vars kaggle daily full --add_days=63 -python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=5 --horizon_window_size=50 --history_window_size=100 --max_epoch=10 - +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=100 --patience=5 --max_epoch=10 +--horizon_window_size=50 --history_window_size=100 @@ -103,14 +103,14 @@ python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_de To do: -0. print out the SMAPE for the actual data [current is doing SMAPE of the unrounded log1p(data) which will likely be much smaller than for real] 0. SMAPEs on ground truth 2018 1. why encoder_state NANs in it for small train window lengths [is it train/predict window completeness thresholds?] 1. performance heatmaps 2. for weekly. monthly inputs, need to change few places in tensorflow code 3. Prediction intervals -4. Architecture improvements: his is not the usual encoder-decoder: add C context vector to every decoder step -4. bi, di, MH -5. custom attention +4. Architecture improvements: bi enc, dil +4. K step recursive as hybrid of 1step recursive and K step direct +4. MLP direct multihorizon +5. custom attention [e.g. position specific] 6. VAE aug \ No newline at end of file diff --git a/holiday_features.py b/holiday_features.py new file mode 100644 index 0000000..7d9723d --- /dev/null +++ b/holiday_features.py @@ -0,0 +1,163 @@ +#Define few functions to create holiday features from the time series +#For now, these are only intended to ork with DAILY sampled data + +import pandas as pd +import numpy as np + + + + +def encode_all_holidays__daily(dates_range): + """ + Encode all fixed and moving holidays, and corresponding holiday shoulders. + Intended for daily sampled data only. + """ + + + def get_fixed_date_holidays__daily(dates_range, month_day): + """ + Get YYYY-mm-DD holidays, + for holidays that occur yearly on fixed dates. + + For daily sampled data only. + + In USA: + Christmas, New Year, 4th of July, Halloween, Cinco de Mayo + Valentine's Day, Veteran's Day + + other international: + ... + """ +# return ['{}-{:02d}-{:02d}'.format(i.year,i.month,i.day) for i in dates_range if ((i.month==int(month_day[:2])) and (i.day==int(month_day[4:])))] +# print([(i.month, i.day) for i in dates_range]) +# print([i for i in dates_range if ((i.month==int(month_day[:2])) and (i.day==int(month_day[4:])))]) + return [i.strftime('%Y-%m-%d') for i in dates_range if ((i.month==int(month_day[:2])) and (i.day==int(month_day[3:])))] + + # ============================================================================= + # MOVING holidays [variable date] + # ============================================================================= + def get_thanksgivings__daily(dates_range): + """ + Get Thanksgiving holiday dates within the few years time range + """ + # 4th Thurs of Novmber... + # if (month==11) and (dayofweek=='Thurs') and (22<=dayofmonth<=28) + thanksgiving_dates = [] + #... + return thanksgiving_dates + + def get_Easters__daily(dates_range): + """ + Get Easter holiday dates within the few years time range + """ + easter_dates = [] + #... 
+ return easter_dates + +# def encode_custom_dates__daily(dates_range,dates_list): +# """ +# Encode custom days and optionally shoulder days. +# For daily sampled data only. +# +# E.g. Superbowl Sunday +# suberbowl_dates = ['2014-02-02','2015-02-01','2016-02-07','2017-02-05','2018-02-04','2019-02-03'] +# shoulders = [...] +# """ +# return dates_range + + def spiral_encoding(dates_range, holiday_date, shoulder): + """ + Encode holiday and shoulders as a spiral: + Rotation over 2pi, with radius goes from 0 to 1 [on holiday] back to 0 + """ + N_real_days = len(dates_range) + real_min = min(dates_range) + real_max = max(dates_range) + dates_range_padded = pd.date_range(real_min-shoulder-2, real_max+shoulder+2, freq='D') +# print(dates_range) +# print(dates_range_padded) + + df = pd.DataFrame() + df['date'] = dates_range_padded.values + Ndays = len(df) + +# print(holiday_date) + _ = df.loc[df['date']==holiday_date] + if len(_)>0: + ind = _.index.values[0] + #If this holiday is completely out of bounds of the time series input, + #ignore it [assumed additive holiday effects, so just add 0's] + else: + return np.zeros((N_real_days,2)) + + #For radius: triangle kernel centered on holiday + r = np.zeros(Ndays) + r[ind-shoulder-1:ind+1] = np.linspace(0.,1.,shoulder+2) + r[ind:ind+shoulder+2] = np.linspace(1.,0.,shoulder+2) + + #For anlge: go from phase [0,pi], with holiday at pi/2 + theta = np.zeros(Ndays) + theta[ind-shoulder-1:ind+shoulder+2] = np.linspace(0., np.pi, 2*shoulder+3) + #Convert to Cartesian: + df['r'] = r + df['theta'] = theta + df['x'] = df['r']*np.cos(df['theta']) + df['y'] = df['r']*np.sin(df['theta']) + v = df[((df['date']>=real_min) & (df['date']<=real_max))] + v = v[['x','y']].values +# print(v, v.sum(axis=0), v.sum(axis=1)) + return v + + + + Ndays = len(dates_range) + + #Fixed Holidays [add other international ones as needed]: + xmas_dates = get_fixed_date_holidays__daily(dates_range, '12-25') + new_years_dates = get_fixed_date_holidays__daily(dates_range, '01-01') + july4_dates = get_fixed_date_holidays__daily(dates_range, '07-04') + halloween_dates = get_fixed_date_holidays__daily(dates_range, '10-31') + cincodemayo_dates = get_fixed_date_holidays__daily(dates_range, '05-05') + valentines_dates = get_fixed_date_holidays__daily(dates_range, '02-14') + veterans_dates = get_fixed_date_holidays__daily(dates_range, '11-11') + #taxday_dates = get_fixed_date_holidays__daily(dates_range, '04-15') + + + #Rule Based Moving Holidays + thanksgiving_dates = get_thanksgivings__daily(dates_range) + easter_dates = get_Easters__daily(dates_range) + #... 
Labor Day, Memorial Day, President's Day, MLK Day, Columbus Day, Tax Day + #Custom / Single Event moving Holidays + suberbowl_dates = ['2014-02-02','2015-02-01','2016-02-07','2017-02-05','2018-02-04','2019-02-03'] + + #Dict of holiday dates: shoulder halfwidth [-S, -S+1, ..., holiday, holiday+1, ..., holiday+S] + #for now just use 3 as the shoulder width for all "major" holidays, 0 or 1 for "minor" holidays + #Use ODD numbers for shoulder sizes + holidays = {'xmas_dates':(xmas_dates,3), + 'new_years_dates':(new_years_dates,3), + 'july4_dates':(july4_dates,1), + 'halloween_dates':(halloween_dates,1), + 'cincodemayo_dates':(cincodemayo_dates,1), + 'valentines_dates':(valentines_dates,1), + 'veterans_dates':(veterans_dates,1), + 'thanksgiving_dates':(thanksgiving_dates,3), + 'easter_dates':(easter_dates,1), + 'suberbowl_dates':(suberbowl_dates,1), + } +# print(holidays) + + + #Assume additive holiday effects: (which should almost never matter anyway + #for small shoulders unless there is overlap beteen some holidays. E.g. with shoulder=3, + #Christmas and New Year's do NOT overlap.) +# encoded_holidays = pd.DataFrame() +# encoded_holidays['date'] = dates_range.values + _ = np.zeros((Ndays,2)) + #Iterate through each holiday, accumulating the effect: + for mmm in holidays.values(): + shoulder = mmm[1] + #Since date series is potentially over few years, could have e.g. several Christmas furing that time range + for hd in mmm[0]: + _ += spiral_encoding(dates_range, hd, shoulder) +# print(_) + return _ \ No newline at end of file diff --git a/input_pipe.py b/input_pipe.py index a1a7d0e..e8d2787 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -131,7 +131,7 @@ def cut(self, counts, start, end): :param counts: counts timeseries :param start: start index :param end: end index - :return: tuple (train_counts, test_counts, lagged_counts, [dow,woy,moy,year]) + :return: tuple (train_counts, test_counts, lagged_counts, [subset of: dow,woy,moy,doy,year,holidays]) """ # Pad counts to ensure we have enough array length for prediction counts = tf.concat([counts, tf.fill([self.horizon_window_size], np.NaN)], axis=0) @@ -146,6 +146,8 @@ def cut(self, counts, start, end): if self.sampling_period=='daily': cropped_dow = self.inp.dow[start:end] cropped_woy = self.inp.woy[start:end] + cropped_doy = self.inp.doy[start:end] + cropped_holidays = self.inp.holidays[start:end] # cropped_moy = 0*cropped_dow #Month information is alreayd contained in week information. 
COuld incude anyway to be explicit, but for now do not use as a feature elif self.sampling_period=='weekly': cropped_woy = self.inp.woy[start:end] @@ -195,7 +197,7 @@ def cut(self, counts, start, end): if self.features_set=='arturius' or self.features_set=='full':#for now, for full just do sam [include lagged] if self.sampling_period=='daily': - return x_counts, y_counts, lagged_count, cropped_dow, cropped_woy, cropped_year + return x_counts, y_counts, lagged_count, cropped_dow, cropped_woy, cropped_doy, cropped_year, cropped_holidays if self.sampling_period=='weekly': return x_counts, y_counts, lagged_count, cropped_woy, cropped_year if self.sampling_period=='monthly': @@ -276,7 +278,7 @@ def make_features(self, *args): print(args) if self.features_set == 'arturius': if self.sampling_period == 'daily': - x_counts, y_counts, lagged_counts, dow, woy, year, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, dow, woy, doy, year, holidays, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args elif self.sampling_period == 'weekly': x_counts, y_counts, lagged_counts, woy, year, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args elif self.sampling_period == 'monthly': @@ -285,7 +287,7 @@ def make_features(self, *args): # count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance) elif self.features_set == 'full': if self.sampling_period == 'daily': - x_counts, y_counts, lagged_counts, dow, woy, year, page_ix, count_median, year_autocorr, quarter_autocorr,\ + x_counts, y_counts, lagged_counts, dow, woy, doy, year, holidays, page_ix, count_median, year_autocorr, quarter_autocorr,\ count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args elif self.sampling_period == 'weekly': x_counts, y_counts, lagged_counts, woy, year, page_ix, count_median, year_autocorr, quarter_autocorr,\ @@ -299,7 +301,9 @@ def make_features(self, *args): # ============================================================================= if self.sampling_period == 'daily': x_dow, y_dow = tf.split(dow, [self.history_window_size, self.horizon_window_size], axis=0) - x_woy, y_woy = tf.split(woy, [self.history_window_size, self.horizon_window_size], axis=0) #need to see how to fit in woy into inputs to this func + x_woy, y_woy = tf.split(woy, [self.history_window_size, self.horizon_window_size], axis=0) + x_doy, y_doy = tf.split(doy, [self.history_window_size, self.horizon_window_size], axis=0) + x_holidays, y_holidays = tf.split(holidays, [self.history_window_size, self.horizon_window_size], axis=0) elif self.sampling_period == 'weekly': x_woy, y_woy = tf.split(woy, [self.history_window_size, self.horizon_window_size], axis=0) elif self.sampling_period == 'monthly': @@ -355,7 +359,7 @@ def make_features(self, *args): # Train features, depending on measurement frequency x_features = tf.expand_dims(norm_x_counts, -1) # [n_timesteps] -> [n_timesteps, 1] if self.sampling_period == 'daily': - x_features = tf.concat([x_features, x_dow, x_woy, x_year], axis=1) + x_features = tf.concat([x_features, x_dow, x_woy, tf.expand_dims(x_doy,-1), x_year, x_holidays], axis=1) elif self.sampling_period == 'weekly': x_features = tf.concat([x_features, x_woy, x_year], axis=1) elif self.sampling_period == 'monthly': @@ -368,7 +372,7 @@ def 
make_features(self, *args): # Test features if self.sampling_period == 'daily': - y_features = tf.concat([y_dow, y_woy, y_year], axis=1) + y_features = tf.concat([y_dow, y_woy, tf.expand_dims(y_doy,-1), y_year, y_holidays], axis=1) elif self.sampling_period == 'weekly': y_features = tf.concat([y_woy, y_year], axis=1) elif self.sampling_period == 'monthly': @@ -561,7 +565,7 @@ def page_features(inp: VarFeeder, features_set): So do not need to pass in here the time-varying ones like day of week, month of year, lagged, etc. - DO NOT return dow, woy, moy, year + DO NOT return dow, woy, moy, year, doy, holidays """ if features_set=='arturius': diff --git a/make_features.py b/make_features.py index f8db776..5940cb0 100755 --- a/make_features.py +++ b/make_features.py @@ -9,6 +9,8 @@ import numba from typing import Tuple, Dict, Collection, List +from holiday_features import encode_all_holidays__daily + def read_cached(name) -> pd.DataFrame: """ @@ -270,133 +272,6 @@ def normalize(values: np.ndarray): - - - -def get_fixed_date_holidays__daily(dates_series, month_day): - """ - Encode holidays and shoulder days, for holidays that occur yearly on fixed - dates. - For daily sampled data only. - - In USA: - Christmas, New Year, 4th of July, Halloween, Cinco de Mayo - Valentine's Day, Veteran's Day - - other international: - ... - """ - return dates_series - - -# ============================================================================= -# MOVING holidays [variable date] -# ============================================================================= -def get_thanksgivings__daily(dates_series): - """ - Get Thanksgiving holiday dates within the few years time range - """ -# 4th Thurs of Novmber... -# if (month==11) and (dayofweek=='Thurs') and (22<=dayofmonth<=28) - thanksgiving_dates = [] - #... - return thanksgiving_dates - -def get_Easters__daily(dates_series): - """ - Get Easter holiday dates within the few years time range - """ - easter_dates = [] - #... - return easter_dates - - - -def encode_custom_dates__daily(dates_series,dates_list): - """ - Encode custom days and optionally shoulder days. - For daily sampled data only. - - E.g. Superbowl Sunday - suberbowl_dates = ['2014-2-2','2015-2-1','2016-2-7','2017-2-5','2018-2-4','2019-2-3'] - shoulders = [...] - """ - return dates_series - - -def encode_all_holidays__daily(dates_series): - """ - Encode all fixed and moving holidays, and corresponding holiday shoulders. - Intended for daily sampled data only. - """ - - def spiral_encoding(dates_series, holiday_date, shoulder): - """ - Encode holiday and shoulders as a spiral: - Rotation over 2pi, with radius goes from 0 to 1 [on holiday] back to 0 - """ - Ndays = len(dates_series) - r = np.zeros(Ndays) - r[holiday_date] = 1. - r[holiday_date-shoulder:holiday_date] = np.linspace(0., 1., shoulder) #!!!!!!! - r[holiday_date+1:holiday_date+shoulder+1] = np.linspace(1., 0., shoulder)#!!!!!!! - theta = np.zeros(Ndays) - theta[holiday_date-shoulder:holiday_date+shoulder+1] = (np.pi/(2.*shoulder + 1))*np.linspace(0., 1., 2*shoulder+1) #!!!!!!! 
- holiday_encoding = np.vstack((r*np.cos(theta), r*np.sin(theta))) - return holiday_encoding - - Ndays = len(dates_series) - - #Fixed Holidays [add other international ones as needed]: - xmas_dates = get_fixed_date_holidays__daily(dates_series, '12-25') - new_years_dates = get_fixed_date_holidays__daily(dates_series, '01-01') - july4_dates = get_fixed_date_holidays__daily(dates_series, '07-04') - halloween_dates = get_fixed_date_holidays__daily(dates_series, '10-31') - cincodemayo_dates = get_fixed_date_holidays__daily(dates_series, '05-05') - valentines_dates = get_fixed_date_holidays__daily(dates_series, '02-14') - veterans_dates = get_fixed_date_holidays__daily(dates_series, '11-11') - #taxday_dates = get_fixed_date_holidays__daily(dates_series, '04-15') - - - #Rule Based Moving Holidays - thanksgiving_dates = get_thanksgivings__daily(dates_series) - easter_dates = get_Easters__daily(dates_series) - #... Labor Day, Memorial Day, President's Day, MLK Day, Columbus Day, Tax Day - #Custom / Single Event moving Holidays - suberbowl_dates = ['2014-2-2','2015-2-1','2016-2-7','2017-2-5','2018-2-4','2019-2-3'] - - #Dict of holiday dates: shoulder halfwidth [-S, -S+1, ..., holiday, holiday+1, ..., holiday+S] - #for now just use 3 as the shoulder width for all "major" holidays, 0 or 1 for "minor" holidays - #Use ODD numbers for shoulder sizes - holidays = {xmas_dates:3, - new_years_dates:3, - july4_dates:1, - halloween_dates:1, - cincodemayo_dates:1, - valentines_dates:1, - veterans_dates:1, - - thanksgiving_dates:3, - easter_dates:1, - - suberbowl_dates:1, - } - - #Assume additive holiday effects: (which should almost never matter anyway - #for small shoulders unless there is overlap beteen some holidays. E.g. with shoulder=3, - #Christmas and New Year's do NOT overlap.) - _ = np.zeros((2,Ndays)) - encoded_holidays = pd.DataFrame(_,index=date_series) - #Iterate through each holiday, accumulating the effect: - for hd, shoulder in holidays.items(): - #Since date series is potentially over few years, could have e.g. several Christmas furing that time range - for holiday_date in hd: - holiday_encoding = spiral_encoding(dates_series, holiday_date, shoulder) - xxxxx += holiday_encoding - return encoded_holidays - - - def run(): parser = argparse.ArgumentParser(description='Prepare data') parser.add_argument('data_dir') @@ -500,6 +375,13 @@ def run(): year_period = WEEK_NUMBER_MAX / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) + #Also day of year number. Do not do same circle encoding, just let it be usual ordinal. + #Also, careful w leapyear. After February, year's w it would be out of phase vs. years w/o leap year + #Instead, could leave a gap for leapyear. If that particular year has it, fill it in with that ordinal, + #otherwise the model just does not have that index. 
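As a concrete, hypothetical way to realize the leap-year gap described in the comment above (not part of this changeset), each date can be indexed against a leap-year template so that ordinal 60 (Feb 29) exists only in leap years and every later day keeps the same ordinal in every year:

import pandas as pd

# Sketch only: leap-year-aligned day-of-year, leaving a gap at 60 in non-leap years.
dates = pd.date_range('2017-02-26', '2017-03-02', freq='D')  # a non-leap year
doy_gapped = pd.to_datetime(['2016-' + d.strftime('%m-%d') for d in dates]).dayofyear
print(list(doy_gapped))  # [57, 58, 59, 61, 62]: 60 (Feb 29) is simply skipped in 2017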
+ doy = features_times.dayofyear.values + #If not doing the circle encoding, then normalize: + doy = normalize(doy) if args.sampling_period=='weekly': @@ -525,6 +407,14 @@ def run(): year = (features_times.year - REFERENCE_FIRST_YEAR)/float(REFERENCE_LAST_YEAR-REFERENCE_FIRST_YEAR) + #Holidays: try my "spiral encoding": + #Right now only doing for daily sampled data: + if args.sampling_period=='daily': + holidays = encode_all_holidays__daily(features_times) + + + + # Assemble indices for quarterly lagged data lagged_ix = np.stack(lag_indexes(data_start, features_end), axis=-1) @@ -549,8 +439,6 @@ def run(): year_autocorr=year_autocorr, quarter_autocorr=quarter_autocorr, #dow=dow,#N x 2 array since encoded week periodicity as complex number - - #woy=woy,#!!!!!!!! count_pctl_100=percentiles[5],#max #!!!!!!!!!!!!!!!! just to see what happens: apend one of my features. ) @@ -595,6 +483,8 @@ def run(): if args.sampling_period=='daily': tensors['dow']=dow tensors['woy']=woy #and want want week number too, aggregating last ~10 days into week 52 + tensors['doy']=doy + tensors['holidays']=holidays elif args.sampling_period=='weekly': tensors['woy']=woy elif args.sampling_period=='monthly': From 1a23d99056104a01307cb9b313cf4eec6956af5a Mon Sep 17 00:00:00 2001 From: gk Date: Wed, 25 Jul 2018 00:34:41 -0700 Subject: [PATCH 26/42] finished K-step lookback - moderate SMAPE improvement --- Readme.md | 17 ++++++++++++++++- hparams.py | 6 ++++++ model.py | 53 ++++++++++++++++++++++++++++++++--------------------- trainer.py | 2 +- 4 files changed, 55 insertions(+), 23 deletions(-) diff --git a/Readme.md b/Readme.md index 412d7d0..56ea667 100755 --- a/Readme.md +++ b/Readme.md @@ -41,7 +41,17 @@ See also [detailed model description](how_it_works.md) ----------------------------------- -GK modifications for own data: +GK modifications for own forecasting application: + +1) Several architecture improvements: + - give encoded representation vector as context to every decoder timestep + - K step lookback: ideally the RNN would learn a hidden state representation that ~completely describes state of the system. In realiy, that is too much to expect. In addition to previous timestep prediction y_i-1, also feed in y_i-2,...,y_i-K for K-step lookback. + - performance analysis of validation set SMAPE as function of history/horizon window sizes [randomized uniformly in training over all min-max range of history/horizon window sizes] + - more in development +2) More features, relevant to my data. More focus on seasonalities, and "spiral encoding" for holidays. Automated data augmentation. +3) Dealing with holes/sparsity as in my data. + + 1. PREPROCESS.py - Maximize reuse of existing architecture: just put my data in exact same format as Kaggle competition csv's 2. $source activate gktf. #previously set up a conda environment w/ Python 3.6, tensorflow 1.4.0, to match same versions as Kaggle solution 3. $cd ..../kaggle-web-traffic @@ -102,6 +112,11 @@ python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_de +#For doing performance analysis of SMAPE as function of history/horizon window sizes: +./RUN_MANY_TRAIN_VAL_WINDOWS + + +---------------------------------------------------------------------------------------------------------------------------------------------------------- To do: 0. SMAPEs on ground truth 2018 1. why encoder_state NANs in it for small train window lengths [is it train/predict window completeness thresholds?] 
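For reference, the per-step window randomization described in the Readme above can be sketched in isolation as follows. This is illustrative only: the bounds mirror the history_window_size_minmax / horizon_window_size_minmax defaults added to hparams.py below, the derived fields mirror random_draw_history_and_horizon_window_sizes in trainer.py, and the completeness thresholds are example values.

import numpy as np

# Illustrative sketch, not part of the patch itself.
HISTORY_MINMAX = (7, 365)   # history_window_size_minmax default
HORIZON_MINMAX = (7, 60)    # horizon_window_size_minmax default

def draw_window_sizes(train_completeness_threshold=0.01, predict_completeness_threshold=0.0):
    history = np.random.randint(HISTORY_MINMAX[0], HISTORY_MINMAX[1] + 1)
    horizon = np.random.randint(HORIZON_MINMAX[0], HORIZON_MINMAX[1] + 1)
    return {
        'history_window_size': history,
        'horizon_window_size': horizon,
        'attn_window': history - horizon + 1,
        'max_train_empty': int(round(history * (1 - train_completeness_threshold))),
        'max_predict_empty': int(round(horizon * (1 - predict_completeness_threshold))),
    }

print(draw_window_sizes())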
diff --git a/hparams.py b/hparams.py index 4b28e7e..a899502 100755 --- a/hparams.py +++ b/hparams.py @@ -54,6 +54,12 @@ #we can more fairly assess performance over range of history/horizon windows: history_window_size_minmax=[7,365], horizon_window_size_minmax=[7,60], + + #Lookback K steps: [without specifying, default previous Kaggle setting is K=1]: + #for predicting y_i, insteda of just feeding in previous K=1 prediction (y_i-1), + #feed in all previous K predictions: y_ + LOOKBACK_K = 3, #!!!!Can NOT set this to be bigger than min history size (history_window_size_minmax[0]) + #since then depending on random draw would possibly need to look back further than history size. ) diff --git a/model.py b/model.py index 6856988..1aaab86 100755 --- a/model.py +++ b/model.py @@ -413,6 +413,8 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a self.hparams = hparams self.seed = seed self.inp = inp + self.lookback_K_actual = min(hparams.LOOKBACK_K, hparams.history_window_size_minmax[0]) + print('self.lookback_K_actual',self.lookback_K_actual) encoder_output, h_state, c_state = make_encoder(inp.time_x, inp.encoder_features_depth, is_train, hparams, seed, transpose_output=False) @@ -441,12 +443,10 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a # Run decoder #... = decoder(encoder_state, attn_features, prediction_inputs, previous_y) - print('inp.norm_x[:, -1]',inp.norm_x[:, -1]) - print('inp.time_y',inp.time_y) decoder_targets, decoder_outputs = self.decoder(encoder_state, attn_features if hparams.use_attn else None, summary_z if hparams.RECURSIVE_W_ENCODER_CONTEXT else None, - inp.time_y, inp.norm_x[:, -1]) #in decoder function def: inp.time_y = "prediction_inputs"; inp.norm_x[:, -1] = "previous_y" (i.e. the final x normalizd)) + inp.time_y, inp.norm_x[:, -self.lookback_K_actual:]) #in decoder function def: inp.time_y = "prediction_inputs"; inp.norm_x[:, -1] = "previous_y" (i.e. 
the final x normalizd)) # Decoder activation losses dec_stab_loss = rnn_stability_loss(decoder_outputs, hparams.decoder_stability_loss / inp.horizon_window_size) dec_activation_loss = rnn_activation_loss(decoder_outputs, hparams.decoder_activation_loss / inp.horizon_window_size) @@ -493,7 +493,7 @@ def decoder(self, encoder_state, attn_features, summary_z, prediction_inputs, pr """ :param encoder_state: shape [batch_size, encoder_rnn_depth] :param prediction_inputs: features for prediction days, tensor[batch_size, time, input_depth] - :param previous_y: Last day pageviews, shape [batch_size] + :param previous_y: Last day pageviews, shape [batch_size, self.lookback_K_actual] :param attn_features: Additional features from attention layer, shape [batch, horizon_window_size, readout_depth*n_heads] :return: decoder rnn output """ @@ -511,7 +511,7 @@ def build_cell(idx): attn_depth = attn_features.shape[-1].value if attn_features is not None else 0 context_depth = summary_z.shape[-1].value if self.hparams.RECURSIVE_W_ENCODER_CONTEXT is not None else 0 #Should just be the encoder RNN depth print('attn_depth',attn_depth, 'context_depth',context_depth) - input_size = attn_depth + context_depth + prediction_inputs.shape[-1].value + 1 if idx == 0 else self.hparams.rnn_depth + input_size = attn_depth + context_depth + prediction_inputs.shape[-1].value + self.lookback_K_actual if idx == 0 else self.hparams.rnn_depth input_size = tf.Print(input_size, ['attn_depth',tf.shape(attn_depth),attn_depth, 'context_depth',tf.shape(context_depth),context_depth, 'input_size',tf.shape(input_size),input_size])#!!!!!!!!!! cell = rnn.DropoutWrapper(cell, dtype=tf.float32, input_size=input_size, variational_recurrent=hparams.decoder_variational_dropout[idx], @@ -552,7 +552,9 @@ def build_cell(idx): # Stop condition for decoding loop def cond_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): - return timestep < predict_timesteps + return timestep < predict_timesteps #If doing k2-step lookahead prediction for k2>1, possibly want to + #adjust condition to do appropriate n steps > predict_timesteps... and then combine predictions for those steps to get single prediction, + #e.g. by exponential weighting backward in time from this step. 
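To make the K-step lookback bookkeeping in the loop below concrete, this is the rolling-buffer update on its own (plain NumPy, illustrative only; the batch size and K are arbitrary example values):

import numpy as np

# Sketch only: keep the last K predictions per series, drop the oldest column
# and append the newest projected output at every decoder timestep.
batch_size, K = 4, 3
prev_output = np.zeros((batch_size, K), dtype=np.float32)            # last K predictions
projected_output = np.full((batch_size, 1), 0.7, dtype=np.float32)   # current step's prediction
updated_outputs = np.concatenate([prev_output[:, 1:], projected_output], axis=1)
assert updated_outputs.shape == (batch_size, K)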
# FC projecting layer to get single predicted value from RNN output def project_output(tensor): @@ -563,7 +565,7 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar """ Main decoder loop :param timestep: timestep number - :param prev_output: Output(prediction) from previous step + :param prev_output: Output(prediction) from previous step --> from previous K steps: self.lookback_K_actual :param prev_state: RNN state tensor from previous step :param array_targets: Predictions, each step will append new value to this array :param array_outputs: Raw RNN outputs (for regularization losses) @@ -571,6 +573,8 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar """ # RNN inputs for current step features = inputs_by_time[timestep] +# print('features',features) +# print('previous_y',previous_y) # [batch, horizon_window_size, readout_depth * n_heads] -> [batch, readout_depth * n_heads] if attn_features is not None: @@ -578,19 +582,13 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar attn = attn_features[:, timestep, :] # Append previous predicted value + attention vector to input features next_input = tf.concat([prev_output, features, attn], axis=1) + else: - next_input = tf.concat([prev_output, features], axis=1) # Append previous predicted value to input features - + next_input = tf.concat([prev_output, features], axis=1) #If using more of a typical encoder-decoder, also have encoder context each time: if self.hparams.RECURSIVE_W_ENCODER_CONTEXT: next_input = tf.concat([next_input, summary_z], axis=1) #!!!!!!!!summary_z[-1] -# if self.hparams.encoder_rnn_layers == 1: -# next_input = tf.concat([next_input, summary_z], axis=1) #!!!!!!!!summary_z[-1] -# elif self.hparams.encoder_rnn_layers > 1: -# next_input = tf.concat([next_input, summary_z[-1]], axis=1) #!!!!!!!!summary_z[-1] -# next_input = tf.Print(next_input,['next_input',tf.shape(next_input),next_input]) - # Run RNN cell output, state = cell(next_input, prev_state) @@ -602,15 +600,28 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar if return_raw_outputs: array_outputs = array_outputs.write(timestep, output) array_targets = array_targets.write(timestep, projected_output) + + #Update prev_output + #(delete oldest left, append rightmost) + if self.lookback_K_actual > 1: + prev_output = prev_output[:,1:] #All examples in batch, exclude oldest output [leftmost oldest, rightmost most recent] +# print('prev_output',prev_output) +# print('projected_output',projected_output) + updated_outputs = tf.concat([prev_output,projected_output],axis=1) +# print('updated_outputs',updated_outputs) + elif self.lookback_K_actual==1: + updated_outputs = prev_output + # Increment timestep and return - return timestep + 1, projected_output, state, array_targets, array_outputs #!!!!!! quantiles: projected_output will be diff dims + return timestep + 1, updated_outputs, state, array_targets, array_outputs #!!!!!! quantiles: projected_output will be diff dims # Initial values for loop - loop_init = [tf.constant(0, dtype=tf.int32), - tf.expand_dims(previous_y, -1), - encoder_state, - tf.TensorArray(dtype=tf.float32, size=predict_timesteps), - tf.TensorArray(dtype=tf.float32, size=predict_timesteps) if return_raw_outputs else tf.constant(0)] #!!!!!!! size= ... 
x N_pctls + loop_init = [tf.constant(0, dtype=tf.int32), #timestep +# previous_y if self.lookback_K_actual > 1 else tf.expand_dims(previous_y, -1), #prev_output + previous_y, #prev_output + encoder_state, #prev_state + tf.TensorArray(dtype=tf.float32, size=predict_timesteps), #array_targets + tf.TensorArray(dtype=tf.float32, size=predict_timesteps) if return_raw_outputs else tf.constant(0)] #array_outputs #!!!!!!! size= ... x N_pctls # Run the loop _timestep, _projected_output, _state, targets_ta, outputs_ta = tf.while_loop(cond_fn, loop_fn, loop_init) diff --git a/trainer.py b/trainer.py index 8ed1e28..108ebc1 100755 --- a/trainer.py +++ b/trainer.py @@ -624,7 +624,7 @@ def ema_vars(model): for _ in tqr: #!!!!!!!!!! Variable random length train predict windows #Random draw the train, predict window lengths - print(_) +# print(_) trainer = random_draw_history_and_horizon_window_sizes(trainer) # print('+++++++++++++++', [(TT.train_model.inp.history_window_size,TT.train_model.inp.horizon_window_size) for TT in trainer.trainers]) # print('--------', [(TT.train_model.inp.max_train_empty,TT.train_model.inp.max_predict_empty) for TT in trainer.trainers]) From 046ba10c64e07b64efa9bfb5460e32320af899da Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 26 Jul 2018 10:53:56 -0700 Subject: [PATCH 27/42] starting on SMAPE heatmaps --- PERFORMANCE_HEATMAPS.py | 32 +++++++++++++++++++++ PREPROCESS.py | 8 ++++-- RUN_MANY_TRAIN_VAL_WINDOWS.sh | 53 ++++++++++++++++++++++++++++------- Readme.md | 4 +-- holiday_features.py | 38 +++++++++++++------------ model.py | 2 +- trainer.py | 32 +++++++++++++++++++-- 7 files changed, 133 insertions(+), 36 deletions(-) create mode 100644 PERFORMANCE_HEATMAPS.py diff --git a/PERFORMANCE_HEATMAPS.py b/PERFORMANCE_HEATMAPS.py new file mode 100644 index 0000000..9da96a9 --- /dev/null +++ b/PERFORMANCE_HEATMAPS.py @@ -0,0 +1,32 @@ +import numpy as np +import matplotlib.pyplot as plt +import argparse + + + + + +def make_heatmaps(logdir='data/logs', K_last=3): + #Load all saved numpy arrays of performance metrics per PREDICTION run: + all_runs = [] + eval_smapes_lastKmean = [] + array_names = [i for i in ssssss if i.endswith('epochs_performance.npy')] + run_names = [i.split('_')[0] for i in array_names] + for i, an in enumerate(array_names): + x = np.load(an) + #Get last K epoch metrics: + j = x[-K_last:] + eval_smapes_lastKmean.append(np.mean(j[:,5])) + all_runs.append(x) + + + + +if __name__=='__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--logdir', default='data/logs', help="Directory where numpy arrays of performance are") + parser.add_argument('--K_last', default=3, dest='K_last', help='Save out per EPOCH metrics (NOT per step, only per EPOCH') + args = parser.parse_args() + param_dict = dict(vars(args)) + + make_heatmaps(**param_dict) \ No newline at end of file diff --git a/PREPROCESS.py b/PREPROCESS.py index a103c04..c779cd4 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -403,7 +403,7 @@ def low_pass_filter(df, filter_type, kernel_size): -def format_like_Kaggle(df, myDataDir, imputation_method, sampling_period, start_date=None, end_date=None): +def format_like_Kaggle(df, myDataDir, imputation_method, sampling_period, do_augmentation, start_date=None, end_date=None): """ Take my data and format it exactly as needed to use for the Kaggle seq2seq model [requires making train_1.csv, train_2.csv, key_1.csv, key_2.csv] @@ -548,7 +548,8 @@ def make_index_col_left(df): dd = 
imputation_lagKmedian_single_series(dd,seasonality,N_seasons,OUT_OF_RANGE_FILL_VALUE) #Data augmentation - dd = data_augmentation(dd) + if do_augmentation: + dd = data_augmentation(dd) df_list.append(dd) @@ -641,6 +642,7 @@ def make_key_csv(df): END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful SAMPLING_PERIOD = 'daily' #'daily', 'weekly', 'monthly' + DO_AUGMENTATION = False #True RANDOM_SEED = None # ============================================================================= @@ -663,5 +665,5 @@ def make_key_csv(df): df = remove_cities(df,REMOVE_ID_LIST) #Put into same format as used by Kaggle, save out csv's - df = format_like_Kaggle(df, myDataDir, IMPUTATION_METHOD, SAMPLING_PERIOD, start_date=START_DATE, end_date=END_DATE) + df = format_like_Kaggle(df, myDataDir, IMPUTATION_METHOD, SAMPLING_PERIOD, DO_AUGMENTATION, start_date=START_DATE, end_date=END_DATE) diff --git a/RUN_MANY_TRAIN_VAL_WINDOWS.sh b/RUN_MANY_TRAIN_VAL_WINDOWS.sh index fd21a5f..e301e3e 100644 --- a/RUN_MANY_TRAIN_VAL_WINDOWS.sh +++ b/RUN_MANY_TRAIN_VAL_WINDOWS.sh @@ -3,13 +3,27 @@ #Run over many history_window_size - horizon_window_size length pairs #Compile results, analyze performance as (2D) heatmap +#At this point, models have been trained already. Trained by randomizing over +#range of history and horizon sizes [~train,validation phases]. +#Now hopefully the models are reasonably good across a range of values of +#history/horizon lengths. +#Now, assess performance (walk-forward SMAPE on test set) as a function of +#(fixed) history and horizon sizes. +#I.e. during training phase, the history and horizon are random variables that +#change randomly for every step of every batch. Vs. during inference, use +#fixed settings of history and horizon sizes and get an SMAPE value, then +#change the fixed history/horizon parameters and get another SMAPE value, etc., +#over a range of histories/horizons. This way we can see if the model does well +#on short series also. Of course we expect that as history->infinity and +#horizon->1, error will decrease. -#TRAIN_WINDOWS="1 2 5 10 20 50 100 150 200 250 300" -#VALIDATION_WINDOWS="1 2 5 10 20 50 100" -#e.g. TRAIN_WINDOWS has NAN SMAPE -> 2 problem with as big as size 50 -TRAIN_WINDOWS="100 150" -VALIDATION_WINDOWS="33 66" +#HISTORY_SIZES="1 2 5 10 20 50 100 150 200 250 300" +#HORIZON_SIZES="1 2 5 10 20 50 100" +#e.g. HISTORY_SIZES has NAN SMAPE -> 2 problem with as big as size 50 + +HISTORY_SIZES="100 150" +HORIZON_SIZES="33 66" #just to test... MAX_EPOCH=2 @@ -25,7 +39,11 @@ cd .. #ls -l data/ -for v in $VALIDATION_WINDOWS; do + + +#Now that all training is done, can run predictions +#python3 PREDICT.py !!!!!make window sizes as params +for v in $HORIZON_SIZES; do #Clea up between feature sets cd data rm -R vars/ @@ -35,7 +53,7 @@ for v in $VALIDATION_WINDOWS; do #Create the features for our data echo 'running make_features.py with --add_days='$v python3 make_features.py data/vars ours daily full --add_days=$v - for t in $TRAIN_WINDOWS; do + for t in $HISTORY_SIZES; do echo 'history window = '$t 'horizon window = '$v echo 'running trainer.py' NAME="val$v-train$t" @@ -46,7 +64,22 @@ done -#Now that all training is done, can run predictions -#python3 PREDICT.py !!!!!make window sizes as params +#from trainer.py, when have save_epochs_performance==True: +#format of saved "{logdir}/{name}_epochs_performance.np" numpy array is: +#2D array, dims = [epochs, 9] +#where epochs is number of epochs that successfully completed (