From 5cd1644a8a257122d725de63e3f15419d37efa9f Mon Sep 17 00:00:00 2001
From: Greg Kocher
Date: Mon, 25 Jun 2018 17:51:18 -0700
Subject: [PATCH 01/42] gk fork, adding preprocessing for own data, predict script

---
 .gitignore               |   3 +
 PREDICT.py               | 176 +++++++++++++++++++++++++
 PREPROCESS.py            | 271 +++++++++++++++++++++++++++++++++++++++
 QUICKLOOK.py             |  34 +++++
 Readme.md                |  24 ++++
 ex_figs/quickcheck_0.png | Bin 0 -> 32041 bytes
 ex_figs/quickcheck_1.png | Bin 0 -> 37901 bytes
 ex_figs/quickcheck_2.png | Bin 0 -> 38176 bytes
 ex_figs/quickcheck_3.png | Bin 0 -> 41804 bytes
 ex_figs/quickcheck_4.png | Bin 0 -> 33743 bytes
 hparams.py               | 103 +++++++++++++++
 percent_dense.png        | Bin 0 -> 8162 bytes
 12 files changed, 611 insertions(+)
 create mode 100644 PREDICT.py
 create mode 100644 PREPROCESS.py
 create mode 100644 QUICKLOOK.py
 create mode 100644 ex_figs/quickcheck_0.png
 create mode 100644 ex_figs/quickcheck_1.png
 create mode 100644 ex_figs/quickcheck_2.png
 create mode 100644 ex_figs/quickcheck_3.png
 create mode 100644 ex_figs/quickcheck_4.png
 mode change 100644 => 100755 hparams.py
 create mode 100644 percent_dense.png

diff --git a/.gitignore b/.gitignore
index 5c09f44..4959db7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,6 @@
 data/*.zip
 data/submission.csv.gz
 !data/2017-08-15_2017-09-11.csv.zip
+data/*
+*/.DS_STORE
+.DS_STORE
diff --git a/PREDICT.py b/PREDICT.py
new file mode 100644
index 0000000..0617d65
--- /dev/null
+++ b/PREDICT.py
@@ -0,0 +1,176 @@
+"""
+Created on Mon Jun 18 14:03:35 2018
+
+@author: gk
+"""
+
+
+
+#After training, do the predictions [but here as a script instead of .ipynb]
+
+
+import tensorflow as tf
+
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+import os
+import pandas as pd
+import numpy as np
+from trainer import predict
+from hparams import build_hparams
+import hparams
+
+
+
+
+
+# =============================================================================
+# Performance Metrics
+# =============================================================================
+def smape(true, pred):
+    summ = np.abs(true) + np.abs(pred)
+    smape = np.where(summ == 0, 0, np.abs(true - pred) / summ)
+    #return np.mean(kaggle_smape) * 200
+    return smape * 200
+
+def mean_smape(true, pred):
+    raw_smape = smape(true, pred)
+    masked_smape = np.ma.array(raw_smape, mask=np.isnan(raw_smape))
+    return masked_smape.mean()
+
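To make the scale of these two helpers concrete: smape() returns the symmetric error on a 0 to 200 scale per element, and days where both values are zero count as 0 error rather than NaN. A tiny worked example with made-up numbers (not from the data):

import numpy as np

true = np.array([100., 0., 50.])
pred = np.array([110., 0., 40.])
smape(true, pred)       # approx [9.52, 0.00, 22.22]: 200*|t-p|/(|t|+|p|), 0 where both are 0
mean_smape(true, pred)  # approx 10.58, the NaN-masked mean of the per-element values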
+
+
+# =============================================================================
+#
+# =============================================================================
+#read_all function loads the (hardcoded) file "data/all.pkl", or otherwise train2.csv
+print('loading data...')
+from make_features import read_all
+df_all = read_all()
+print('df_all.columns')
+print(df_all.columns)
+
+
+# =============================================================================
+#
+# =============================================================================
+prev = df_all#.loc[:,:'2017-07-08']
+paths = [p for p in tf.train.get_checkpoint_state('data/cpt/s32').all_model_checkpoint_paths]
+
+#tf.reset_default_graph()
+#preds = predict(paths, default_hparams(), back_offset=0,
+#                n_models=3, target_model=0, seed=2, batch_size=2048, asgd=True)
+t_preds = []
+for tm in range(3):
+    tf.reset_default_graph()
+    t_preds.append(predict(paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63,
+                           n_models=3, target_model=tm, seed=2, batch_size=2048, asgd=True))
+
+
+# =============================================================================
+# average the 3 models' predictions
+# =============================================================================
+preds = sum(t_preds)/3.
+
+
+# =============================================================================
+# look at missing
+# =============================================================================
+missing_pages = prev.index.difference(preds.index)
+# Use zeros for missing pages
+rmdf = pd.DataFrame(index=missing_pages,
+                    data=np.tile(0, (len(preds.columns),len(missing_pages))).T, columns=preds.columns)
+f_preds = preds.append(rmdf).sort_index()
+
+# Use zero for negative predictions
+f_preds[f_preds < 0.5] = 0
+# Round predictions to nearest int
+f_preds = np.round(f_preds).astype(np.int64)
+
+
+
+# =============================================================================
+# save out predictions for all days (for our data all days are relevant; for his Kaggle submission maybe only one day is needed)
+# =============================================================================
+firstK = 1000 #for size issues, for now while dev, just a few to look at
+ggg = f_preds.iloc[:firstK]
+ggg.to_csv('data/all_days_submission.csv.gz', compression='gzip', index=False, header=True)
+
+
+
+
+
+# =============================================================================
+# visualize to do quick check
+# =============================================================================
+"""
+pages = ['(236984)_Astier_fr.wikipedia.org_all-access_all-agents', \
+         '龍抬頭_zh.wikipedia.org_mobile-web_all-agents',\
+         "'Tis_the_Season_(Vince_Gill_and_Olivia_Newton-John_album)_en.wikipedia.org_mobile-web_all-agents",\
+         'Peter_Townsend_(RAF_officer)_en.wikipedia.org_mobile-web_all-agents',\
+         "Heahmund_en.wikipedia.org_desktop_all-agents"]
+"""
+
+randomK = 1000
+print('Saving figs of {} time series as checks'.format(randomK))
+pagenames = list(f_preds.index)
+pages = np.random.choice(pagenames, size=randomK, replace=False)
+for jj, page in enumerate(pages):
+    plt.figure()
+    #prev.loc[page].fillna(0).plot(logy=True)
+    f_preds.loc[page].fillna(0).plot(logy=True)
+    #gt.loc[page].fillna(0).plot(logy=True)
+    f_preds.loc[page].plot(logy=True)
+    plt.title(page)
+    pathname = os.path.join('ex_figs', 'fig_{}.png'.format(jj))
+    plt.savefig(pathname)
+
+
+
+
+
+
+
+# =============================================================================
+# load, manipulate test data
+# =============================================================================
+def read_keys():
+    import os.path
+    key_file = 'data/keys2.pkl'
+    if os.path.exists(key_file):
+        return pd.read_pickle(key_file)
+    else:
+        print('Reading keys...')
+        raw_keys = pd.read_csv('data/key_2.csv.zip')
+        print('Processing keys...')
+        pagedate = raw_keys.Page.str.rsplit('_', expand=True, n=1).rename(columns={0:'page',1:'date_str'})
+        keys = raw_keys.drop('Page', axis=1).assign(page=pagedate.page, date=pd.to_datetime(pagedate.date_str))
+        del raw_keys, pagedate
+        print('Pivoting keys...')
+        pkeys = keys.pivot(index='page', columns='date', values='Id')
+        print('Storing keys...')
+        pkeys.to_pickle(key_file)
+        return pkeys
+keys = read_keys()
+
+# =============================================================================
+#
+# =============================================================================
+subm_preds = f_preds.loc[:, '2017-09-13':]
+assert np.all(subm_preds.index == keys.index)
+assert np.all(subm_preds.columns == keys.columns)
+answers = pd.DataFrame({'Id':keys.values.flatten(),
+                        'Visits':np.round(subm_preds).astype(np.int64).values.flatten()})
+answers.to_csv('data/submission.csv.gz', compression='gzip', index=False, header=True)
+
+
+
+print('f_preds')
+print(f_preds)
+
+print('missing')
+print(prev.loc[missing_pages, '2016-12-15':])
\ No newline at end of file
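The smape/mean_smape helpers at the top of PREDICT.py are defined but never called in the script. A minimal sketch of how they could be used to score the final forecast, assuming a held-out ground-truth frame `ground_truth` with the same page index and date columns as `f_preds` (that frame is hypothetical and is not produced anywhere in this patch):

def score_forecast(f_preds, ground_truth):
    # align on the pages and dates present in both frames, then apply the helpers above
    pages = f_preds.index.intersection(ground_truth.index)
    dates = f_preds.columns.intersection(ground_truth.columns)
    true = ground_truth.loc[pages, dates].values.astype(float)
    pred = f_preds.loc[pages, dates].values.astype(float)
    return mean_smape(true, pred)  # 0 is perfect; the Kaggle-style SMAPE is on a 0 to 200 scale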
diff --git a/PREPROCESS.py b/PREPROCESS.py
new file mode 100644
index 0000000..747d5fc
--- /dev/null
+++ b/PREPROCESS.py
@@ -0,0 +1,271 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jun 21 12:55:54 2018
+
+@author: gk
+"""
+
+#Do some basic preprocessing to get my data into the same format as the Kaggle code expects
+
+
+#import matplotlib
+#matplotlib.use('Agg')
+#import matplotlib.pyplot as plt
+
+import os
+import pandas as pd
+#import numpy as np
+
+#from statsmodels.tsa.seasonal import seasonal_decompose
+#stl = seasonal_decompose(x)
+
+
+
+
+
+def load_my_data(myDataDir):
+    """
+    Load my data
+    """
+    files = os.listdir(myDataDir)
+    files = [i for i in files if i.endswith('.csv')]
+    files = sorted(files, key=lambda x: int(x.split(".")[0]))
+    #Exclude certain cities
+    #ignore_list = [] #id's of the cities to ignore
+    #files = [i for i in files if i.split(".")[0] not in ignore_list]
+    dflist = []
+    for ii, ff in enumerate(files):
+        df = pd.read_csv(os.path.join(myDataDir,ff))
+        dflist += [df]
+    df = pd.concat(dflist,sort=False)
+    df = df[['id','date','y']]
+    df['id'] = df['id'].astype(int)
+    return df
+
+
+def remove_cities(df,remove_id_list):
+    """
+    Remove blacklisted id's [since some downloaded id's are no longer relevant,
+    or are suspected to not be useful, or to be corrupted]
+
+    Or just ignore those files when loading the data, in which case this is not needed
+    """
+    return df.loc[~df['id'].isin(remove_id_list)]
+
+
+def get_earliest_latest_dates(df):
+    """
+    Get first and last dates seen across any time series
+    """
+    earliest = min(df['date'])
+    latest = max(df['date'])
+    print('earliest date',earliest)
+    print('latest date',latest)
+    return earliest, latest
+
+
+
+
+#def __keep_btwn_dates(df,start_date,end_date):
+#    """
+#    Excerpt only the data between [inclusive] start and end date.
+#    Both dates are formatted as 'YYYY-mm-DD'
+#    """
+#    len1 = len(df)
+#    df = df.loc[(df['date']>=start_date) & (df['date']<=end_date)]
+#    df.reset_index(inplace=True,drop=True)
+#    len2 = len(df)
+#    rows_removed = len1 - len2
+#    print('rows_removed:',rows_removed,'of',len1)
+#    return df
+
+
+
+def remove_seasonal_blocks(df):
+    """
+    For places in the data where there are missing gaps of length > 1 seasonality,
+    remove those blocks [placeholder, not implemented yet]
+    """
+    return
+
+
+
+
+
+
+def do_imputation(df,imputation_method):
+    """
+    For places in the data where the missing gaps are small (<7 days),
+    just fill in those few missing days with a basic imputation method;
+    remove bigger gaps [> 1 seasonality] in blocks instead
+    """
+
+
+    def imputation_small_gaps(df,imputation_method):
+        """
+        Do missing data imputation using the given forecasting method
+        Only use this for short missing segments; do not use it for longer ones.
+        """
+        if imputation_method == 'STL':
+            #stl = seasonal_decompose(x)
+            df_filled = df
+            pass
+        else:
+            raise Exception('That method not implemented yet')
+        return df_filled
+
+
+    def imputation_big_gaps(df):
+        """
+        Do missing data imputation / removal
+        For big gaps [gaps bigger than 1 seasonality]
+        """
+        df_filled = df
+        return df_filled
+
+
+    def imputation__simple(df):
+        """
+        Just a placeholder for now:
+        fill all missing values with zeros,
+        or do mean or median imputation
+        """
+        df_filled = df
+        return df_filled
+
+
+
+    #First deal with small gaps (missing gaps fewer than e.g. 7 days):
+    df = imputation_small_gaps(df,imputation_method)
+
+    #Deal with longer gaps [e.g. by removing enough blocks of length S, where
+    #S is the seasonality, to completely get rid of gaps]
+    #...
+    #df = imputation_big_gaps(df)
+
+    #Trim start and end of each series to align them / get them in phase
+    #df =
+    #...
+
+    return df
+
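The three inner helpers above are placeholders so far. One possible shape for the small-gap case, shown here as a standalone sketch rather than as part of the commit: reindex a single id's series to daily frequency so missing days become NaN, then let pandas interpolate short runs and leave anything longer for the block-removal step. Column names ('date', 'y') follow load_my_data(); the 6-day cap is an assumption taken from the "<7 days" note above.

import pandas as pd

def fill_small_gaps_sketch(one_id_df, max_gap_days=6):
    # one_id_df: rows for a single id, with columns 'date' and 'y'
    d = one_id_df.sort_values('date').copy()
    d['date'] = pd.to_datetime(d['date'])
    d = d.set_index('date').asfreq('D')  # missing days become NaN rows
    # time-based interpolation, filling at most max_gap_days consecutive NaNs;
    # longer gaps are not fully filled here and still need the block-removal logic
    d['y'] = d['y'].interpolate(method='time', limit=max_gap_days, limit_area='inside')
    return d.reset_index()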
+
+
+
+
+
+
+def format_like_Kaggle(df, myDataDir, start_date=None, end_date=None):
+    """
+    Take my data and format it exactly as needed for use with the Kaggle seq2seq
+    model [requires making train_1.csv, train_2.csv, key_1.csv, key_2.csv]
+    [??? or does the seq2seq actually open the .zips directly ???]
+    """
+
+
+    def make_train_csv(df, save_path, start_date, end_date):
+        """
+        Make the train_1.csv
+        """
+        #Rename columns to be as in Kaggle data:
+        df.rename(columns={'id':'Page'},inplace=True)
+
+        #Get earliest and latest date across all series to align times [pad start/end]
+        earliest, latest = get_earliest_latest_dates(df)
+
+        #Excerpt only the relevant time interval, if manually specified
+        if start_date:
+            earliest = max(earliest,start_date)
+        if end_date:
+            latest = min(latest,end_date)
+
+        idx = pd.date_range(earliest,latest)
+        OUT_OF_RANGE_FILL_VALUE = -1 #np.NaN #0 #putting NaN casts to float, which then cannot be converted to int
+
+        #Reorganize data for each id (->"Page")
+        unique_ids = pd.unique(df['Page'])
+        df_list = []
+        for i, u in enumerate(unique_ids):
+            d = df.loc[df['Page']==u]
+            #NaN / zero pad start and end date range if needed {end missing}
+            dates = pd.Series(d['y'].values,index=d['date'])
+            dates.index = pd.DatetimeIndex(dates.index)
+            dates = dates.reindex(idx, fill_value=OUT_OF_RANGE_FILL_VALUE)
+            dates.index = pd.to_datetime(dates.index).strftime('%Y-%m-%d')
+            dd = pd.DataFrame(dates).T
+            dd['Page'] = u
+            df_list.append(dd)
+
+        df = pd.concat(df_list,axis=0)
+        cols = df.columns.tolist()
+        df = df[cols[-1:]+cols[:-1]]
+        df.reset_index(drop=True,inplace=True)
+        df.to_csv(save_path,index=False)
+        return df
+
+
+    def make_key_csv(df):
+        """
+        Make the key_1.csv, key_2.csv
+        May actually not need this???
+        """
+        #save out
+        return
+
+
+    #Make the train csv [for now just do 1, ignore the train 2 part ???]
+    save_path = os.path.join(os.path.split(myDataDir)[0],'train_1_my_data.csv')
+    df = make_train_csv(df, save_path, start_date, end_date)
+
+    #For the prediction phase, need the key ????
+# make_key_csv(df)
+
+    return
+
+
+
+
+
+
+
+
+
+
+
+if __name__ == '__main__':
+
+    # =============================================================================
+    # PARAMETERS
+    # =============================================================================
+    # TOTAL COMPLETED TRIPS:
+    myDataDir = r"/Users/......../Desktop/exData/totalCompletedTripsDaily"
+    imputation_method = 'STL'
+    START_DATE = '2015-01-01' #None
+    END_DATE = '2017-12-31' #None
+    REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful
+
+
+
+    # =============================================================================
+    # MAIN
+    # =============================================================================
+    print('START_DATE',START_DATE)
+    print('END_DATE',END_DATE)
+    print('REMOVE_ID_LIST',REMOVE_ID_LIST)
+    print('imputation_method',imputation_method)
+    print('myDataDir',myDataDir)
+
+    #Load
+    df = load_my_data(myDataDir)
+
+    #Remove any bad/irrelevant cities
+    df = remove_cities(df,REMOVE_ID_LIST)
+
+    #Put into same format as used by Kaggle, save out csv's
+    format_like_Kaggle(df, myDataDir, start_date=START_DATE, end_date=END_DATE)
+
+
+    #Imputation, dealing with missing seasonality blocks / out of phase
+    df = do_imputation(df,imputation_method)
\ No newline at end of file
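make_key_csv() above is still an empty stub. For what it's worth, PREDICT.py's read_keys() only needs a CSV with a 'Page' column of the form '<page>_<YYYY-MM-DD>' and a unique 'Id' per row (Kaggle's real key files use hashes for Id). A rough sketch of generating such a file from the wide frame returned by make_train_csv(); the function name and the Id scheme are mine, not part of this patch:

import pandas as pd

def make_key_csv_sketch(train_df, save_path):
    # train_df: a 'Page' column plus one column per date string, as written by make_train_csv
    date_cols = [c for c in train_df.columns if c != 'Page']
    rows = [{'Page': '{}_{}'.format(page, d), 'Id': '{}_{}'.format(page, d)}
            for page in train_df['Page'] for d in date_cols]
    # read_keys() splits Page on the last '_' and pivots Id by page/date,
    # so any unique Id string works for our own data
    pd.DataFrame(rows, columns=['Page', 'Id']).to_csv(save_path, index=False)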
diff --git a/QUICKLOOK.py b/QUICKLOOK.py
new file mode 100644
index 0000000..8f22dbe
--- /dev/null
+++ b/QUICKLOOK.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Jun 18 14:03:35 2018
+
+@author: gk
+"""
+
+#For the KAGGLE data, it looks like most series (~2/3) are dense [no sparsity].
+#This matters because Arturius's script has a threshold on the number of 0's allowed;
+#the default he seems to use is to not allow any 0's,
+#so then he is using only ~2/3 of the time series ???
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+
+
+
+filepath = r"/......./kaggle-web-traffic-master/data/train_1.csv"
+
+df = pd.read_csv(filepath)
+
+rows = df.values
+
+x = [(i>0.).sum() -1 for i in rows]
+ndays = max(x)
+x = [float(i) / float(ndays) for i in x]
+
+x.sort()
+
+#Sorted plot of percent dense [so about 2/3 of the 145K Kaggle series are dense]
+plt.figure()
+plt.plot(x)
+plt.show()
\ No newline at end of file
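For the Readme workflow below, step 4 runs PREPROCESS.py, whose load_my_data() appears to expect a directory of per-city files named '<id>.csv', each containing at least the columns id, date and y. A hypothetical example of writing one such file (the city id, path and values are made up):

import pandas as pd

example = pd.DataFrame({
    'id':   [3, 3, 3],                                   # city/location id, matching the filename
    'date': ['2017-01-01', '2017-01-02', '2017-01-03'],  # one row per day
    'y':    [120, 95, 143],                              # the daily count being forecast
})
example.to_csv('exData/totalCompletedTripsDaily/3.csv', index=False)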
diff --git a/Readme.md b/Readme.md
index f7ae3e9..d651938 100644
--- a/Readme.md
+++ b/Readme.md
@@ -35,3 +35,27 @@ load and evaluate 30 different model weights.
 At the end, you'll get `submission.csv.gz` file in `data` directory.
 
 See also [detailed model description](how_it_works.md)
+
+
+
+
+-----------------------------------
+
+GK modifications for own data:
+1. PREPROCESS.py - Maximize reuse of the existing architecture: just put my data in exactly the same format as the Kaggle competition csv's
+2. $source activate gktf
+3. $cd ..../kaggle-web-traffic
+4. $python3 PREPROCESS.py
+5. $python3 make_features.py data/vars --add_days=63
+6. $python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000
+7. $python3 PREDICT.py
+
+- confirmed it runs with 2 layers stacked, or with the attention mechanism. Performance is worse in both cases, at least initially.
+
+
+To do:
+1. finish PREPROCESS.py to do better imputation using a basic forecasting method [just use STL or Theta to fill in small gaps; otherwise remove blocks]
+2. modify make_features / InputPipeline / VarFeeder etc. to NOT do the lagged autocorrelations [if the ts is too short], to NOT use lagged_x, and to NOT use Wikipedia-specific features.
+Use only features relevant to this data. Still use the (tiled) median series value (before standard scaling), or a few other quantiles, too. Keep day of week; add one-hot encoded continent, or use country like he has it.
+3. Prediction intervals
+4. Architecture improvements
\ No newline at end of file
diff --git a/ex_figs/quickcheck_0.png b/ex_figs/quickcheck_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..794e34bc6bb121fd4008f0b43b9a084fde2a6ca8
GIT binary patch
literal 32041
[base85 binary image data for ex_figs/quickcheck_0.png omitted]

diff --git a/ex_figs/quickcheck_1.png b/ex_figs/quickcheck_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..7e089e8ce3da430be79dfc6f52d2300078359e8f
GIT binary patch
literal 37901
[base85 binary image data for ex_figs/quickcheck_1.png omitted]
zmCDbJMS4yO-$t09{kDXqobBykkf7mvn)a!8BQ>f=FDkx#4GO^j8wiPZaH}%4@ih*N zb`TikNm;60AL!-Yon139`_egx*`(5Ll1MnThhO<6i_wrAbn3Ctf6ZurR`e zz_7bfu;IHezC|Fi$4@%K$tJ*wk}e49M*(%7qEGSifZ`H&tuxp5xCCAbk)Wne_1wqA z=4K>B^03Gm`#GkbTra=ef!=ao0DQoH^)F8{!FZdPf-d>_e`H+G4Z?XcIX$nCp-eS= z877_KUkwOk)>MY-in!fk$1s9rfZehGM*7FLM-XNaP`#vr0gX<7}?S zU&wbgV3P7>(0r*t=YpE*0gN1wh=zEanIJI9Hgg2#41J^_LK*iCBa9> zm9KiY9HVLja@wyV9$l*jIe72c z?SqsSu7Q3PURX`8_ezHWqJrbCTpHmseQil^3z5qV>xaY*(pJU1ep6JR8}kjR5<1)p zhQT)inlaICeYRZrphL~ekD4hWonqas%$|xPt6(Sak*fxRReHdFba;McV3eCJOEcU2 z>$u#ZFEfnIOhjVCIYx7e$Wxf_BQG6SlF9oAm>ii>0O-rLL)`2>;{xE$l2fqOij&PJ zKe5YMt9>yujqTO(s0*xHI4W5CCVlkX7G<<&AvJy<6)-9-$}>vm-KN4wK%+2u^-Tdr zIj10f>oMxUmY1;P^v#?6Y$)o%3`lCOFNb@ddyd~lYRbM^L zR!`g8H5oYZJltdSbv(NC0!>#a@EpN~{P{Z3WE&2X+t-XKQLvBN{dhijaNjL0_s<}p zq~K!Q_I2;k+VZ30oQ^ZAfd<=MZ!|bO+RWopXWe#rz<-E1KqmT6oyER%)SfDga^XC( z&Zrw0v@dSI6f&zFQ8Xpb0~#JX4Qkany!@b}vwCq;h8^~3d}o3!gLE%fgsbd(jDYi; zUafnt&OQbU$taU;S;vb7y1}LcJwW8(x6kdtnd*z=z1o^kamgA`=f&3LL*;p`S1`NT zI!qcfk9Z)&BHM|uJ?Y0~&6q)RZ?0}X;%a@HP2O5($w|_BVQuz%1MG#+)W156)EKFi8k)5j=aAKc3STVBQkWuH7SCTFa$+z*5IjA7 z&<^T6B{zQED-JzE{9!IsQtR)y3{foE%u zc*8D#7An3~{xS=d*W>`NZ}I{O{3R)C#cYCCVL`SG<;QhmFV3JC@h@SN>7eWgX>+p& zJw2|dSsfDP^`$6$XX2K?cCt{ypG&$nE^GL#%TsduUm?_WL-=gZ&q+&O#7J%` zV~TKtM|eO2t&F8^gqnTrQs@LFR=`8zvu(l3o7tjJFzLzMH23K{MR?hWTPEea={N`d zZxm3=L9QKl;zJp{Vf%zHF>YC}{_-+Lc|x&2fkq8j)K{G-AR(?yi_yoGYPGIevOwEi zujEgvV6@YHXW?8&al?4n*QPSdSc+xbnmRpVcuxI&pY+gdTeHZ z7z!I{&`{WEC`KDbfS&O0e;?Xz_h0HmqCP_y*tZ3$^L7RW9W#-*oqk*Rb7&_8KZ*Ne zn~vmmv|DCNJmzUCQ(Ea+&!{yPLA=Jevc)}F=d{+Jr7yV-Gqky86-rb;#Y-Uhl4^-M zish~B^;f(B44{Ox6;o)a>IwMJ7cfMa<8Rmck1IkVEB4m;oMj>oNWM#2=X3*KzS|5J zPfyb|k6=9=-N&y)WriwQZoLso8FX7>BfJ43(D0aEf)SfCCU-^1z}${byfhEw!&6=k zqn)%2dLnFuO<))gsI{Qm7W|8xM8m-$5mOjbggjNF1jF7vwO)bhZ`FL#f9I+LjSnLo z-C9#}ka=|=LlJV)f?(1w-w&UVC~*RlweZ&EKv`kIlGw}l?%cq|*?6MFvize7+n-Zk=Xcy#B3?*o1i`if@AmFb2g6U{k2fTv)~Ogc z-pYaN)k-xvS#CfJW07JikSuv%DO^5OV@29C&m*5#{OssQQ*t5Eu=OSvZngp&z{yjF)uBKv87p-8u_;Fun+t-s8kaH&~swpuKolNa22NJE2|7ejS`_ z34So%7cW+}6HxzDW+ElghO4%yLF1HxbbVpgbTScPFUzb-zKsnmE}?WW!W5Z@3l#F=dElU62pu+zCe3L zFI7L((j7LVd?lgW=Jva-tM9#=GlHdx=|TL~A#F;6{qifN%99@q#L}h8;V|4>x-ble z*iucF&he_F#&Tt-ZN3r=vyG(+5X9nt%^!xZm&;%{behV7#a30kPDLS7!XM6xvdMyN z{O$zcyS(eWm!(K2aS1n!I-~GOmZZl1 zO&BQBdaAY=ugk4rI2z$dzsrF)4tzT*kpBU(^j{XgrotU|NJEu$s}ug;ZHr5}p{5lY zZ0dCZ06ab!aZxqc3x8Oy3%L)Qh_v!rF+g%Pv4t<<2GbgaMdojLQT4I=adjtlTj}dO zH)^gLgccgWhGDMX2wx|MbVr;j1AR`?gfs2mGWet=DdJ;5h3``^X65Bi=?*D`%a8ZmC7tUoBd#wSnsh|<4R>Yg~rqn z8?S6)<^rZ0bGeJJx!r{!o%TNcv4V2MF`E=*)65+;i>YHfx43<+g)3f}>VK!koXexO?s?@nb463u#<$?5QyB} zYa2gUMKRL`xZB~|;>KuzW&Kj+$Z*(=wFvK=A~EvM-B?LzLSV+|JWx(^ZE) z11L(t@%^^U*1yeh!tkhM+rKGu*5P%+@!L+@|K(E7XGlv`|0F~{1OehkAhm64v5_Yt zGSYGQ?dA$tvn}mfg{uJW1a{hXgxE$h?LY!6b{4J}KHEpr{)D^kl*QLybK#!}=N}yQ^AkDq*Pm+#951ISQdaQ8wW7BHyb6ZXx9w-j8(CkV{HiP3a#Dy5; z1;($}*jcIBhHkN`dABqP;>DSBjD|9Gi|9O}lu62M);M94X`^q9@ykee6y-gEaWoYm zJX1vS1{en6ksz0v`}Q8}2GI!uE~9PA>rnv*5R?4KGiw7WYf+B{#vn9giox%Z4GR)U zF-NAy#sXquVtO~HrlxooBy2s*o~fTD570mm3165zt{9B1JJ(^jz^Fh< z5co(77(U`fKsGzr82l61b9?Q_w0@_yMTVKj@Z;3{T7{1*va?@iU$n8iihOfuluB|~ z+T!b6r#pQWc`KL$GE*Ide)>lXkEo}jY7kfnVQ;DD&G#@56teVB+LBKH9Q=Xg;dF7p zR`tfm;G}z+bf3RATkd4mV8x{W+v~PMuhzzQW^8v%F!@#lNAYvOp?j2R{LU-+`Bv>@ z;$NxzOukC0;|tKgHRQJ$Q#wh;U)Z4KL3UO@LRR3vFNP7LoI33*XQr%Rs-rC$Gqzv* zPrbBknb38*?+xa+v_1PBO~moxM}BY9=#usD%W;Fzsnj>$^Siw01EtxC%Yd{h{|7(h zAW=0endtIA5)9P8-= zUBv(qq9D}Y{L}H;=zV)c)z^ch{fN{qCJ2X1+FY3z3W^{)(wKbcG^y~3E>swEc^SRGT>6;Ri zd5`JpgT!Q9yTcTEp}G}l)H`=udTFurgmUlfF51>XXK=@?JXaxWU+1Rxhc{W8@%9ec z)f@fc9v1^!&S>yI%YrE0G3jk_X~w0OBjbk?H}gj~?mxv_3{HYlwd$`#@|H&IizN=f 
z!uny(qe|sGx)PvgK8#+&9?$P$V)C=Y`Q2E9`^~NqZ;?MrKIR10935xU`q35M*v}lm zJ2qWJJ6mx5eW0eCX0}=4NE&ZQRHKKfm?^UklYR3?3!88{7C9=7dS;y= zJVh)m?ug#;M$ipbS6z)u9!sBdUiAI@_boNiKuYvL$}roU9uoN5w{MAr{RBW}U-@i{ z&y|y01|L_xvoJ`RCj!C5bPDN2KVj>u9sE-Mjk;|*cKl$HW6eLOE z9Oi?Sa;DyoVE7PHduf&M|IHI@O}hh?21fvY!d^m0QSg!9J^m$5CkAGYJU0ITk(h=c zgtCYg_V^l{V3S^e=D+v$vj6}8<^n+p`0oSb`{E#5NHC zVt!0x=t^DT`_92MYdhW^x~q%C@bcXIb<%>$5y7Pq0qsgx=B=k)f5>BgP!LQ_o7hP? zexJ;}D*y3=Kp%UuS#8XdrFrMDY8c+qg6yoKm9r~eMyL@fb81RM`}IIbvetGs5Cp5| zH4y!`7xm)1c*30bKLs1?%#mC*ltew^uC@RuRS&WR&WSR zOw5@o6SVj5-zWIpsZk*M%nH^y8uu-nnibE^{&`sRP`q_Q1`%oDr|Yxm#PBzi1FKf# zt;?@%Sp>;rcO*H-V`IqhcqJ{vbk(osWA=cwOx z@iCvilPkX{Y7rv)_;Belc_tU)1DIri)Annn0P*7Co*UA6*r&STwx70z;$JE0(W9`L z3y@=*52ipWlFi4xasR(-rrzMixW_GyPBqZ0e<1bW77Jtf1e$bW2{cWI9Z!o`QAKG8 ziZ8^u-<@Sj(}4L{DIhs*z2oU?IoCJnR7Fb7^wrP6%Y34!l>hB1@gy+rKr%Kn69zJ# z4QlK_Zb+8M-sQX8!_&ZR-{Y(Xgb?^170@O8->t#XMkR)eYn*^EUX!`+R*zi~z#m%CMt*NKhR|zwP>e>r|4atLITTN>K$TJggy@0nb9jD!e(yQhz-WEJc?1Qx zz3dV04tqxZ2$Bc8BXAb|*$Y={R>V}hP<=Y-O*I*Zlp^^^qyy=9;Xd|ucE3QRgsN6f z<_JidbG-VKoa_H^59-09AUX&DhK7dH?m&1q!Bt~z?Q>xC_wR(Jz;naGr2^FAo1#zK zg7Ma1E(*1R@STgm*@UVG$m_23yYqUaKp%Pt6tQZ={ss3@CrCy=3>ChGQ21TG6uLbW z14Jw=3e1HOBZOBv2{gc{eB!hHEH4B};eNq$@P-vU7#{tK?x&jazOF%&2u@Nt&q@xr0_piCnf%mekYgIK; z@dfzh{H(LQ6!Y*vgMsQ1k@SdHe?*F#52vSYubC2TGw3FnvFJ76+f0>;s;c6(+$hXTY%jzI@Y|q1ZohXh!w3)+3;}7=pL9Mp6AXJAGy(=w@oHni>dehUY;;lbFajn}1e`-27Ic6m$fU%jju_WixglXg zV4@z-!6D}%au}Kv3W(^TUC715}Idf6c%8qkr3fzuVouzu)KS`}u79 zzF()!`uM|{ne_*lX(=huzWF*hY#SW*92~~dGUaD9{3+;hk**S6ItI;-!SbZ=;ZQ^f zosFKj`HRS@@XKM(F8`^EX#Ch)IlfPJQ3wsfzIsn~xIy3Kch5Xq#BrP{UEH!f+yQ z!%HZ+6gFK~D(TWkFLZQtY)44k4wlJe`*L!6`YU_15t{@yuQ2S9_}$&BTGXluQzhBLtnib*f;2@dfVdz zb8;0wSqTM~y2i#=8+X7OlBXerzoP@=*-J`p(aN+p_{p`i%eb){MhlUC(Mk!@bIEG* zmp$G+HWCP1FfiO7wanhW?00>|gBrM!_rS}nbvQ&TA5IfO^5^%Z3xO0|~U``@jT zN~NPe9QFgQvJ9kW2kI*~?=pCb(6FcbzP_P!e4Y%az5qR=>3Y+0_w|7D93i(jYp9^| z7Q4Wuy}kWZP{C&h^C~#%(Wt%j9YXGUs;7v^@?_NlhRTD)TYW-WTFZ>l$jhI-8;?dV zz!->{`=wFeA={r-i(&pXfXl@tB_$OQjoAz4cRwm$`Jr<(#w*p6GczGVNV3a{tV(8-O?KHkd;3@+Wkq&Gh|Gk@455tdWUtK3WOH8E zeSg2_oZlbkJbvfz+|_s@9`SX=kxV?i@2{Ue}?!xF@hjx6cuFE5d<59AXrm` z_;5wIcWe&+!gaZ&s7VN4zJzAs@H3Hm6NJ4Mz)C4^wAz z#NO1^$!nq`6!y<$GS7p!M5z$-NKE&xZB@=8U{r0SLVaHd(b1_XUZNGCv zY@v4Hc*OMoq#XX74CXtbaufMHGOn7*>!ZlJP0{&)i{{QKZgW^c7(=_%UZ^o=XO_jBiZ+zMaQ&qXI>-f0(2 z{r-I>xC!&+Im4rtaLON_JorN>mg{YY3IkR1nsbyB2kiOl{!pD4btn|GnEmp6vrn0S zWZ}$@#l9Tn?@O+_+2RE6>{Qg-%$;1@pwlZWbl^18!*0*3; zw6Wn75*9XkGG*9vMx zI$B=3Y#7z8*NFWYd6UcR~ZX?{_<2hj6Y@e-@7?3;xKmI_h2bcx59=-YJZNpd{PhoB_Sb!4hxZiO;hfon^5L6 z_=ur3^28#S*g&$fvxRBn*Tz3R8vFS8N0lS9&2Vv%Ly1Nn+44aCZ1{}0rjioz6HBhsLiLW6w2yDT=S;-qQvYM`yQv~) zevej_Yz?zH)q}q9&pTknSx2M zc(c9GlO1q+f^PkJ5xa2FN#Fj}V>3(3M{8r%R)YmJr1&^T$H%?d^5l$CzPjV`gn^fo zU!se&@#l{$xXWgzV9(S{Yotisk)Z?YTOFLh#TXr)px{Aa9D*a#XNgs#6=$qQw2w=4dNl2QbF5D2B*N|GN z^Vyvl)G@vzBZI&)T{UZ`U}tBKU)fgsTH`c*>x110m3JKzgp*cSV5gmBt;oZ+mX=KY zDu<1&ty}%MDrGP|y618`Uh^qsrp@@BDZ(U8O--nkc<-(~mp<8R&V4`r@iEQ|4wYLg zLq%%Ya%TpL^l8DFTbhCiiln}PNnW++q}$uyZyFtCz#$+L`cxS|FrZl*#kFpKe6ZsQ z{-#!9BsuU_4F~SF?5qsOzI^$Fh?;|ig$2RF!lDzjAnoh#7jFBrvC&v++Fa_p@0_~g zrm9Nzt1C6{qzK$5`0ArCQxIk?)-54Q>_LeBUiL`d)7v}C{M$?LO|9gT-f9<1iT$}3 zDfafGmFS}0z5A@@cdBDO8jDLBPKX;1Y?ojYNcj2rpD{}t)e+E~9wqd6fdwUryKw~t z1x+n2MS$DRwnR`{v^{(NyjA0#B%ZXiG#x3v#NJf!j}O)_M*QWYnOfPRC9omvPfeM0 zCS7fw*(u^GRvE2wlpiiOoSvV@V`gTCJtvTlV(eYrhR`gN*FT+T?B68VBN6R%&0^jmb46fJ5nXjTrK5SG^C-@QUP>gk7vK=)_DsfCT&p{7Jrnn)z;R2 zue8@!p;9#M5>__t60`b!sGy?K`oX&Y`x`}SPfyQG|Ko!s39t7feuHsU6W%2C_4VyN zJvvo&CkHE#Zua(OqX+FfIy%7mnxs$n?|roU`GIKCcl!(kqo85qfC~`uUf0Y*-rAUZ 
zaqn|;GY&Y)XL9Ky3?9k3M%Y_1F);*FSjh8du@}eKZ-*vVHT@~XcoWEdx*we`!pA(+ zZifp~+da~4W7RHY=HJeNS(t82HHi?&-f&-Sd=xEdF%%*SL6!Z|rR3wXp%UY0_`qij zqQ#w$7JmJj$~G_=Edy+e69mZ=yXWOFn{l>~3wL$RgU!8g9TTOTM zA@11txbo34M1tuA9iyAp3u(N*zP=l1%4%Z{Xn7^+qclF8;tjyTk?>-v&#uGHE@Y2s zu*0`+Io+xZr*uDcNH5o>jwX0fk$6R3k^RFt$q}=>~5L=mRR<%m6*r4 zo^hc&y}2iF<$ z{FNV-_6*>CVKA0y7;#R$D(vy&sYWbefg1l~asR{LqUGXD7cM++Y?S$6KRUF$yD?&! zF%93Xoymged_PtvOf-tP{JLvU>q*J2UD)`PhC3!E1_Mv)s&j%*^PG#7kBN&5f|z6q zS#I(BYn&%%7;=|Ne1CP)|5+X+ff)nnJM)rP{|WVFRg;NYFYSan*5O=qgT80pM1wBu zAqG(ag~h2VcWC-3@o1U)|}E^nw=XO8$6nPGU3gHW1eO!S<#8AzN+p z^x)*=XYYLPmggm0pcMOqbyn z;h(i}28vtZM@PP~sj1xX6mG~tp}kgTZxN!9PadJTeDBrSOL~=d$q0*x{m(ZeC%y8_ zM94$%t7Q>{74ju~Xu&8IHFaoYWF)wne?*De7_0{#f+nW&g}%N%+2`klTVND9BTVPd za{)%++}hqAwD;Lvqd{80NeAt-<)bV2{10~5`5%0+dM$Ob{H_ES*=(gXj@RvGlaC@q zW+2fH+C@i4)4qE3>LILxcAfFX5CJ+;6zz+Q79ahNXYSvv^4_t*l4)sewVp6mPze7; zck$xIhY;+v6LYR-_i{SBxU`-eAL1d8mj|wNt>|rg@fp@=A{4OI{j0lM-}p(8hdaxI z+KG8!0IZyxZwLJ1hetm?&W08G`0;xAaM_=Ix78nNA)%qxQ59w#R9G@cVAji(kaQy9 zJ8|#eQo0LZd>U$MHh{Kwz=7`MsiqG+a$TP=l-Ov*Sw5*8XvRNtJ{;l<@x_q#?(TCF z6B9GPt0vDOs8@zk+&Vg551?IJTVs`!G$g#(@|-~w7r6>fS^i$?Xrnm`LKQ17ugdt# z#Kg-0@!8G`S!KS|^STGwz3=Gcb0`VS4l?33=|lirB#w@bJDVfrmb^^cKgu3qW0J&N za%+lU;juB--B*(=CIKTxLL@WrD>M*<nN^jJ*lj zApStSx3?F@;xvVU4XCgkCWhE})w+)zg_3R!hwBZ-5H<|_?3Ny6rdjUq@6S{W8&NYc zF+GF@uqe?x+SZX;0F-6ZeFy=Z$iu_qs{fIvoBp?#ysg3HQm8oyw=oKsVBYGKyz;JN z7O_hLaCsISa9PUB%L_#&=e`;04d;S)P%<*|{H(NpSH1ftCnuuDZN;WIq1>_ub`6H% z=#J)=-W+AxuD{o$~6{t0~9v*XYCi5A9DE)e)cW0+nwC~5g5|dO9Sc?hjWK9Uh}5eAyoW-79*ON| z7N6p_M7lT-nP?`-soB|VDKb)0+3^}T-Y5&62&$stBK_gjYM>HR)eEnhAvFzlet~rJ z1Ty4oPNET4MSsJsy#8eKXoy~US7sqg8w0|EY+uw9{<3s413om*R50~xR`9~>v^ z)#X&k=&#{|Sugi6IM%97KYB0=iG)td_d4LVfrI`!9}fiYSWpFg5*&$EN=nLeIX_S6 z@C>t5tRs`fY+}3?v>_NJA$Jw0tQHC!`@vT35F<={lTSM8=bSLBh$l2 zk9b8$uqp?e0drpjvWqzFZJ3%_SUdzQ3L4S zq!H=uj&!gTP=j?!FJp>9qL$VoOVl z$xxvll9@Z2Ds1~?anTATIFyu>RzrpKDR3p=XqMUL^@|rTD9@fXb7unygl}wYECxJf z#KGDu$*4qBc{5RNWY|PN*Vi9NfBUy@8uIeR4=RX(C_no0tZaD?NSj65(8>NqI{ccy^dU?JrVIl>v1&d%>!PrPs5yeX1c0vzq*<5{`?(ZmVN-^ zVEyA=_w&dZpa{G*Ua!jkWZ&}7?@?Nro5qQ8-PK#3C;JP%V0OR&zdg)g)?YS70n+?e z8ZZSiSTRo!lLqUp;a?wpcKOJdrR)Z~HUXwk+|p$f%Qp!5RwK2zG6Gim^Q^Zu*#bKvIFC{In~Gb{yOz$oc`7J?P3Z4^pHuwS^W`4_o&gS?nw~b*mZzqs?kY~0uZu=m*;YH-vWoNW+Bhmz0XB@y z$cTnSQ(){`m_MtisJOS;#@t(Bs{@9rcj%54e_W+OMF$y5@bE5&N(wHrypS~(R^;`3 zhRfL=tVrmmehH6sYfkikT0Ea|qv3p8Vm>(iuc6h#SX48l?EhB%-P{bM zxOJ=_ec^`ocI~wlg8TO^As{}5m@5mh!LsKSh$`|{R-Az7DQRiL00C4ud`gxZ2kI@% zty$$T{=GCuApRS*+U{c(RGBdno88Zr<5#Sjj{yGly8jEtDDv9awA zC<FaX-~pu#3}@SV=AW56N9P_bbo@PI7E7aR%NPsH7prvTjr^;+>35+zkw|vAV6gj z?~WIKAj)Gwmip{5=@$mbZ(`N!B5tMQBpKw`t}0pEfnq}jP_8}$qX>mKVhTYOFqJ3Z zcctSx2<($i9v*EF8%l_t@TR*|_5lf9%#QvJBLMC&w(6z7{Mz|yL{yZVre?f&djZ4? 
zPf+?$P!9T!zMBhB4iN(b1E=3Vh07P>dFqRX!$3B$x~7?vep@L9y)0z)y>WHt-ONw3 zNXYQ4z$zeD58B;RRxUrFd)Xb`(jq$sgI|6G+N#JDF()BY!HnSj;%}9V`t*Mu<=>V< zRNkrGfpL?dVj=1h5N>Gv4*mp2N0W>w(g<9!eUtNExs=Hic9 zFBeGnGfD@eS9Y}bk1RTqOv|Jtkq=hA*fKW_`IkJ+^V=&r{xEX$LD=Kt<4Z}tf^2Kk z&eOd-Xh~c+Fs7WZ{wII4fe-?~^b(ncy3sCRi$>d$x?BfQNm0;BLs5B>fQ+#b5*rHS z+zyKkKMl|+RCm9N{Wku|Lpl9k=infXVU6oeGc$H%3X}#l^7vP;9JKZ750<&RNVIYx zRD+^01L*G3nxOltE~?|Al)+)_qneF|y%$QX_4M>|RPRebMj^bo0omiVMxLsLbfv4n zt83=OpgmAfQkwRk$>QR??6jG>-f*&0;>=tQ@k&8ixfz0h@F;{b$L%tdcL2P3_~jY> zxm5mhdQ`!3$;->X>v}!3JX*yMX-S^(x)TELE469iKU@+ukg^wT4swG;z1DGnRe%8kcu|O#)V3ov72NmYrW%&}aN`-{4NM z3h#ptG9av>x>1);M|*oCz~O<|Ci4qu5G^G62K-S-(B{W&uVTHSwBR8+0TToSY6RRm zGQigSEHL2b$`YbX-isFp3iYTXY5CZI_`i3Z>fUrbAHwr%2&A$&{a;|KsChHsC>Fp#M%6RZVc|=pFw3 z9SJPWr0SaOD`5ILBxs4uz~Fm{u{63>>vg+KsN{Y{ zBUf|TcGL!yRyUf-(I^9q^FqMUCIKoOK&nhqd88Hh+-;jY z`70{%=lg9mLBPJ3d8`|eo);p7Iw#eFY{1FKzn)S#8WG8l%yakccQSGLi(1a*{GN5FZS`Eiv%~piJrMP@QkIInX%o{%*MVo?V zWi?!U;h7v8D4ao1h@9>cos=w z3bUd_*GfK}6M^C|VRgJH4D8XYt_xEcE*$&-(c3xOXP^seqEuR4e z$n!N;y;-3sfA+33f>DNBX838NmAVv4eJwHKJ>VQ!kTO1h`=$)uDvt{IN1(V^uvy#h zOMp9YJt4cm1+vyf&{tbPI&6jTZO?>P_|bX(DNKR_Vj2xN#^>Q-9f`j`9{oe{*+vU5 z;1!~p2Bd@c2By|;#tXFG$%3cIf>JAdS00O2)OXJbOGc3R$DbXj+<@y*BeLLXLZb>7+D256S28eG+cy#pes*dm6hpfH*tawXMNm7n^1~c9RB#N}|&y4_! zB?)d~ff^nsDBU)jjCY8#F&@8vUXqJqyaHkBl7yt>ozs)!)@qjpp$1!_^Q~a2sFoDk zWI~ZH?ndSFclFBSvre14+I^F9VquPBA777502VEeZ5cx4nhf8q2&m`P0hG&v8aOIs z%z%8|Tbl$Ru|M>A^I7r5}fBEgtmgZ0T zAF4npFan@#L58woHl_gzh9ECGh66-J?GWB-vH8jgIM(L{o{c@4;{R{Yfuyl ze1&(wM;rFMHU5f*5L>PCc67keZG?iq;pn}E4_E;%qH>hWeS{RG14$OL86brGkgG*K zO%>~63*}ItTdnnP%$UKz8;^G$ess8SSt8tEVNB-TPM0y)*8;Hf>B7fT{1`(m=jv>vkua zL5*5lWEJz1pL|P{QJyRb2%DcaN zeEBC+3wYn1PxvjnRIc_jI%g2SKrn~-rnU6YpMU}$5M6U#=6s-^dzaGZk^3QI>kmz> z4yT`)(qUOn35(d5I(O9@anFWb3(kKfX1;X%qvj#u_#;2W#o4(tmz$IC`gL8w7anej zX=%^E8r65L0V3vf2WzDm2OJO1q?v*Wn4y#&*L2R>+S**zuor4)kuO@b35bY@QiQMY zS`Qbei+f!ePhG2U`59|go2xO10urTkY?YE>oza&v^3{3RGH5$Zf%mHCO=zZJr^m;j zO*L`TF`tPA&OcXBP{4rF-2Z%$jg{3|`xYTceiva>Kh+jV5pEDbnhNlmq3vdIf*M*s z04H3pp(ATTN*Mt}QPR`rcRSKBGdm^^p=|5z9Ki97?O?QW)T*qfx6yD zmjyB$7)6#%(5@pH_C>V2=bsC#tQ`UfJNhFjdjEg_W21BUvuCn^vC%rJJZ&fh&2|d| z;$rNN@uZZLYCFw`qJsZSv(FS!kY&&Rp1`Emnib%FfaC@Z7*g zHcDQ%eUwkV{!vKQ>|Q5AMX9*<;H#lLZD9E7Gb|R$*pRzzr-Oh0I+-Yjw^P!g_cG5V|B^tlmqjs9%C?sCWE;~nvb}q5IE;tk) zocewVko|X57yGMEld#z1$kDkICF5hf+7BynZNd-0v__@$qbY`(4xU_Emv07u&-Z* zt+Bf;5e4>Cn+u4AnOP>_mc66x9$u*i5Nz(glAs6N^Erb0GO7)rv`nYiKyA0$b%`p9 zSvm@>D*-x%s&9qPW<)q6$Bk%u8P;U26{OU-!5< z8^Eb_S{AVZWv=L!a`HXC%;l;0?> zf>eryR5?z{xw|(xD*#Dvg_7NOs5?_dC#R=BhlzHPQ>wYex87utRd@NR)+p*YA?xoi zg^hzFJc@&Z6PuJoh@b$=$ZMVkd=jmPGoS7e9334gN&Gt6-@aw$w>owP9-J3~SV#m@%5LEbh_wHTFZw<86Y>royGraP8c!fLL(=~c;O%prUK^%`c zn8E*e>ajd+Si8eVDnfW)C3$987{RUEw<&38pxy+v#M07I;tMyfU891XmIGOx+m+F| zcJhR4u`T&!LSFSnDzQ&?;DK=uD!P%}yu4YM&+;sYJ!ukNRM2h%Ix8hwCx@DC7EtQN zpB3CM`HiEvb&5@Z>(F?Q1Hyvf@gKjY@ne{i*(Dgdf6DB#bgpLHxGgZ};`?vM}eIgo+8{o z%H$ZtPTl{8c||JLmkx;=cqV6YQ$ZD?K*w_|6EWD~1yk?IN79^eL=aG%A0fJAt2m1i zcO8Aa7>N|AdI$U<2B7Y&PHl(JrkE^(#gKf=fkwBulvoYZm^aT-C^rfZ(J=AvJAaTr zHOnbFOv8>)5Ftgr1^OZBVX;^&icgLG!{@#rF>k))3n*yAJK$Lgn&F~1|NCGC^;y~P z?A#Ve1iY-2@j79PmlLa191RS7j1GB;LlAhK-1OQ72dT?J9QjFiahhxHv0wOSjy!}5 zx7d=Ma3#NDW@qpC`eChnnYtqkhEcSqpwM)eP%$VQ^PlGs z|L{87u)Zm!@cz36qPH1g){H(Kt|moc<0lba^h&b$^t^EQ-$melwP%7Tycc*+&JsI- zKJ2Nb)OD&PGt)UjPu6_&c{B~bA78VI3$<2Rn7Yim58*$bX8ox9E@8(lr?Ue8c~hH@ zmt6KE;#+2x21FqW4y1^pO>1hsCmXw7(vTlVeZ<^9V<6Am=44uj57o1}hIxiWy!r1{ zffbx~B!>OI#CBIiCi>!-Hx?qqNie-k&)T-)64@v*ddrQ)b`)iS+8+6Eojntc~izmg@wp)&C5id~eH>mYqnK z7#TTDP~d~nI;0WAu$x`!?^+d&H9(i)Iyp;6IOLLlR-8#DE;i>uH|!5Bx<7n)^4F-& 
zarj#dUXCNdX>^`?(AXWU(#XyhsnQ5`ppIIO0m+XD0UQOY_nO~E2x9(uhD{p&6Em|1 z>uxnc0oBB0|32p_0j4u3hBV&0xF3rzl#=D0)PJr|{(($$<*c>~Z8}+TC>F~nX^IZ` zOR>xMY-#9ptgPynxA+(il}1QZZ$86%adgUI-FV9)>;6Ur>o$Q7tq2x1Iste+tCbvP z?kL{d@h3q{uuS+I`f3X)^2}h^F8Qx%0es>Yh^aA*$Q=}EunR+=roU45N|m63D2e(e z^}i3TC1-IAnmfY1D~N$Xz#ujMS)?jv6m$OfsT5d`>B-UlU#@H8kK%h{9ExE_B;vhGW+fm}6bxaR$jRaV zta6O*kv<)p*#=p#Wv=4|s!ya{e~b+kqubEI;sk{Zq_NU|6vf&=1jb&q=K6$Ftyfo{ zwLs{RINbOpKEvz8NeT}fqWEO)pt%<$HtzqCOc9gKnO3x>PzF7Yof*>15Q3JI`NW_NI!(9%NqGt(?e#T321`#6*0R+2pqCeg?{zw^=3j`!lu!B?*H<2cW3 z@M9ouhAr(fno|(UH&S=BS_@*Jb5xe$_?}~RK51sa%TMGx^p*io3V-^v&o27u(_6Z_x}i}QC@BM>yNHE@ zBllL@#Wpb1D=Lk0VDW)jUWm%&Aa|3%@be3o9(%xyBVZoX#fbbBg;>=16RA5N@S<(no%rfyY;*D)nWQfAI zuWlw0;q4P>l!$i%7#SC_r0yj3B&ZByt>+cRLq$14k7nF5U1piT!L0P0JM}C$Z*ehb z8V`XqUJ9w={vlGJ-1Jxs*V#We8xxs=Nd^c>1o0M@dEqrKX3c>S@9vg>A(3ncl6wtM zHZhtPdRZvCaTK=hQ;(;RvZP`n%+PR`Yk^ds6Ht8fmWYlK_ZIsVG|pYZdF#P{oyK|V z2SH3)#uDdLiV~941?cYXf0Oei>a4p@QoA5AW&Xh$!1~WS=NoUbbl- zMn4%f>e9q8ZUYkergHiu1tv6c+AzzILlE{#D#k19U%G$0XHX^_ayZQ5kC%E1X8Reo z*FGtBM|E;BF1FJ(D^~ieun|$%?o3~wf1mnQ;PB{NOj^2t9tqu)g314RQ|hY(2EmbW z_P3!H_&_g%W`|%jJGhFIMt>fZdB1&~A|x$UobZhR&fB~A>E4Ne{PoaenJf~zL?VxE zIe!gDp}T0$HNby@3Bl;okC1Vmp%lYLH?hOj%0^H?5Hq*>R1S;w4T58{h=qvuyD)08 zv%VLEB@unW@OnZ;?@S22jI%?ou`kkSdx6AyiR4ynnx%fTF%b$rh{$vh9@m@qrhTu+W0yt3|gN(oog6+)Nq1O{Kq7- zpcYTmoA2^E$I^w16rN{`g>#3#aWq-XfGA9X3SHq`+bph?*I+W1mJimkGAk1a3|#R> z9nG(iD6r$8wh@@cF_kq)0oLmxc+M8v-3>kuYvL}r%D?yR@ncqA$1y$?#lpcfc-$0p zx&{-7gT-JyYj*1+6IVx;C$#{Ay9cpfj4DZi_@9SzKP)2xU?sk%_!=(Lvc#T+&mW7w zOW3H^$R7QW`XQpdKsfYx}QkTbLX{KOYww^1POY5@cwSl#2*9!WoNpr&8 zZS`xud>oXSP2*aI3 zX9l|mZ4Ai9VTBMue0aMQPZ1ue0FEj8UHqp?iy226?NshsSy81;iCg?IN5;Xw^5FY78VKBYOh<1CZCd z{^~(lDMiQ{?R|&Z6{Cdb{cT^+5>e=dmb;*>5_+Rbbv8Si?j9m36ND@K=kPHe?^g}k zSwC@FU!p)ZcECBnUFHUWgC@ns20{U17K(dSdq+@YMH?W{;|G^j(`3;02((I7?R;lE z4?3K#yDgucnVCV4!5B1j$2y|aDbgY2rZ5S1qisGZ(xI3Thc(CP$tng3_9exZQt&0`XfizLdqgmokf~i{lEwhVoOJ-c_JO_Wpriw=H}+6 zKtx)Evp5z$a4HO~aUhM*v^vPb2l^ZcL^2>A|Eq04Me5y&5ESB;ClIabI0t)3Vt|Rn z^0TZ>I&U}r3{`0q+n0L-p^%-`a{dF>6v)D88w+$Hcm0Jwpd$(!DK@HmiWWSfTe~eg zTIwl!UJRNPxi^!bP}7)x!($)=4zF#)S*`AbH_$qm&rK;gb1)J9tuSs1Z+A!&!J(s|K%n_ecr;Hz zN$J`*;-6gqQ#L7#!w+0^t;q^x@mTx$Nmo{@_=8#_^8H7{lyXn?8`m2NAiQ;1pc)`Vnv$!<r}Sz6kEmu>DUkNvk2mwTYIo!Rv-J~CbbwF8c_RJA8*Pk96!MGLrNy< zN!T1Qb7o)9F%>KHbT?-xTL0yx zQ={RwJu85GtUfo!w6(R57vBC4_yH7pG-hZ>abBGPaax_>B}jbo);U3#m<|~O?B&&o zp%%LYLnwMbRK4EULXK>>Fai|f8!|%-2N4L5vfU_CR_+RlccS)v8zGHM(ZPN7oExmi{ zDaGJ2JFl%n(1qJJaA4#l)+`LLy}a5ue-U}+-|7#AwFUnL`$pD&h)*S9NNOZmfYH6S zR)!^a9vZd5G7BXBJykd>Z{#>jcp4FoOqTkdcXhUHdC;Y4|D#E_e>U^S`*Wp1kUADn zG;iLeLV+B7Aj3sgPhWI3&M`jK#=c_DJ2ivF0!XJD$3n(>hLn z(jd|@;L0kn>`G*=no(&SK~p`TiGS@)_`)<+W?2KR%_7Om5Ffv0BF4RKIW5}fZU9CKrXB_YlO9X~OOO)MLa?%n?z5m3@t<<_vvf zn#3a_A1XCBbc@ZE+eLEjxpd9idaPk1ulCw*K0Ca2ZdBFTiBOAKTLst9?yQqLlMePp zXATr?43S}DVG=(HH~oq*+%)^-jbLm&h*L0*=5YQztH(J?ra-E2>2%r1?#f3yK-g(0 zUD6%tG9eb#p>=gw4dXbOKrpRp3qoD>c1Z5BtZpyv>Sr*wynob7@=TXfQ_}DPMVXsO z(hJ9y^$2T@wugiYgJBhHQz}=%v_Us}iL-#^O17->J>{&)#T+6!WMabk!ErM6csgu!+$&x^ir`P&Ow}bO_EY^j>u*P&cOvS-tnkv(0PpLSwLfVP zbVr}S+P9@0nS49N!Hafbv4^OXQD(_(Y&jDc&s})n3!;?u<>J!@0S4UsEjl{@V*6&)bt_ zZR*YcGx|b<(Yt3elv?zrv*O`j6{dJllfXwKg1S@p>)K!$I0e*x|C!gDtN4{9Pg38J z_^Jdo_GFzDzM-PhI-@^~U95F|cfRY{D%TZq$`NLCVM?7PHI)yfC_+YXm@%ogO;e~n5DPk+Tf_@@zqX>gb%XLvSKl!xXa7(2U&77ZK0BYq^-pDXr*~>U?TC(5r$&0+=+-_84d;rj zaBEP+h$2&+6$4y6XDCOg(V6@t)h5C+`Gj9l>Zwam{+s!D<3?gnmXB&Li^&RCKputk zqHAL-1HFRMw}Q`cl(BWN>DD9O)I7>6FJc>Ww?9Lvuiqls1ut? 
z45gN3Uv07RF&1t%Q$=n1N%o@lA~)P)--X7^(v!UCt9*uW)V)r%_kb9+dY~+dC2=#- zh>UFX;YPdP$t5Z2!XJw!tn!>aj7d!u#P~t%Z=Bc#Vg-TQ^%WG*jN15kX?6uha4C9e zHvz`@kImx;YFdy{Y;5yMBAwme+Ey4;O~%W=)D`S2|`$`Rf_-D7=+|BV6bs7 zpMQJ(;CGW;g_7UNEb~dDM;?{H%IGB5enLc{W+3TQ#SJLN%#pq!LuO}Z(cdM2YI4v% z1l_9TDDz-1;KW`X&Sg4Jn@atC;AzGT*DhX!oo1{QW_WFE{;PSnu0S7=;=&oKDZ6PivbGN7Kc1~@6jH) zz$O#4C6A0r!fI2utgH-b@K&JTY0;*xT}@yZ8) zYdPiu8V6UFgXmhhJE*6V}x_sPFab*1NZI{+FW3>Qhr4 ztDIvVjVo{;ur7uFI^D8Blpn08gcmU9ok1KYY9rwE6LhuPR1)ChvtGSQiJ&Jg>|lAp z6z)Km4z%|Wpr_z2F}c9hpsyk70NNsqbJhb3w7ukRy&PU>W^P$5zUX=KI<7%ztjiSP zBVLZ#VDC4r*68U{}g*a*$mNsre06 zcsP}KjCZ$W_%^h0*zC7IUL7Q$K=411e8IRkp#b(o+F?pJ&1(``5VpSKVs2@E4fX7L zt$f{oUnI)7{`uy3%F7zZ-m^)SeutY^t&~%|MQKljJyqBXqW<5RPufBky=JSBz@_}x z9?f5LY99DTsWfg0oT;_jBYRNrXI0|eu2#d*58a(JYsD#TS3F$oZy=Q8f>6PR+MePvu#dkNKIw0j#)PM{L|P@TxOl10>xsFO3(eu5^Yy_k|YZ_tal4kYGn z4O$ohrCjxuLJHUjkde?@}A3my=)oS#%B4sNIlv(VS0Z+x+NMPW|7Q1vy}0rK;` zgub~-kpF^%YN&rkIAh*}T`V{&e+P~}475Rk8xMKLBzYb^Hwq{D;fTyL=t7%;=1r^q z+%xWLV-av*mblIVI@O`uB{rjnk`bcJI>Wi7nW4_{^0$Z^Vkoq92E}opj$K@wWpV&r6X)GBfFe!9uU3I$ zkzb&uvI$L;T6=CIxT}lHIXHenFg z?!)wTn*A;6-t6RqQ+{&eTkcE^gw$H{@H=AI3u2t3LJE1#9M_h*8_>G>X(34`8U0DS z$Mk7W@Aa;xTE<;E|F?teT;9BDq(rYS46MfDHc zN#Rq9F+muA#ioPp_2&fnkZ-I}Zr^(EG2K}&LYIac!P|<~3SD0Z&5r%6maY>?(%N*5$B zIbwc0QLVqlSps5W)5C^E(;7!l#eT<_{|mDbjCvU)eA}yO&|_*uS2xqR@JwyIGqzS>#ZAh=oF9B(y2BmviG!+-*;b z5lR)OTr(E#zSF^SC-p_<)5P#~)k`Li>?9n@rKq$R?65hk#hjUBGxm^wn2TZz>xHs^H$kPKt%Yb1-lDfO7){?5zx#qEjA z_sF;L-|Xopd%A32@%7-s>fxJpy(7DO4W->aMW1TkMUuVnnT%ezA*v}DkV~IZ4}!!} zLNVPBsUI`zr>~Np%oFfE98O;On3@_bDw0;JT;1UgUSsf+F{szFgpKVJC-RKD^B~zq zcl6y@iG2!|opzpy&_Tl7UB1)*AAD zn_%s{{!3TjLwf7A>a!00pY){c1FC%)hItEVctRR~KVvkgjg7KSy*Dy89K)4S20E+D zH@aJC$uIIwr1|t1kfF_9wo;2LF0JuRLdfda4|2rmL{8j!J96S)RjAKZoB1A#27Q|P z8|j#UwwBqhuDsPPZ`r{M1c5OvTvD_tlKbaGC_jB%oLbHj{KlAG?@geBV`Ejx`*VlD z`8l@ouHpSE=sx?Ba+O@mFEC&Bi;@kt$eBtehvhJc-$#FS3Tk-!&7$g+MJupOzOiHw z|Ha~|@e6q}@TK9=%;Tdi`|{(cvlh6|<**W;J`=xk+gbduk!mD}{$F-~zju&gb-p=o zG*jg`#WQ>Ocv(dG=56;^l(ddElhbrRj#0B+L15neHwnYbw>TZnwi>2tRHnVD+OU-hzw>mxuY{}lKF+;cADTqXPRDI za>rQ4%FM%GtAg8eY3nW}c!nBh9c`_%3-8`y62TXsquq!g&QngH#mqdM8a`jxX=|ED z7Rbo^IU+>*h}^-|SqAc85}UbxBNEgoH=^9AznFQ)RGY!u7H)5w+01 zvy5&DH(L76<=U**flcSG+eQ1;sIlWQbemeF+45*A#N*Axa$g=u5vt5W{tv3&I;^TE z>igb@F6jp8k_Ks{5kWvgq)S>FrCX5hMrjZM>F#b(8V=o}(jg)F%=z8#eZ9}~7njIB zXYbiFYu2pzexlDi&4X<06d^)B*L!**MV{-a6uT!W{5-{|C}g8?KfXkvyji5-(JYqjH_! 
zj?u-$f-E2*qs!0IUF@dkTgC+S#&)6Z73u0#ZL^JP3wk=7v9caij0jyXu@JZYi=s~) zS!74!O?Gq(vZ;)Rn8MwWsF2fH*c%Bd;Vm?m+8M|?JLQ)-YH@4M^YudQ3O$&1d5MG^ z;P(M{`Q!!xZClkZGXHRdWHz<>I5+Ip(+`OSnmO*Iph3U0Y2s)eet7uC-^0fQk}jx^ zL7~%wFnd^;posMT6coare1T8)7~`C%29t(#p$vO>&YLTgl|?Vom&U8ybE8%srrVtt zrt5H4$kU_H#ljL3BP_T#6A!OtML8nnXX~ihaRcEKwuxu(&!=2VTYW~Av5v1vZL!zx zL8>;y+{bMG?|P8=SZh6=7>k%3;U|j?HJ(|4*E$@9KiMyh)y=`&0>;J}LF;a(L zGr|<{CFx;`eVi5iyc)&#+1cah(9x3Gm2w5#?gJ8CcdlDR&^sN<>b=(53OAuvWS!lt zvkf7d&JUGw+Ggu;sg0;!GTx*9v&-$@PYp{Nv1po71Xan;J59{HKNZm58di51Yjhfx zhz0(t5cqa5mgajLsuhuDvbrQUuF5iPX)4A-dAzuImG{n;+LiKxHswWWXm#OsZYJu{ z57nZo(k(12?nRE~jy+q(yVg#e`?~|qPy4t}&Q~O(1Kb{S@&_ey2BNG}?V!l^aZcEG zu~RQ%U*HTAO#7Owd9g)R6lh9xtSo%0V8i&?O&x3_I5m_O*kBeUh@UEJHDGc&s)%xRqr9L=A7TlRG9 zV6Eyr3=8VYPAny#7hm4IVFGhP*aF66M(Xjf>o*ZMdkR)oqG}CX(oL6)iF}EHcO^Ha z{xY*=Ymc_gSEod$aUpW~mto~D1;_NmN{8NGVceH)x7E>?tVwTceeBA$OZ$+9Fr`P^kZl9 z{%^;T`Zso`kL=@>LZK%{UH?R_$}$CIt5n#a<;s}o#YRCQ=Dykf?@R?-?$m>MCG)XA zTb~<^eEvpUt@ADNsWw|}&Y%@5SREa?g5A0O zIfpGTs`g4*pS_-We|q_|m}dOahD67@+9xJz3tKTRz^!&qC})qOx0XNR(Zs-^r0561 zRUK2UL)SLp&8zW;x_d}g9e)=|9gpWSH$~D)b2dfrl=rP`pLy@~>fh?ZhC*@X^sAZa zGEjP8LyG*eMH+c}Oy6y}gqE&$tYr571YD%8hhYo;&b006YvZ(;W`D0}g-R#>WgoW& zBK#l)44YZ%`8HB&etz}qio-o~wOOx}KbeX3&G5h&8izIB+s5iaip}JGOPdM8n&5!!; z-&Wk?P?Q?7nRY&l-OSY}jVE=Iei~CHe0(S=8673qlioRoYush+Jdk|13lnr48zF8A z(Nh)_Uz$5is7DhLT&&(79BIn4{zS)}N@w=)%8t$qt@OwHf}eszBSPX_=o*D2$9W=?E2!n)U8Zp_xy1y(ssWto}I< z_c;GOxh(Rw18q!Y?-$`|$M2zJ9|Aav??u9E5q*td0*NVNV;7qtB|a|p!gU?8b# zZjW0>-+oAn^@4P3?N@IrcR+rH6N6Xh#nn zbW2%xk+wUP!H$rUs-HUg)A4na>mXu>2y3v)(eLC9H?A*-uPWPjzPdfPYsDCO^$vUHF^^{+}oPe1V{Wvsj7QyKsLktm7T$w8~! zQ`y5~67-{$7alIBil0pJI?W$m(^ok(`#$XWXJ1U=NX zI{R>NAGM9b1n}zLww~?kzW`0wep~3jt<)uovU~hG#KGe0nDLax6 z-pZdiyhoDfJte{8QXH7dGzso+PRikdN7&M&0p>@ZFg8V`*NLW{Be^0RS^C`sADkF| zjY{J?e9%_y%^pmn8n>T(y_`7#1!B*rdM5sPU7Bo8Qa^XH$$aCs?^3zgUR#cWJxiSu zop9Y-w7S`u@A)*9c~RN9rUrM!BlVl(W>iBW&c>cWYwyST>xi^5R;%a!tiESkY|*=E z^v#k<`}jmGL>CX?mffGH)(+_BD=hV;qSTxujU!m^-a$WS4(4RuW6zB$g;K<$#?$%A zDbdNRRQ>$0h{fpcGf;y)mxg|+Q{aQasaT4{ZbGC+= zr2a0OAZ?R$;Qh3pNr$cXC+CxPdC-sUb#*;K+>}@5`ve{}iUmWcoHG<5JHUE44H3)M ziHpP%^_8Y*3$Eocw;CNvpqOSD>1}oUp9}l*^_kyRKD!9iC_2_X;OVH5m|s=q4E;>_ zXsfswH*SuA@s)Yprici2W|-SoFxg#{J#d(rDE-D3^-5QtYHT|g7m=pKko=zAj-@18mFA<#=u(IaOYgp_^T*iu>OQp@W^ohfj^^+^5 zjo^-U6nzrMzRQkgmZ`W2*PbrBEv26mlN=i}GKa2b?*_+i9OGe#8%<7S{ufL#yOgLp zl;F`Zy9jE&X)tl_A3xH0yWB_^(p2|u3sjHpjp z^nOKt(O4kjl4rcdPe`^M!#t8Q zxwfH*D(~NRZvG5B!X%ClN{<9pg#`5XdH)F7)GfRo|LQj|a(r}E*-}kmmDH9}4t^>r zl*124&4@_ZBn`LS?TY&TxEw?-YI^1V{bZv3q7dn5RA#>EoqEIJgI?)^ncG`p)pwyZ zH=Db(Mk8%{P^A#>+1*~|TB}u>_&r(0%acU{`7&uz>3k!NZVT*_ijfwUJoN-`(?0f< z?BGwS_HRn`-@ACL5!5*E&_(a1aO=(%$hGYqa|wx)I!VObq|I&?v>)K;x;!F#$QY|T zLb;1&A*!brh7G6W+xY3k`)=NN{uEhOSYXiQ^7?9XD8QxNeqxRBS82lihWC?;KLnM7 zmaGh<84p!eHe{2Ea~oz3j!KsegM!Gr6rX)97U2k*YS)F}k2_Usm+AF#67P-58DQz) zTF%Rz^i{Yr{yqViq0=YeMPGjMA_pVqxqeE!bZ9`s-@*B++8zf#;P1V#;Od!vkxQlD zQD{Zb{^t#(DZ;<$iHDAbsW#(g!@a%ZJdZU_maHoU*2YX|P@x;U+E&xLu#i-*5xe${JPlsdCJQew9%qi=@!kxOhZHa+K za_(pQ5{~$A)f_dF%v^D9K_q!Yqf;CG<>yRi#4HECz9!+0Knek`gtYFv_L z$UU$_4=o?+T@-1){YcZL$^Fkp?yLIa-G%)OdWkpq<{r-e;CL0}Omy3U!m#D*z4j#a zP%J9n1c$(tIGkxNtiPG#!SAeua0dqAh5HK8$eA#nwXjq|sz=A((*_c?^476CD=lPH z-HH*bYX0vSg@{f1{PI8fuqgahY7RNmug@KFr)F#_WIF$6H%ZZ2PfA6NJ4(_zDA}vb z-TK(3@LK@%f~>TwX3(WVEFS%NNKe&A61->@f;578-wi^?gDDyOR@T9s4z>aD#pi~H zTt9&4LfjbovL|ql>4~!(8Pnh&KPT$KWd2o$wjPl|c*j2t{?D5^t9Rk8Xx7-?7}sWl z!y}&HJ=`2F!}FvVn=td+Wl5!bf<3N|-DI*WIj(Z-d6|}}O)>jp z;9XPM!WpmivJIz%I19?V5{(~!6J8vPSC~A5*KpQi;w{hpU>qIm!w@S^hNg>ADOy>q z0s@xys99B(UOyI_Poz!Z|5&T~!(B#+Eb0e6ekEs_v}yTMGwhjOm4`})@KMY@T1eMo 
zWBP`E{`26zmtIreGkx{J2n!mDb)3iP>1icRf8!0GIL*%}#Chj-q&Ikyou?W1eh(NK zjQO^l)V5rD!<; zLUnduNba~x9e$g_z?I_HD3aGteBFY5 zG#OYSs*L@hMD7^vR(ow84*WTgAr9F%kj_X9CbQSSbNW``{>o#V$Q^~*@Xy?$GZv3LrBBy9xYZPy}FlYZP zao8coVpq0CTgqfJZ2uXyrxiz3+~B*CS%x=R=V6XmNeuZRjkJ`Je^HQEa$MR?y}rzg zk`~&kU5$7YmEPuyc;tE z-I{h3H&QB6J|&F@+6L0HsdT;si)xZR94tnQq2b)CZlwQsDh8caJXRI)QjFy8qIUTX zH|1)s1sAs--|5zTPoNkhk8yp$=f(N>w>-D$Y~*GU=Ft*KW6D#*p^RrQv;_6`ZSXOr z8{BC!p7coS`Z42#*CwHNRMCWueBcYwG(Q4SpJRCaU^~qs8S1iT(1O$?!UgxqZbfDE zDN?S$dv!rE`!biR zAxE~kcc1<`h(GZhS-DW*nF}!yH}dDW$O)U-j(1fy*dMj*ShA;Jf`Y#|+pFRHNsP{U zu)9}1nmgg?0=q7Plqq0raQG|sCy=u~-aO@dur#`T8ictypnfE^Xf3tnMtSC^vj2n^ zxwvqRrZ$fHnvC}uPIaOi&AwkjWhWF$Xfh41xkvD`4O{C+bPe(Z(7flQdXIG;ZKp0a z`Y-CK4Q>$>0?uSq>byM>H;TBnR`gTW^WFmCdezh&M8M|p7u8n=OTT@)C2y{8);We9 z3`wPFNi$U<-rD=DRirrnI2Eqny45L)`XSWPdkT}Pl;6dUHa7ur+ZKED5vCs^ELO9DHqE? z8)RX}sJ|cUE=oYxgRTcLCQJExqku>lPX)JQ?YldVfRA+76HF8PdF2T{m$hs$arCgc zmhh1-CjN&U$&#c0C_j`5zTifys@9u7yvtesg>pE5DO;-A{Hictzhw^x+bKr-+h3ti zOZ`Y~i9gH(DktpE%RJlOiT1Nv$!iiGbvPPZYmG zB6EJ5>4U?eilh|banL((F|3O8$pRRUxtDKIrLN%~mo1n1CLuv1mOHqPqAr+Hw{ToU z$l)es71{Z`M&k73;idb^kD5WM6)uwA*2c=Y4#l;*-PX$`tA7=>m;D=AiAF(})!%UE z(V~3HDC+Hbc91K6w&1fQ9g|86iE?DVq6j}%`i|X&(2xQ8M%WjYp7@xk=_KTt>0bTca)d`Zi&B>fw18)YDHp7m4qVGs*rRL%!tSgY5(-Mwn z8{uZP*qmSFosWo6v-&)~rSk4r2yHaKdRBF7L9&u+gUu?aQ_|QH!eEknW8=joDeftA zLfzf7>ND?C^)#JJhs*LhE^(R}dn=4W?QDBYke z6wECUEi~Ovz$&|6&);fj^So9vW@(UmjQd#O>hQ(w;T`|*Pk4PFMoqGEAEeD^Brzu< zXB^E$xyAL5Gy{1-sH*0fINU?6e|9ydzhPDUzC6HIz&Mk?%Ut}x;XMDudsm77O415uPFfz#B)X^LuU?#Xb+{GH*}}o`=)* z$)6FqfY%08&-p>y7fQuM0pS6E#&O6{@w_9QD2-sSDTrPZGN13v9o)G_gGTRh2rpeRg%RENG zi{KZ-ENS&y+ma7a7C-kLbDN?gB6x!hTAzXK!mrVHO3Nd>YNfvJ?P3+lHNz;N` zJ@rWn4ZFnEOpd*M3tn&;nDYs9M6fVpsh4W~&ZG>>B>vSYNfGgzU4txvi;Ssa|BZ*U zqHXm+4PT+=Iw}~wvAU)cuREuhR^n(wm9LfLUFf8TJ$rb#YqTc!Y|Ysu$+(yxkWVVj z07hcDsH1}h$k=+`p23;n0YD@lp)iC18z4l-fTUy(5c$#2*IyXW*^%0#^>3p5M9Woq z)~*p>jI0%imNig1^3+~$!gf0RaZ|r>qDGWS%)xt}Y;ub6f0%2XofPjuoETXxdN;&_ z#$8`WHzCU=xzWEoTY|6?bO_#G{-e7|{L}xvHHJ&5HP<+YxXm&%?p6E!6?eMem!iG% z=ShJe*u^Nyhw&LJ?0F&KG~HJ7J9X9@m$wxb$`a6}@44z-PFWVi&%~6S=pL-cCBrm4 zeR_FptlMqfQ3}sh}VK#c%JuVkZpp!UzM5W}Ib-E@ zk~R7JBiSPW&Tiw^5B%86+WLVDGjsw7a)+-D;$=XUM1$phzmU-zV)WaG={K{C7Rwl`Bo zUW)uX7Omqr-;a4J_+}Loo18GX0<*s+y79-4I^f-0E@;eX@5?))C&A zPg!N;>JIE&5?7zR8aXt1+e`zZDE(#C?YzbBYD~YM>J5V7(@!dHfV_Kc>&-wH0}%TS zGX3`flc>skFg7r@8OU#^|FD90j%Gqn*XZHBTrry8 z7Bi{y?A!5h2mh1S9iN;<|F#S2Szi-am&dVDwm7;QVVm&Iw10{bzLZVLXW3m-i?W(B z>=i0|huSTXF(!P`Px=iBwfWUa*tJSUO|2gk%0uZta723cO8yD_)xV5S3V$?i??w;cg0>dz;y>#s(pEKaZHQOGtAd#gGnIcp70O zyKYlU#=%y9b6jN>#Up7vWg(}URD+Mwtg=vt;z2=)jxIA#$G4Z0&N7Xp-h(tGM=I?V zE%pG-_2l~i+J_{xf)|YV0fB!%cQHf3H&eg3!c_^A6dMe_r+9PF_*#<**rw#h3j|1! 
zt#+0n!G>A`mv^nXR^w)BpKY{IKmJxq#dlaB=jkagyd)&8dlE^PJn2}J13J0KZ3K|) zXdx|aZEs!F+rr{v41j#8N$oK$aU_HDEs%gJo84UAvyiThhJUY`*BMaBk zri6(7ouSs}nQqgbZ6TYt+qbuKMsHE$I$F^KtDco z)BHL1Kjfi7-!w3M-$^~wTOYkC@|%fs&ev?nhJ|YW4a{DGjCXY(rNEiB3iz^1{wfetOXNv~ke`WGk!A21n*xZwMQ5%HAHXzLg_mt**Xc zTA^g?n9bq2!iA1*tffo`OW1F&+WoqHi5UjoG?GfU8^;9&AtAIAfnHXl-9OOJ;tV)M zXl{EwJ+j24r1@XIpjp-a#StWT)nEU?fltPJcy$soXkGFu zEEnf&+2foGyn*s&isG-0W`E8J?Sab|_%b=9rkH0J>;5Dn;rBd!_n9%bf5Op^?bTQr zqofB9nQL0-&Bl#1H+ijnoibar@E{x9$*Zx|$Avq!HT85qy`QyS-+VKGjNG877vHW@ z`&doZ#ZmG;6q5$Jxw(mWO)ksifBuYujz=SJYm1N(3qn+^0Z})EjGW76jIbFHkYeNF z>Idu+(t*(Ow zlJ;Voog3Rsd&j}{vexTr()AOcDqOg?Tq^uk0Z!^G@~Ot)y$<TZ5F%fjWAUvgLLrN{tUv3BVb!=F&R&Gbg0IJ&9)m@CxP zB3e-Wb5Qy6kGeI3%0wN#uHPjR3*O#VH4lM*pOV4zBOwA$rvc}(+dL$YIhQ#`^uJeu?}xq&?7}cAC+ts%1Ecg8Vqrb90~=$5E$qLR7sxiRtxfGaV{3B4)I+lg zW_?fMSmZHl{lD}s7ifZcflzM79 z@(NFfDj9x-{NdQ=Z7LZ$`cbK@hLalWYZ%IB+8epnFKY+g2W7vw-?(_v{ct!|jeu|F zQMID_smgDy{b&cFpWj#Q^PQzWVHVhN^bOEAU51yNXHA+sHyH_^FbY}ON=du!9(??3 z3j%G zdRxn#i2wPlM}-O>Gn*UiRK;(^&%T+`O6W^?eUq*&T@y;KO>=TRTk?E{ylbvNLQs8K z)KexEG`}_>JP){+e6Az3TWUqD|{MdD_L$~vs8@{t#(}r#{5fERV8L zvLa#UDll{Gd}qlDTl>DdrB|G8xx;H+?O8I`uU{?xPfsc#KJ2#Kh`1zdhy$Z9kNVsK#nmMbA`@h6Ue@!LuM}RkyP=1mKXYM-exaQZxB|wSGp$Pl=*y%`jFs*C;UaE0&i}^zp!}3q0 z^Mb8dJjgNRoB1&zq3^Nfrw>u}))T>J+W73mbPig&6`)m$21FhoEk+bZN(hnCb>_*cM1kU_xDKVI$T17n>-% zZT~U3eoAhezVHs8HJa21_TZ4NmR~Mt?EN5sAYNU^5cv61MsnB7YwstM@WLpa5 zx3vtP8CQ^3kHWbnhS4<&hFteE zLedpGH%dWj!`-^n0LPN@7*&!_nF5Py#{4f{Z?t^dL+@B1?ZAN-OYi5wqDq|+6AFp;n&5&H;)8+ zU)XMK)avb&3(ZHoDAgEY;y0GqBo2U_|2^y4v7ZUc-VFTyeiRNeGJk&dQ!+t}@zM)& zUhjgvh+cOyy+10}lGTwbj!ZkbwA~zN35?M~`(m{ALpLegw5vea?czE;Xp+vDCcaIb zd{wH~JWGZF^Mk(CQ<{%fjN8$NjJn%uq*WpcTIE3_`YhZIh4A9>GIhgkj~A(Xh1_9M zO0Ihny45%wi8N_~F6Lt_*s^`b8gfr-)#_$F@spRvZgiHyoq zP`@+!r!UBILVnBo4+vqa@^y^&rPt=euU`*_Hku#hJ<#-)sXUH`eOF|_HWgTlYn@_T zkNv~Ud5}{vRZJ466tH+Bc$pwSI7_CrfmHEwj$7b!6=@fCv|D=~7KC10FKwuwVWM3o zt2Tnsr=##up}Z$_BTBbpw`}BT)kU1xa)OcFFRq9gWvMJl? z$bQS9oD}LC!ocr-x)In-vtyK1u$S*(B9}1JTBNYMP2-M>hhq*z{BkbL?bx0JRt^zsBwt11V>fuopduTN&7&1bkq(- z0mNPP-(qf)Q76r-DU}L6$}0h9(A*^6gET3}`!XhKuFCGN%<(}~B8Mmu;45klM?T3? 
zC~U%s?o0|%D1DnS;B8FY9Ju%m>2lOT+O6dA#O7ax!$lU4HyL&eP&|YaK0AtZUQ|C_ zAG)1;wU3sBvVr%I=s|H=Jw_@;oh`h6^|1)m{3Di|)5Rp3Yj&@d&NEXOyB*i>qy+NB zUFII_>x@b-Gh(n+K&;%eFpMQ64Cvn56r*BVxWK@>Eyzwi&t?JN(5?;<#5DYsEmrmz zq%uXcv|}h3m#f2-X(%G^_t^XEc1v=c>KH3nay3fmtOaK6rXyLHN34zGRB8!0&NE;7 zJ_OlBJy>xUDs|)$?NhBZvHR8A1|PkZ_CRjjamqm4+rY@)z~>Ke{EHnbA|PobUOIg# zY~5z4?ww(m>M$%<6yunUMWQ=!#Eo>=4FpztS{KK_6S`5RL3@d$VWOMYidfRjOG0#y zb8o|kKGa`ryFc$CYW(YNxrH`0xqIHm+1Y@|`HRH9!;~&xpuj%Amj=u zSQ+y_>-w%{lPu2kc?4zl_f(^R6%m4adBD{ z-_eCs+!eE+ofJUUkUFXQ$1m_YwQj!|Q)y_P<~`fT_T#yShU)vXK_$p+0|q~&O8Q9L zeVl~Hg(|e3%0nyJCSQd*?5Zm^P0Qlsw@l-YYN}4LT-?j_3kjo^Y?3Gl>^+UWXS^O|Gkp_zs}H52KG3JGYl06P1_zqME!UgFP-8vPgT= z_T*S@x;HDFp{?H!%^Zf4Dt+xjln^3gR`adJh3M%prEPuNB|NGoFo|j7=Ci&QC>Zn8 zRXOXvgENkzwN)pHL&(X{Ef7Y+$ zYW+fW|0rBqRLO*&|1h7hGX{qvvoQjTKtgc_+wvEQLL8@Q%3hxw2Vvl_i&&gG`6G?F zIr|H}$7h=5u8J|ld({ro2pjz(yJPl-uXS5 zxr`!KgcRuRo~#*PZx7~m*2z+iDdP%nX_ltg6HEpueQ@`x;46HW!{gsQMKclc9OM9H$AT-g zewui6bbo}Y$fF2kJqQFQ1}(72aX%Ow@O_(fyoLjv8QcN7v7rOFnKirZN=4m%sG?1V z)^2)Sg-bo0Q_tK7T`yNpqQd>ZyNGdm$$JOQP|}!YDZ~i-YtXgcA$+`Br3fMGtD%GR z70niFQ0S#HeE7j}%okJI0yh2T2rN$2`IWbP@ANe&$g5GELGd*b&p__zFgR14Rb1Tc zKKhsCT;3fi!{dPQyH?Nsu0P1YAvV5o9|aN+z3ws}z0iX5^w49QqHL<|<~uXuT9Lx} zx61v5wpl1gP5l{KOv54#g;-=zkbPQHA_?lgC94vU1(6P3|1+<~wX*k#>Q3$1UX$g4 zGN@oa%1ikPSozPyz($?UHW?74uoYMYZ9VokupCgrM0Cb#>a zVFu9=cTuk5B$7&7BB}cxp8k@gQ3-mPl*7Gd<-?&1d0kXas7Uw~Kvy9Q>9a%b_ShQ_ z^y5ASI%1oqN5&DN$ap>_M`lwPMJm>a!jg$P@Vjer3vT{(Z3nY&5H}@jbqp#j=|{4` zdGayDMw|_kDJVi4+qBJi*M(@yj_Qit_c^|%{4WzE0@W%d8Z+Sv&vx>2V~6pts184G z=*bH{b_yCPbHf>@?6o?2>ovQQv8J`57@g&6e@@m}(fgvy&f-86Qxqaqr19KWtu2wD zNad8x>PG-^Nl-DT=p&Y7qX()EviY}kXVNU;>XUpEfjg=CUpB-7A^cy3XiOR7U*KQ! z;Am`6a1{szYZwbqLLFk7dA^$`ymonc;(9Pxy(M>gp7Y6AVIEQ)5|ABOnz4 z8&pA^U>RkduyhC_R#lRh(W=I{xikOs^AF+0n|fSwA;zO9h6NnLK-f5`8w6V~d%Hd@ zlb*PW2{~{76hGSj^73+I!TfEjH?L?Xkv471ns<(+P*3iwO_f){@LMq^$|opmYS?d& zjyw3F(Tug4KfOk~1LwQHy>D-}V4nUpRaF)9pWiv~Fhd}Ju@P*tl6&-$pgKsuyk~bR z^uslOxcd-jfgy*F>Dl{tOTdR6$%^1+cYXf4;|xfvm;Lxmb-Jkie1Ug$rxpqCuA&cf z-dDyMqd?y?#?g7oxN(19(*NvW)|wXmmd6y25!zl7o-uyy6F@MgIK!TjFBYuH)to;p zsEUs=L0R?FsE(d6@GT~zHhe>xXNsa5{Pzd3V(T*f*?pJB!yKV}p}RT{ag@4l9nsyCA`rA*5%Fh!2|4qr@Yt z3J@lL1S|PWr>x($V*W7{%a;&^Ab4omZT`AezZ`20l@pzVt$!u6L1yEP4EI`bSoRh{ zv_8uK%F!y&41A8Eh>FGgR2GWdBmhg;j6x0DG~B4`yM&IRMW?EbBQGzhg&`L7 z+AVdo7j8uFqhJAsQI)Qp za@MLOTACndxdSXML`*_LFD#6JuG`I=;tUDN6=>5B7e7tYdi{g>ei!m)d0&OrrO`em zaZWMu_<7CQhUs?Y1`Y2(&D~ps9*O z!lP!fq5*m?W4wAS@>vOzhlCFlBc|x5>~SvB??BbO+{_Xk*ImoM39|HX-614$$YBP{ zQMXZR&(HA*j+o%a-=3Urt9rSvIJpL}yX%Q{&#NRk-RAozQ0`^nqUp44u6AkF1>R-w^0_4Rj>xZBg3B#_iO;X9@^K<)@FnZ576p28ky zl%NS$M&)!1d_*z+2}zQLKoq<|=g;4&pgUGGPvW4|@JAL-+GT57n~=DoD=IxkP&l)S z_Zy9(nd0tHl_=KzyL?U2vHqO6oW7{TTP2n^aVl1c1jqbvQ=vcV%X53Dl-zU=e<&Hf zpO*iHWWG!BB)lQauuCJdQ;AOh5q54OopcZ#27?)X*gZK@S;CF`TlXM!_#>u#B1%+^ z$W>D52!4K0dKE;+K-6r|=6S-SL#|({F&v5&QkX74;4;n@u6Yv!BW`pm(Bwvu!Ji1b zT;Y24728=K`k9Ez_$7yzK--2`{?T1aV84O#42&t5P6l7zg0bUlL?4=AA408Blm_CA8|WxkHTTe1cA1KVA38Uz9WbWO-+dod_h9g^DldKMcBG4NNm<@ zbTCDT2`We(*M*c|VE2T2R;3YQ?E#<#rKxNxy*B)*S#6+Tlk=LVKhm~t<5MqGa z_UShOKfy%^5OFS0(?_t$NqLN^4@NXpssiTpBbe~tViKXq)Zw+Iyr;*Y8}NZG+6y7% z!+?abyrw20g3Io6(2Ne$zScVga=>TryV56ZFoKtA)#ipnk4?88lb%frpg&XyEENzO zMqogaa&AySCJ&UdJd#x+3~y8gmJJMryhW~7%do?*avU5PNP`<{w4V!$Q~`R6n3&iUd`g8Iz?XneZrJsE@d6hlKv!AdG@Q@xs`|ToIBnTc zfRQFR8(Q8LWKY=>3xe_up;Mcp!=$rgfC;Dc^SA1%^B1i=hGejR=H1+ujudTaJ*s z0~+)&2%C(%58z_{gL?zY5@`7+)xG)l?VB?wei)${Gpp`0t8Q9>PfveXdx;YI7CW~t zYfKM4-X1X$3E`91wPmPP{cQ!>C2P4p67(`!22lI!di`p^)l^C6m8tUW`G5(C$LNqL zAP)hWF$Q?@xP=9Mpa+81xa^*>$4ADg3)Rl=UBN-NNc$FNB|Zv9y&%|s=Dj_1Pyv?^ 
z>%(q%S)$eSE`I7ZE5{rSTAF*zHJt`LKbcF|VHU-5YjFNP`Zg1zapke@r~!TBGn(nd2ObI*!! zrL&V9eed&U`~4{hSXmg9gl;8MslmvT;fMi-YTb1AaByS1*)Bak<5BAm_`7H3*Td|t zF9ctEUTFC~cES-N)>#XXyzZ`#QND=yySj+?pdX~~K6X(Lf8P{|qN%2dN>C zcA>#PXS$>i&t)jzD_mDhL{aLYea?|1xl8fVa?M@lkb%KcmfY zs??mp+fBu_eN2Grcr9ib(Yn|nV#wXPx{u87VplQ6#xDtXp#JFYM+Ub$LFvccpGD2g z&c=;V*sM7}{(rN+Vv?L(qRN)po2T!IrBy=4zObd{RA}Xu5bdzATf8)kQNyg zRSkfGxv_HKErWG|o4o5un3qAL$2YWnc5Lbh-Xa0)12EfF8fY=E4k(Aj46r=$lZ^7rbqowug)jD7)%X)c< z0*TNnpnd`HdV+>&D*yA%?jWBROYwNXy8N?Vt<`G)x22c(--KP`qMWxO;$i5*8*-Ni zpavFVWMIvJfe-*S=ZN29Fbn`w25F&%{`Xflp(v(MJa+p3{sWj~#9v3t{9k|l-v^)& zn1j#~^c?BG!D9Fnu_HUao)3}+At52f6%`7Al?ZzF7H%kQ?I_o(1e#Vjn$eWyvg~rc zT(tyNw&r!rx>4JiuMJiUwNw^WcL;H=G9t`q_rjW&TD<|dL^N~vw#j5MV`sfeprE1w zP0(e9id&xX=3=e%qr8tofB#7lULs&kBH*j*9XS6zB+?q-g#lE?vnC!!&UjQ_3;@)D zL13(TGk(zQhCQGn)hbE1S&^E17j}9hC@2V^(2oH~E)3K)3P}YtUBqqM72Ov2>^(be z!n^qQ2FM;&f=2Xn z%&+k6+h7TgrqRkR4~HM8&wB1AeD~`DT;pzSXfDa*@6}_@Y(mYT;N1Tb^As~P zGcse;5kv3&br`hss@3|i1ZF`8xd3Gh8juZoudTfD_Z;Q6LcUDaJSUvQ%&#m7bUgjf z`wn0J{p^3{1bGOGX)o-L;WP&_tWA)9eJ^?Y}mJUvefnaz&<+-h3U@g~-u2bc&q0?a`2U=%un zDS&w;1yE>tFhM3KCxK`>69R!*e+Mi**qU^k-!c!xK8ylP6(DZE*?QQ4z4?kr{%CxT zHGD1{4SQAMW}=J5#?_PvKC3@X6T8brM&5Q6CR$xc`tU155rPCw#au8YFr)09RM0YpiR>S-VlDbdVKe{ zNNOUwilpT0*B-E9j%N*=5WFEM5HLMM{(Ut=ro31W8SwYaby1Ti1Wo-amX^$bP>Tuy zxz`?WOKiILfP06%5@oIs-US|7Mqi&2fy)I?mYkZp@zFXP^TM(pMc@G3#_f^xW_CE! z5GpkbLfsZX)d+!LY7dar?e!DoLzTkTDV@=+HbVsp!C_$dpPWsiZLEo24)^%Io#_0}=!g#UfXBEM^}{bt=ukyKVwQTy55mWgGFd4@ck}GYB8l8!=er z>WL7Pg6W1Ki}RnO0%oom&i`lTg2MTL`oez}yALEuBix4p^AS-l284U_K|Mv&Zbhr} z&817X5NQ-DK#7V>PE8R2?iWy0%PlKYsrr$mCK&Tmow!h z9|th5o7MYUlTtsSp!kCFfm;Adg;JjXJmvv3hgVYI0F(!o&;<8ezJ|eoR$NxL1vzI3 zJ8g@^ZUXee0FdBx1S-1~4!~_1E19u%1Y*TB*HZU4r-uIi9fKzu8)h#`XWoJlw`-4L z4Ts*8yYOs--H{q{1l7EGt)4-2`Xr!fnokDsB_ISF2|^&x1%!pofn!$TfIvKeNwGIq zEqCk!fK)Lw65eZ3JRQ;iYB>SOS>H27i2ykZb03{99I^o)ZUw*{fDs;7TFDFiKRit7 z{N-L3Y6RRAaG4lDT??eHp#cL0P|3-}-3>qruDG~Z-FFY{C7XZ~iy+lj+t2fJf|3}3 z1?>gKg=}(68vkzc(`GiX-lm`zA7E(}#^K=PyC@Tq8{s|%Epg$xg&{5E z(18#ZqA~9j+yz1`dk{!vCnP0p>Surah*>%l3Fa~+Y+74d%9PHufQBG| zUZI85!4lk>A(A((J8!bqYxCqoNC|)c{yn$iBiR2#5L0Je1h>|DYyik0AMZD==KEY6 z1f8Lxq5=XJ5YY>^(*r&R!sHMzDzHh8sR}QnWc&yPhlb|VEI?O*#sL(NP6}1#@|i)5 zPj+G=A<%Y9Sn5O}2mvmGO&Ta0+du%V0nRfNu()-6ljiM1hb@BU?Lk4!V_v6~CkQ$o zc)_4 z12up?1a`Lyi%{NRTzfOydN`38u!#}X&Wq>4ATIM{T}6f^d=@Eis}mHq!RuAc%^8IO z;}&Rjr#$|svIIaKO_P(F-7bzD|Ez{LlarI18bkx`o_LH&TsQ+PpdqLWy%=JQbAI*e zmBU{p%pO&PSfAGA(Vtse&H#pMPvs{TL?;^&xhYb?h06M$cZ5P<%!c;Ao9q7>u-ge~ zmI0ebz~L!i#fMsE0K3cJ5=98qIRPR=HstCNZ)=N0;~xKK_CFISbvz@vn*j(sUHx3v IIVCg!0C4l8kN^Mx literal 0 HcmV?d00001 diff --git a/ex_figs/quickcheck_3.png b/ex_figs/quickcheck_3.png new file mode 100644 index 0000000000000000000000000000000000000000..fcb05d314346d4e56f27aa56b3d857418a4835dc GIT binary patch literal 41804 zcmd?Qha;AK^gn(fduJwF*|V%9lv$KyW>aLZ5HjwfY?8=hW@Ya!s}Lb7E1B6VD|>#= zrO)U4{rwNWo~Ply@9VnW@AE$AbzbLn&K-RBjxs3`0}+BCq&HO*?jZ;^20^eU2=L$) zvF{_(@Yk8gH*RVXz%Orthe7Z&p_9t}#|T1hhW^INmd&z;7q7S~>bYt5%7_}c~OP@Kb#vAE7yjrH*v5dmGBwnh_fnj}TuL_LA5 z&?SY3K?LO(5?mevBK8}U{H3)|%Erf8E^^I1bd4jnW9 zJN&W{mbUqTD8MhRGI}I9C@6@59#0P5NW*)H^$LE>erASTKtE{u|F^euwvcLCf&N^w6!{5;8?I;+D{{u zM8+FKjl*%%r?AF@AC^!Op?)duHZ^<=1#DBqStcZ9Y9R?S7f% z+S;1jc!LiX@-<$pIa*!j{>%(_xN(v4!-p4Q1&#El4hkJq-wM9CmHDYZPdC#79;)c- z>YCG2P|%1!Q9Ht+6qX&c(Z@BgGtlpOW99f@wT)C_QLYtzZUeEWQYdePFD^)Ue zPcnx}Er`dw|9)i{&C(5DDLBKYliviN#iXUNFArCd6VviHjnzMOo@^o9-P?ZDtcDFg2p;Ex0(zHKU+hZn1*TyczfeQKH{)hMIHM6vbaF^xfF^?%yyIx~wleE-zXk_WKm554gwLPA2V2eK~H zDi^r8riL9GTW31oQZh5ItR>R$>A;oP;pZhj=W6E_JN>?4B^rOP$|xycjO6m8Zmq*0 
zNzaW7&;9*HEIO!L_qVTCb5hgLd~9pOr{Yv=A{MH{z^xt(i=)$VA1Uadi9?v*$FWbpjZ$)t_7@=>6@48{qDm zS$BdWBXyT|0$F@9;EpH9JF?OT%Vk#I({Ri61pu1z4#flwDoI9b-TbSnByQZe;RN0{ z(e>ZuA5-VW?a*t2skkyMOnmn)LY%BL95}RAO1aLn;#zo$9nGcqMMN-Nh4s_bdu@H{ zPP$6d^TBJ2ms34CEGUS;szN{D4A##NPm0XzR~t{5tH!+eu(7cxmX<=F;ozG? z6ot4Md#@`(z$adLj5qZ;T;9}_HD1Cof-Bhr+tbrC#I3BfRM>Ol;Te4VW`I^_XXo9e zqDJ0L8Q=Ydte}vPPfgEoI?Efj0+noRa+@DK-C1cJDtjd2JgFePKcB9muKwh{rXCIW z#%xbYBpH*GTwXsrKR-z{=k0rre;j=>9&XVY8*^~Py?Weuv~94tJVYfUBQrlza~Yngy(m-b zwlur8XFc9f-~7maYq9@sj+W%pZGL3#c20M`K^589*jSvDTdr2L-N_Cbsc;x*b))T5l;8nvuE@& z-jc6hzouT&>!IYjlh_2|^F3RGLy`VyI$Hf(q0v=1ZP%p%(xt(oC%+DL8Ekv&wvJ%$ z*Av9;Go~K&zQ2tc#oNc{d%80D@~?_cv-7ftYdCo1%p`t(et=$P;3s5r>Uo=+u3;>) z#!@Mxo^wemH*ad>YYq%XB_)OX9W9wy_2<%vGLQl;ua7+LNzKmA?wh-lBEyuLno4m) zrIXq3&>1|5|9+{0zWd&W1^oMPPWB{c%~$q#_rZc=KSUUysYGUM&lrS6(DBg$ho~s^ z@$oUbxG}#IpR6`Ji#J<81$lWtg`KbA`fk4FL=DaX}wTyQtia1Uuj1Odm3@+6p-}V%q|~0+8K`oAdoK_Hy*A&QYz@Ul)`uRY1oP<@2DP`V zd@r?7EwLNA6crc8je=ZizfCJgq%?Z~#f_}q;6KAv#K`1#WfmHIa%O7u_m?kUZV<5f zo*X@)q^7mj!L^B_DY9Q z-b=D5kP_=y{nQ_i^z7FJ3A&S`RlioqaIFAnEz3h?trFwj90h+NH|xUL zia3l&mRR?4mRNMsikP<$r0V3g!jWcf9m8eB2nv6baCx4!IQEY0AMjS2&)6IMAU6c-2do*uYdXk;)%wOeCbM>cAkzoScjdB?K-d@MFO*{ zbKty3lOI+as7?;XPZWU3C<2F=2LQ~mc(v4<6$t>G13#wgm6*RkDFr}fHQ@IoBxb5> zPq(RIlT_nAJ4y4SAF9^2wrFy4a(?`MvNFOWeeIeeWWd(_t))}P1~BB8mX>b$9DY7? zE5A+Q;lm6_{UPW*^?(5bH7ct9%6>kIPl0hDj|UnxCHyBPoG)zw|cNFC0Fui|Y)!$AKCqyPsX>f(ZsQ&KmZV(`_YG-~*Pvh}^p5Z}-Zua7hlZ%*m?5~Q5(>eT5QF9`BQ-bDzXx%pc;C}s zjg&op6|n zOCmNqET@U4XE9zz7jc{V`m|ENTeTQH8O_v7PE6#ltE+37{t}&O@%{UE^S*42A4dSz z>z{~)xEtp4xlESczTA>$prm9YA|lF|d!La(fJ_2Eqqsp}R-zZ6UqCC=u$$I77!@7O z0okCn+Ifn)Gh8XX`uEH8)T_~3)5n16JOfG6o}~mNByUz6N2)at3P4`X{GG+TqS&j5 zeq$Y^QTf2OWqEyl{dm1Pj}p}N0QH{i>-+flv=&s2<^Fsb7#LXXv7Xjki6wb>Udq+# zzfRx7U1!*NyqIc(g z2YB(`Ev87@#lHgy!(+Lmt))GRBXbLamO7>NH8${CYVd|?;48E!efXJ`hx{xuk!|&Q z)SXBP=wS}0aU*E5#|mzS1`Lz0Vaw;wA+Z7mw{}*mr^tShHiUAu&fT;nnY1=)32>7<(D(O1^YVykVZ3XQG zAm_sLBq6ib2+Ec4Na){LMbJyRa&P|`#s?_q%~GEN(2A3EW|w-h5(b$Ikux`EhYAA& zan!nP{@?+Pk1fzWlxybPH_E)n23$48XZsv2zaA#V?|3M6(FZ&NAhES2n2KigJVL@E z!zg>SNd-2XyaE8CF#_&=)#dSH2r?YxXW1iigd49-@`DNOTOkuX zWiej=R05G?#D@S;03S^fSJm!=L>394`T%IH;NVcL8(+M*eTZ4}OP=EapZ(42kVoZN zI)U1xL&mvNWpVWM&7%u`o;xg5RQ|JH6I6GO4iCRo+8ZqFx?6mEcl~Z&{|~D_U>_1D zDeC5zq|I=Wro!}2vt8#hGBQ}j#Vh+<=%}epH|M|qtaj!APWj^LN=;T*(a>?6<9Nfh zwDk1Mo_46fBEZ?Feh0F=Y&Cb5d~yVc2n@ZS&DaN8Lf(f!P z0m@Pr76>c!!}tEqO#$=!@o4ZrDy}=M0s^G)R{*4oSP7dx0?5oL`C{s(+@2eZ=2C}4 z!`T^oT4e4C8IzKVO7s23uDNeggVqzgxw(0%aSRTyrL~pq z!UYAX*<_DNLi+H}E>AvOO86R5N__va$NdO<+lXRWyn@Rz?e?R!G+=dkC}Wt$gxCV) zce1OiC0oUThmawe$$7}c`viyQ6xcf>kI4s$Od^4`znL=P`EU446CgbH&6_tR-IwVB z9>d0c4@j#8xa;?y{ydTB8`pRN?{QcJo=r>b4H z{m7*K$x87J0bfaTp86ao?Rn|Pj~@tio?XEfo^(9p#d-MoHmO!+u0 zVfWww3T=~;WH+1WjbevhDc*r(u?!{|Z~6oYN2QbFqZ0F0BJbVbfn-atAx2+cUm&fD z?(X8HWo491OyRIn@8i9>5Gta(-akSkB6JRyV7>o!8YfS^D~0D%`@c!7asR_`Pg9fW zxZ9?J;r{*mGy7qBC(ITLRa)PV-p=g0e}+n5V$_xDEEN@7Kof+K^RRuDS^FzCZqNV$ ziJ%N3h`IFwKsZ)^1&nP9bYOnGQFecGL34JHSWVGVcH?n||LO?aNhEoUeb%%e!`yz0R4OIcQcuv?NW)u z5sCEfg#Vq!Hq7K|7{G#D(IAJ22xUo02?4l%mBZ+(xVRvwPkzJGLsr8|SE|YfmzL|D zXGUrs(~DYBfSrmTemTo1>4bpucmU(Udb2u$g$U@E-+eRsi>oHTtupVyH=rmYYu$F@ z91v`1>hx&z+F2oOT0i3bbG76`)u{XVhU0x(YHI2U2ze#&7bpmx6*V5$cCCi$Ok(o!{mU-+ zL2)f={X?O4ywT4O$|!ViZILW(_En>J@2V}G7Jg(x5}+tF`k?bZp6C$@eHT?@O~1Up zSnc#18(4b)Fw%4X{rBI`a_eZVSkOjl7{BO^EeJ43ID`TJ11IOZ0j11RQc0}P<;%Sb zQvi4fKPoWLTgTm=9?6Tl@9Ll$+YWIw)QlkYH6(w%O5cIZe(&4LeNVScI4 zirT?d!a{IUlib^3K9GxTCx9j$eXA@bmlFWmJV>_i?lstnOH>m*eY7&)uy`-TYHxZ#rIV=+asi$shz(CSz0&!m* z6Tr=dwL`^%1tk=)zmUiG@9*&W*>C4}c1Yd+0XoiIEv*1x*?icUKDLJU|NJbG*XXr7 
z`e-olz`&QJ!^HnimgXU5eRER*LPFTI34ubd&HDbWgAyZSDE}o;*>B!e>JSP%kEMbt zCnzr_!Rj&bW=;0krZ*=iCyVWe#mnu7D_e?x^J*I?|7E=DPBQx8iO}CKHMN_o<17r} zczHSnpFm~FoSExMX@iPY*zD68>;FJ>gz6^{q_m06h29_8GX{?JG_;|(ptHTm%Ho2G z$Z)}>!ls`HM}}>H zZ9o}me!=!8RI0@u>*isMSI(k(InWI(1l1Mb&mF&$!0c@9*2fzg8^Z3(dMIvbGav;O zsslNe`DBce7|51<@o9Xv)9q1lvVJnYhwI8YmZQdohK%BlV1b!1@TBl>x=hYugBn3w`^L&OVJ zFhb7*K$MxeQYrQD@bHok=Lff?m*2kKMJaroxP3hJ3Xq=?&%c%=jFNZPTbI-TD=E*O zeeSk2@OvREB{nwY>Ju{M2*#Z6JX#=SJ5Pm6m>=zL!FC%q>&?XzO1!pg;A{tew+%ti z<`B$-X7L$H#g1z%_FxB@{do>iP_$YozQ4bH9_qc%3lHFO|Hw%4G0!=shZYt$V{pI( zdZiBuLArCOTUSzS=UMVCFl2gURO`y~H&SNr%#n^|*?lRxx4Mzc*9=#(_}jMMUH3<* zBPXl6xk#w-6%PhdAm5^(%_QZTU0UN)p)U}o#L;D0)_Fy$zP?@rm;q>rsX%okeGfg3 z$T0wNQV_S_ppHOAQmD^4T&0)<4XZKWre=Uo&}Ng9lS4qP3x?2m0OAS(A>nn9g}>!$ z^YKGwJUZBE!(id8^=Yc70!M4_?2MibDBEt!B?Pq*?INv%{(?%_#zV^1hs)fhHIoN{my)A zjOxWu+7>_RVIBcBXnwGWh4W5g5VXA>{46$$6M29K#1rUau;1|>K6Gk6;gCzev$Em+ z0)Pda+vw%(N4YL1I`A$RXc*6VG~aW-_g;iD1Z+|aV&$n^w*dsF?qKD^k3SnIA%#;F zw&~*zV-O<(GF2cif&Y|F$hZY+XS7ua!C~5+cm;GawEu!2(5q`IwdfSK?4lp>J@`W} z>nn|R7eKE}lyW0NRe()J@kiZ^sMsZCVkOFe(y!E$q|0cd0iDR8PwD<4*6Kr0}|#l_v-or;8-NH5|^zN^Kx%BMiWuddcn0Z;}~U<1=>fd=f$ z)=;|Uo*s2)63AIBpxIt_sF|Tv1EC(2YGp9b@&1yK_s-}OXf+6BSTA(s8dNzzWeyf* z138aKU0ogRkideJEG;=934}q7&3gGV6^r*8R=K`?uupAgMn_=Go9H}i)8)*}%-~1K zo=WAQZ)Nw}0JgCM3lgyGNiLulcmsktia8+g;UZ{PXrTm_8VVI1ROR{vfFn2LoE^qrDb7jiCQp^<_s@FBN`h0Zy?{JO-SR z9|>|8t>uX|Q2yF*xX~FhGZ3ZUBXf}7Tsh5=wek0FE0wx58+tWV)&WIDMd&Gb0L(Hf zF+#IQHH=>526VTG!<9Io%hv*-O_LsH7_Q|e?sbWSL!RO03~&aeFnTs9UGAq^n@&zm zz0fbv3A;u0X^Y@KTY#j?bPy`~g!)L zHa4EF9QtmVJRJGMAPRjjYAB40qYsRlvd|bh8jf_Ri1_qaT;B=OB{l{Sz`s1dxcDxr zG-w3fcw^5%&lZyPfDn{9D1oCa>72xfJfQV>y7JrcqLk3(nuKmX4N%D^+)JPhwnFC& zJs{XGE!q!&Jc6ozAzQw?zww=+J4FaagLoD+_FA}D^6<;+OX1kYs8kA#v~M;QX@K>U za0{9g?~UN*#|0b{Go1kxXm(@gT*G2JQkT1-Mhw{G)?YMOi?87hIkd&i2Z&S)6aPUYdkQO)srew|7qmJ=B$O?B+=LU58Ui0|TdcUqoOpcw)xcA@i&HIr-K@>vu zo3DS?d6prpWX#gs1-e5TO%R&I#Kf&Ysxo_~1gb{Z$yj9gQPEyis6ON8EFg3%5PL;n zWvC1Z)gmpVj7jv)4YCDuDZtjblWl!iX2#tV6%{8z5urp6h&SdI?=aen0n;b~ctH;g zZBc|VU(;Vd)_QY`;6v^0G$~G5Sr+Ju#7Q_ZTXw~147XaZp`9q`E6_lDR^adS2_%j3JuNsGg(~#wP(5GI5V090fW+i36hD)p(O znI-9^%a=i-jPyO)H=F5*0pGla`5~*z8`EKd@0EMSZR_BYurLM62mz4-p!z|(=HNPd z#irMDaQ<6l7sq<5VQq2;iXB&}`@-5?3%yyYq%J6#h2Dvt{6*dr(CV%v{rF@99T4ai zJSC0I-tOCKyjX*_d~ZdcyV_gwKp6;gEuN~vwyTs;4#z~6+JBQS9p`ed6wTfihdc*SDdwt@F|rnYK`Yj=|zw zJ^x?Zds`1kSwT40z?t$5Upt*@D)`^ArERSIGoYDuz7^tNWyNsN!>rRH0oxg%+cq<| zF0Z70#FqmOjXDNG!Ng=#xI|&x;C&8!r(atXASqV~GP_-||2GD2#I7%!5(toS>Kxvg zGYGc~CU^1qH*|!m9_TIQxpO=|krE1|GKkL0iXjPM;aBJ&Rd$4R`)JjoYx54+LD*~4 zDj+aWKJ%U#$T(z6-Wy-Ipq(NH(Dwbq6GtGIChHy!4uX(7!qIdIvZ0u^E36_-Uo#8G z^!jxyw0H)g`sczFRD#V=B#AuTwnH5UBDM(%qNvxep$`mGCXCm-sA2Y{75b&G5)uqF zTe;B9&}><ieue6%AS%| zpl=NStL_x|5V*Lw{5+!un^vBloduaU`?{as-~vO_YiN$34~0@flb=Q0jvj_1xDYvL z8dA?gO~Zn69T+`vuK9{fOv?)L@O~R*6KXuV{$V68Ihm|%C=AF<>jwN`SnoxHtN|Xf zN=Y$X^V*aHl20pS90pRvdkRlB%Ks4>ix?-Bo3*;m9>g=)j}J9Qf%Q zPXj;|{esF{oyzEAdKc|UO@UiOEzHxueLwU4d#OI6v*0vh7q-L^8&QVMmC^IiA_|#- zKY&G=2*iY-b8y@J3!D=W5ipsfy3>;)8wr90$At@LG&D5g#B6ClN3ulD&s&W^Pk$Gn z^wsOv&w(bD-a817j8yjamhQ5AY+-_p`et}uZ*LQH2FdBAJ&Kz*V5r3$LJ!o`cH7cftLlV;C#Ai*doIDGzeMn$1N?DWRM$CMgw z4`4LV<#Il9A#L$9qM|VEZb$gd+>GHuT5cPnhsB7irqKVzykt~7PM}ct0(&kiUK7y- zw<&w>osH|PUK}{Al-k$OVMB=ruf+}))n@hi2`T$^PapNZ-IUlT9}8~Wgx=UPB&-s# zD!>vg6@rV{PA}ph$9SdV^7%SrO`hzlz}Zt4iDlg zvtyvI|DuDFt^M~6^bq_XQNe1{;|PAWFvzjzdPyfxP}ZE=oAGlimBHA+s&d)?z2r%1 z+AzjFYDpzbsZEo7I|!azM{cUraJ}{`dLi$>Pu<65{jst6!`@REj6?E2oAUbaMJ0or zfACv>jd(e-=)TsZrcO+R?i*{M-6izmNef zG6b=y{o?ef+7FP~Vm!DB7FawgAxZ+PvzgTv3C_+HM$W?eLqivgB?$t8a*0mgcNQB> 
z8T^@YQ|n`s0!@L&X1modL^RzDy30OZ}*`)TY`@si7!>Zk$rr2l-G*;G*{#wI27W z{mq@D7Jj|lzrv3u5^ZX9cjuk8ap)P|x#^)UPSc9Iqf%lx_IAHVnl+}X*`@x=bL@J; zzmRlXXXGZ~T=95PIas0z>)#8@^!9pj!wkHN--oCuY$2z& zFWJW0d~;Eb1wMx>HGnXv(I(cuHqcZAO9i0k7p`ZET${$a`-f1rfbPbjXowprC$ZY> zYpw`Rb{BL=7d|nEwfQP3$w$-}!L*yQwAOk><8taYzYGq#)dU{2zP^r*cR`E8<^wl` zxn3KlJpnT;&XbJI$=;${QaJYT*)uVWf)*CoUD$0=3&m*AdQZ{eQCBJFL=Q{w=i+X& zIG?8g<$0ew5WSHzU0^j{*`i;92qoo^iVt#4i{3?^{^bY-=e2>GR7yr+wSICwcdx(0VIqdGX0>B(_zP?ziD-~@5|LKQJj;|G%~2gv3>uFPTfX6MUPFQXCC8J| zLQO03G@L0P6Aog>E-^CC7xSBlb7y2+zfd@GGIYJ}!<2em-Fkre9=YFpw(TPy51an) zE{@SbBUGnuOL@_dSWl3*Wzg(?>X?qa?menfKDx`H2y)P!!51qe>%@!{A89P1}; z6HyPNQ zE4YHNUK%B8{+XK?b4Bqpk>lH-5q!8>ElG+7)bJbk?%lJedl9sF&KB3e zb020N>k(woWrj*lx9)v+1EHj{IIkY_rQqz9e}_vGY+s3FN5|4=n|IJ2aGLH_{`gC} zokyj?)N<-L8G`OJRv+z|r%91^{^A#9?DBiq?-U2RmfU=hFBOJN0TjaV_K{!aAV1c2 z{zJ)#j$Z#FdriMU3&qbH;UfaCaIXrflY7UwQVQAalJ<~T-;^21NM1V=oq?GzV`q2Z@KL8=paQfA)qI7W-8R5?v=Lb`O&<~|CzlOBkY{Rnx2^m zTErK}q5_;WpljN=*py9Sa_A~3Uz<`S!It+eyLz=?yJ_|9vdqTJuU$2*fUGkQKH{`*lU$ykfpsC~oRONsivKCPX>Hy*zl}wK2xic!R1t4-dRLwZA##%} zIX_&M2+Xe}ec6(W+=vk5WZyYGhnF1&IP^0YkYv>|+$Qh(o6a{S_#RQ+Ie&`?gIa{{ z)FO914-h5114;g?j=DHjm#*S9J$H#%v|y1-+kf`&p~blLY`zSE7JiT37~|+=3z@uw zl%R3+FU0cyBC%D3dNb36x-M`0<<#7yS*GTq$Gf^_UQ*0}q zxE9E;{MD@j3p(zt=kDzvo$S1h#E#Sq92r2J-x@ z>ZR9;ev(=IQuaI`IUDu#pDV`XV4Vr^f1vkns!z~UDm{%*kKoz!rK6L>a^pD4VdloK z=U-A9#m7`1c{Aa#YWVzHw4E+?j^x1-HjbR-v+E)R3XY{J6`sDoCi9-3@yer|P8W0@ zu0hv?eY&PxIpX3AU0rh%wm4OTsT8R+M*poU zH;qv8)0d2IevhbmQRlq(s@J0f`(5-Lc1Qg60qnE=J}7*yOnYJHw{x53A&z44yL<^Q zf4H6ripFIi*|E4yYf6;}x6t!}P8{F($@i0nNrc3eN6!QrjSHR51!udT0(WXhAA;S% zI;YmqeI~t^({%Xn^zAP;>f~oU=*TJdwg)@)Dp!T1%Vza1s@_zn__rGZtp;*dBmjTt z`AB&q=s5Tm1HtRWc$1437t((HOCAAzXAEopNK4dl9Ev7e z4fe4RuPn~4c_epp8{yqC#^KJ|)=HI+lSL*fyUp$)U4rJ|akbt5z(Blmzp>+Dll$$X z^{G(#cUv0)k)L~iuS+Kp#x7ZFj^R@vI|&M*A#K^by*sLP44 zD$^q2mZ8mmgWHeL4fkt?d)wn&uj^<2cFw-HYI;HK?o*mTwQwWM(qJXwcD=A(Eepdt zT-F5QyR--#6rNm~R8WdO0)m1Sj7B8G$i^>0B1$nQ?fx@%j$2bcl(g9~c<$2+VU^aV zhN4)*W!vOxJ%g&9uBE=u6||lRBiLv|?pv06$}d4Y$|zBSjF7)9UW&L=b8jE4JB@QA ztzwuNQp~eKTV&>~Ki{QG$k~2HD3W1pi54#UoUM_LvJ-{ZxrJEer2~^Q3*mRTo(t~P zMAW~8kX?K)#f0oYV*#W%bod97AdDor=*_{GUamt8{QJ+Ja2R2E07C&GZ7EMzuJG#> zzk45|rl6I^TYc6rINqr7&YFct%F zx{3$f@w>LpV!DC-vUDlLe404fm*du3KN453ho7;2sxW=LxXC>c#i48tqNE@T%yRCm z!}oYlp&kc8#~7gK#+NzT)P%Jo(7E{pa~UPRM_$l^ZU%J=lo^7H`v{4fcSSr@8ld>@MqiSb2Vzzg&5Q7)DX)@~qu46SB7PnaZPBotmE( z%YO*lv0`fTvcfd;qARKCO%gnu2x%ueY&0 zId`1DgpZ4R^906e^0ve%D76FI@XB@i_G8=_-kQpWH(z?>gS7b_UrrlW)9RuF;LjYx zYl0d2xUZJbIOd5BeDUHpOfk{?ng!u6ATg0~VI0KyytHfug|j(?)$uk6R+IYDiv!2f zBaDD8sp(%`DsggCII9tEdpbRHCr$06z?x zGQ-SV_1mf@kgV2 z{Wp7_+hQ2H%fy+(#>~9av_m!k62uMCZ=Qc`6MFD>y|eo7=b#R{#GqUS2mt83ew_GN zNgUe){S`zQIu!YaHD{qW!UA(?YtUjuXRhXdRq(;!{Yw~m)Uj!Vp$$~Lg~{L0ckkZa zvJVYX>2?>GSd73lwfGhYmzd;IodCr{)wI3ZFRje|K?At$|lpwKE0u!?Bzv!&^^*> zv){*K_&SDF&mn*2_CF-r69*`j;aaMG-L$@twt;?U5vWpp(V2Vq#};2ViWg3|#|PY| zrT+@aWF-jA@T4`?$Nnjm)5-GIo|T;z3|-htz;d9!)ars6oU)2pq+11DV! 
zN0<2=mKnsG5mod6>EbT#M_`F~(fmmK*Y@_osXPuk4ek#Q87cegq^^Nhq2a?Z;=@e; zpQ(vkaOC-~wAA!=x@wo=_~RO8=_4{9PU+neryc1ps0~$75Jbg*_VAw@DF<9lpM8Iw zWs!*I4~dt7LiPdA5Q4egCa7%-mVSs<;TlZ}T1IyMayC_r79#nodjD=d9|Qayz@_xz z*VfZ-j`CsBF~LEYVW}8Kgw*mGc6|ZfU1p#^t&YCUg|dZaM)z)2yon4lJXMT1+{~zp zV*&$-lH&2Pk(K##D{H&)%{(eGw@C`Bh%vH<|GiW`StU5G|9z%Aiw=skptfTnLIb}K zhV{5y>}1$%>@HkxIfV2`gLgi;`=Kw5^Jv1iFo!pD3=T^N3%acCG9csp8JePo=CR5Rqzr+mu0IDOZXqY_D=cC zIC+pu=>G8_;&jZP@IOOoEp30eo&(6NC+qD*p@=AGl^f=VA;LbIoT6ts@OO)$Mp6dG#??{w@*y zz-b&kM7yg-`%*S7z6I5M@Lq!bPpiDx_&m*#w}S-nU9{kz{jC4WMdIsj^Ezzv zH2o?i{LK!Na}Zj!_M*hXSZDhgPd{~UFaa~8bEko@Bq=^ExDgL|Wle|7h+!;4U(L!v z__aaVqR`L&%^>{bPS{6(2}LaA(uR@A{#UCK%mcaq-CvHXE;bJz%k6bB{AIY`G|r8n z?EObz#VCq}C5 zX}9v4b+MgWb<^|Li8nN+^b0Tbo%`BxofU$9hyNM@@-2Zs$gfzp;%#&`n;EGot`oZz zd#+$5=fr+9Tw_bHp>Z|;K`A%avnZ2u(>J`XyU~PafBZ+Lw@O$HgHn{7Wxox3B3dI7 z!+0+|`mz@9vP*nK>$;a(TilxpFL$})%aaC>cmayiP1`rOiKjID#)#OfPCufwIMpv) zZk$X&z7e&8FGY&ELAkG1YHgNpCTZe$BYAnlSwYJgrLhhfGTk@?h`{-STUAMTa-kTF zfo0p$JSx<_J~9h1 zeq0)AXrdADzmi5@Tkt!qcRq2At{;>YIL;jK;=iO9z{V>JmBBFc&RbOYZaKdr(YXCD z`nD!#0O#-V3YzTL82~A?7pZ1kpr{Tu}C}J|N>y;Wb)C0h8ozT11=oXFsCv zBvMAOlZyy>Wn>j_-~HOeruU4YU*~pGw8K*Pte2G5%k0^I zqJ>!kLGkr@=I`wb8-0dJlgIzr?xBh9(-$O|H~6%a6fM`gTrAHm)j3O0e(P)R2k(fF z51+NJgx9_?3V<}sV?!C-E_NEQTH}Rh?!ATrfGIXJh635TQ;0h&)fw@>vjEZ$zmvz6 zY)cMsW@_c~zby-0`&HFAwHo&PmVIFR=>LSI*Tq+_1MsYvVqZ8 zO0PF^=&euTsu_Q=Zpili=L!R+y_gM_cqu~AM+%kq7H?bX8Ft|oZ0C<$eX?;+5ScL4 zA1%V_>OEI7>qG?D5NK^veHy+x6?V(IgH8=b#8nBMf+ z`Bjy*_CtbeZI@0Na^f^f;e$q0II7wO?3KV>&+wIji}#x<6tOEpt)iy%vX4xzewP6+}HM4Qr7S11lsw_ zjkFbHUW64X?a^?Lzpnz=jR-%XL@2(uD?4ACT-^M#h3tGueoX&Oro5}?swWS=qn{rG zBK%&tx))32Mv#|Zk~Q!7W9e*6j%L;*kUEuHlcHD(S``dGU&t};TGvl-+4_=hYhKNd zF1pO~NiW{+O< z*0aV@kFUJ=d{22URw65N*uJN;d7H+ugrKf=;ljS)&x`de!|KMZ3wbp&!-W0k|7l&7 zsvpJr2=t_dOs^}k!55I5JiS2c*uAF{c@<1th)FIwt_lOb4@D=pn#{B z5+iS7x@OQ``M}vKqKX{DB{XB&Jv)wtJaDwb3XjI(?j~USHno+6U}TM1uvoZ`NcnCU zOGOW{Z{?lAEVFYQKbZ%th|i$jv>v5bFO(ue;Ojttv`hVR4fJqO@qd|1(jh1we@~@~ zo(qrrT7<$p;&dwVx+$6X)l zp}?3=1h=xk+HDhC@J+owK`%U2XS{iD(CJHYP6?J`$#6v ze*P4Peo)L|l|NB0A`opD+dO}6Idt~=SV-H(CWMQ?HOUy3s=A(+3NiGMxGj>$iIiQ(<^&{pGQ+cb$9Ow=_$ z*dQ!FrC8L%ZTk|HV#yOw@L~%P^jJ z0U#oCZfU)@W^;1EL9JU^31@pX>b?dJH+e*#6QH=*U8spW(2=WTJQBr zwGCy3FU@uZ*+=fI{&`l=d+>H&8qO3l&OxW3Fb~eV%ofC5aiBag527K>hsrkfV;Iht z1(mP8Q*G0(Ae6+FPYR1O9<|*(ClE4MV_twO@%ZC&GCAQAT<#J|~WKl?Vt?p)sWu%dE*_t)ddBn4S|jm@w?UHzKoVapLMCO~6L;b6J;p z{4HbcGD^Pluci3-nf0Vtd$=DJkqyU4P|?NQ#mRY2R3pVP8OzudM84C!oDqCwYJcgf zlem#anU(CtE}B4u+p)XS^y_cx-~9eV@ouC&yFD+ijZNRjDkEBvTBlGr`3T{t5L3p8TO$ zi2W6Dtl>{~ccL^z7AkxiZ|?o!eJf0-lBnx_uxm3>cD+fLDsY!4kX`7k5H2 zj8fSoLkp~F+g}TrfOh)t7e#)}Pp_V6_4Uy(bfG0#$%*QXPdT<^J<*ZN0xKULJzc(4 zX`eD?oa}zPsRQx2U+M5{EQ27_>%*@u?|Q=jaPbafVjYL&_V3FHeOL{lXc{`hDqFF8 z%rcIh5_zUn6Z4#8qK*6+4hy0<)bw$8(ye=i`}kyYVDiMzn26q@cWT~#(B*wD=#G6& zF;5tXa?jaeyB8XSvMy{po=m^9&rWHU(tW5|ueX=Y#AP7Jn~8UK%q||BVD?96?|;-B z9-0>Ep=CrQWPXi}KSFFv8e~1SuMo7Deojkz^lDd`m-%EesG0nd;_W1(6Io9xx2W$V zK46DA3m-P(Xk;r_%vfi*CJyMZlI#>J*;tCJ2^N`+<(@4|L4Dr;9C)P7 zM557Hh)rYxIV;t>%OLiJABo z`h|Fgf~q&Rw!)Knj!$IeLr!`=;HmvQllaTf`{Wnl@SnQQjwIR)tQV!%AMMwjIIh+; zXYj6W`=wY9|NKYrdApOJ1uM4ByW)G8Cu$(4VvAF<1A&5W?#><>0PRvBH|Y=Zsn%)vV4q!??zb>|4-{dY$k!1 zsRhY|I&V~H$ziFeB+KKxnr2A1xwZbIAnSJ>;gLvn>_er-`nakwVB#E!)K}5 zv&9?h;-1G&{?|=|ueg6TzONTDd8@aJ?43a|akxY3Pp{7xq;ThoN4eOC`O$dgUnPpnxB98%ttzs_MQdg{TU*; z8yktNOilUE?M=^4ZEE;94~6M>T}dTMx4mWmcB!}1P-Nui%G-^7q-nYEv>bP1(yx;^N;W8MTRDMwrEE+}qRV;{$T1<>rJw^eXEnY>yV% zND4CU5U;YlQpo*@T+OyFy-OU#-p7huWx%>!VLb3#R1$L=!&^80e;9kqsHncMefZ1} zLr8ZsgbWQzgLEn>Ej56FNH-`jNQ1P2fV6ZfokJ_qE#2KM`9J)g=UMO9_XBJ30nRyl z?|bjNuIt)7&d8BN@F~}H-9n{F^nLUf`D!vx 
[binary PNG data omitted]
literal 0
HcmV?d00001

diff --git a/ex_figs/quickcheck_4.png b/ex_figs/quickcheck_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..9dc4c64cb7492a119581d34491883dc377fb0071
GIT binary patch
literal 33743
[binary PNG data omitted]
literal 0
HcmV?d00001

diff --git a/hparams.py b/hparams.py
old mode 100644
new mode 100755
index ca39cbf..8edf66e
--- a/hparams.py
+++ b/hparams.py
@@ -40,6 +40,105 @@
     decoder_stability_loss=0.0,  # max 100
     decoder_activation_loss=5e-06,  # max 0.001
 )
+
+
+# Test setting with multiple attention heads
+#python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --no_eval --no_forward_split --asgd_decay=0.99 --max_steps=11500 --save_from_step=200
+params_TEST_attn_head = dict(
+    batch_size=256,
+    #train_window=380,
+    train_window=283,
+    train_skip_first=0,
+    rnn_depth=267,
+    use_attn=True,  # !!!! must be True for this test
+    attention_depth=64,
+    attention_heads=2,  # !!!! test with 2 attention heads
+    encoder_readout_dropout=0.4768781146510798,
+
+    encoder_rnn_layers=1,
+    decoder_rnn_layers=1,
+
+    # decoder_state_dropout_type=['outside','outside'],
+    decoder_input_dropout=[1.0, 1.0, 1.0],
+    decoder_output_dropout=[0.975, 1.0, 1.0],  # min 0.95
+    decoder_state_dropout=[0.99, 0.995, 0.995],  # min 0.95
+    decoder_variational_dropout=[False, False, False],
+    decoder_candidate_l2=0.0,
+    decoder_gates_l2=0.0,
+
+    fingerprint_fc_dropout=0.8232342370695286,
+    gate_dropout=0.9967589439360334,  #0.9786
+    gate_activation='none',
+    encoder_dropout=0.030490422531402273,
+    encoder_stability_loss=0.0,  # max 100
+    encoder_activation_loss=1e-06,  # max 0.001
+    decoder_stability_loss=0.0,  # max 100
+    decoder_activation_loss=5e-06,  # max 0.001
+)
+
+
+# Test setting with multiple stacked GRU layers
+#python3 trainer.py --name TEST_stacked --hparam_set=TEST_stacked --n_models=3 --no_eval --no_forward_split --asgd_decay=0.99 --max_steps=11500 --save_from_step=200
+params_TEST_stacked = dict(
+    batch_size=256,
+    #train_window=380,
+    train_window=283,
+    train_skip_first=0,
+    rnn_depth=267,
+    use_attn=False,
+    attention_depth=64,
+    attention_heads=1,
+    encoder_readout_dropout=0.4768781146510798,
+
+    encoder_rnn_layers=2,
+    decoder_rnn_layers=2,
+
+    # decoder_state_dropout_type=['outside','outside'],
+    decoder_input_dropout=[1.0, 1.0, 1.0],
+    decoder_output_dropout=[0.975, 1.0, 1.0],  # min 0.95
+    decoder_state_dropout=[0.99, 0.995, 0.995],  # min 0.95
+    decoder_variational_dropout=[False, False, False],
+    decoder_candidate_l2=0.0,
+    decoder_gates_l2=0.0,
+    fingerprint_fc_dropout=0.8232342370695286,
+    gate_dropout=0.9967589439360334,  #0.9786
+    gate_activation='none',
+    encoder_dropout=0.030490422531402273,
+    encoder_stability_loss=0.0,  # max 100
+    encoder_activation_loss=1e-06,  # max 0.001
+    decoder_stability_loss=0.0,  # max 100
+    decoder_activation_loss=5e-06,  # max 0.001
+)
+
+
 
 # Default incumbent on last smac3 search
 params_definc = dict(
@@ -185,6 +284,9 @@
     'foundinc':params_foundinc,
     'inst81':params_inst81,
     'inst83':params_inst83,
+
+    'TEST_attn_head':params_TEST_attn_head,
+    'TEST_stacked':params_TEST_stacked,
 }
 
 
@@ -196,3 +298,4 @@
 
 def build_from_set(set_name):
     return build_hparams(sets[set_name])
+
diff --git a/percent_dense.png b/percent_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..2ad21ffeb72843d2afbbc89a5217875410c2cbe5
GIT binary patch
literal 8162
[binary PNG data omitted]
literal 0
HcmV?d00001
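The two TEST hparam sets above are registered in the same `sets` dictionary as the existing configurations, so they can be exercised through the normal helpers before launching the full trainer commands quoted in their comments. A minimal sketch, assuming (as the registry in hparams.py suggests) that `build_hparams` exposes the dict keys as attributes on the returned hparams object:

    # Sketch only: build the new hparam sets through the registry in hparams.py.
    # Attribute access on the result is assumed, not confirmed from this patch.
    from hparams import build_from_set

    for name in ['TEST_attn_head', 'TEST_stacked']:
        hp = build_from_set(name)  # same lookup path trainer.py's --hparam_set presumably uses
        print(name, hp.use_attn, hp.attention_heads,
              hp.encoder_rnn_layers, hp.decoder_rnn_layers)

If the two sets print the intended attention and layer settings, the trainer commands in the comments can be run as-is.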

From aeabb2a253360174be76ee0d111a3e36d8999023 Mon Sep 17 00:00:00 2001
From: gk
Date: Tue, 26 Jun 2018 10:33:59 -0700
Subject: [PATCH 02/42] simple imputation

---
 PREPROCESS.py | 61 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/PREPROCESS.py b/PREPROCESS.py
index 747d5fc..ca2869a 100644
--- a/PREPROCESS.py
+++ b/PREPROCESS.py
@@ -19,6 +19,7 @@
 
 #from statsmodels.tsa.seasonal import seasonal_decompose
 #stl = seasonal_decompose(x)
 
+from sklearn.preprocessing import Imputer
 
 
 
@@ -126,19 +127,40 @@
         return df_filled
 
 
-    def imputation__simple(df):
+    def imputation__simple(df,imputation_method):
         """
         Just a placeholder for now: fill all missing values with zeros,
         or mean or median imputation
         """
-        df_filled = df
-        return df_filled
+        missing_values = [-1]#['NaN', -1]
+        imp = Imputer(missing_values=missing_values,
+                      strategy=imputation_method,
+                      axis=1)
+        vals = imp.fit_transform(df.values)#[:,1:]) #The data is only [:,1:].
+        #"Some rows only contain missing values: [ 35 251 281]"
+        #But we get some rows with all missing values. Since we don't actually care about those rows and will never use them,
+        #for now just use the "Page" number as well to avoid this.
+ + + cols = df.columns + new_df = pd.DataFrame({cols[i]:vals[:,i] for i in range(vals.shape[1])}) + new_df['Page'] = df['Page'] + #Put "Page" at left + cols = new_df.columns.tolist() + new_df = new_df[cols[-1:]+cols[:-1]] + new_df.reset_index(drop=True,inplace=True) + return new_df + + if (imputation_method == 'median') or (imputation_method == 'mean'): + df = imputation__simple(df,imputation_method) + else: + raise Exception('not implemented other methods yet') #First deal with small gaps (missing gaps fewer than e.g. 7 days): - df = imputation_small_gaps(df,imputation_method) + #df = imputation_small_gaps(df,imputation_method) #Deal with longer gaps [e.g. by removing enough blocks of length S, where #S is the seasonality, to completely get rid of gaps] @@ -157,7 +179,7 @@ def imputation__simple(df): -def format_like_Kaggle(df, myDataDir, start_date=None, end_date=None): +def format_like_Kaggle(df, myDataDir, imputation_method, start_date=None, end_date=None): """ Take my data and format it exactly as needed to use for the Kaggle seq2seq model [requires making train_1.csv, train_2.csv, key_1.csv, key_2.csv] @@ -165,7 +187,7 @@ def format_like_Kaggle(df, myDataDir, start_date=None, end_date=None): """ - def make_train_csv(df, save_path, start_date, end_date): + def make_train_csv(df, save_path, imputation_method, start_date, end_date): """ Make the train_1.csv """ @@ -184,6 +206,11 @@ def make_train_csv(df, save_path, start_date, end_date): idx = pd.date_range(earliest,latest) OUT_OF_RANGE_FILL_VALUE = -1 #np.NaN #0 #puttign as nan casts to float and cannot convert to int + + + + + #Reorganize data for each id (->"Page") unique_ids = pd.unique(df['Page']) df_list = [] @@ -196,12 +223,22 @@ def make_train_csv(df, save_path, start_date, end_date): dates.index = pd.to_datetime(dates.index).strftime('%Y-%m-%d') dd = pd.DataFrame(dates).T dd['Page'] = u + + #If doing imputation / other + #for each series individually + #... + df_list.append(dd) df = pd.concat(df_list,axis=0) cols = df.columns.tolist() df = df[cols[-1:]+cols[:-1]] df.reset_index(drop=True,inplace=True) + + #Imputation, dealing with missing seasonality blocks / out of phase + df = do_imputation(df,imputation_method) + + df.to_csv(save_path,index=False) return df @@ -217,12 +254,12 @@ def make_key_csv(df): #Make the train csv [for now just do 1, ignore the train 2 part ???] save_path = os.path.join(os.path.split(myDataDir)[0],'train_1_my_data.csv') - df = make_train_csv(df, save_path, start_date, end_date) + df = make_train_csv(df, save_path, imputation_method, start_date, end_date) #For the prediction phase, need the key ???? 
# make_key_csv(df) - return + return df @@ -240,8 +277,8 @@ def make_key_csv(df): # PARAMETERS # ============================================================================= # TOTAL COMPLETED TRIPS: - myDataDir = r"/Users/......../Desktop/exData/totalCompletedTripsDaily" - imputation_method = 'STL' + myDataDir = r"/Users/kocher/Desktop/forecasting/exData/totalCompletedTripsDaily" + imputation_method = 'median' #'STL' START_DATE = '2015-01-01' #None END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful @@ -264,8 +301,6 @@ def make_key_csv(df): df = remove_cities(df,REMOVE_ID_LIST) #Put into same format as used by Kaggle, save out csv's - format_like_Kaggle(df, myDataDir, start_date=START_DATE, end_date=END_DATE) + df = format_like_Kaggle(df, myDataDir, imputation_method, start_date=START_DATE, end_date=END_DATE) - #Imputation, dealing with missing seasonality blocks / out of phase - df = do_imputation(df,imputation_method) \ No newline at end of file From 21731eddca950989de9a3760ddd5f8817123e975 Mon Sep 17 00:00:00 2001 From: gk Date: Wed, 27 Jun 2018 14:41:57 -0700 Subject: [PATCH 03/42] making features from our data --- .gitignore | 2 + PREDICT.py | 0 PREPROCESS.py | 76 +++++++++++++++++++-- QUICKLOOK.py | 0 Readme.md | 4 +- cocob.py | 0 extractor.py | 0 feeder.py | 0 input_pipe.py | 0 make_features.py | 170 ++++++++++++++++++++++++++++++++++------------- model.py | 0 trainer.py | 0 12 files changed, 198 insertions(+), 54 deletions(-) mode change 100644 => 100755 PREDICT.py mode change 100644 => 100755 PREPROCESS.py mode change 100644 => 100755 QUICKLOOK.py mode change 100644 => 100755 Readme.md mode change 100644 => 100755 cocob.py mode change 100644 => 100755 extractor.py mode change 100644 => 100755 feeder.py mode change 100644 => 100755 input_pipe.py mode change 100644 => 100755 make_features.py mode change 100644 => 100755 model.py mode change 100644 => 100755 trainer.py diff --git a/.gitignore b/.gitignore index 4959db7..fc3b504 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ data/submission.csv.gz data/* */.DS_STORE .DS_STORE +images/* +ex_figs/* diff --git a/PREDICT.py b/PREDICT.py old mode 100644 new mode 100755 diff --git a/PREPROCESS.py b/PREPROCESS.py old mode 100644 new mode 100755 index ca2869a..1331230 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -10,17 +10,17 @@ #import matplotlib #matplotlib.use('Agg') -#import matplotlib.pyplot as plt +import matplotlib.pyplot as plt import os import pandas as pd -#import numpy as np +import numpy as np #from statsmodels.tsa.seasonal import seasonal_decompose #stl = seasonal_decompose(x) from sklearn.preprocessing import Imputer - +from collections import Counter @@ -83,6 +83,62 @@ def get_earliest_latest_dates(df): + + +def __missing_vals_distribution(df): + """ + Look at two things: + - What fraction of our time series are desne vs. have >= 1 missing value? + - Of the series that have missing values, what is distribution of gap lengths? 
+ [important to know since will be doing imputation on it] + + df - in format like Kaggle competition: cols are dates, rows are series + start/end missing, nd intermedite gaps have been filled with -1 + """ + + def make_cdf(v): + c = Counter(v) + x = c.keys() + x = np.array(x) - 1 #-1 to go from diff in days from present data -> gap length + y = c.values() + # print(c) + plt.figure() + #plt.plot(x,y,drawstyle='steps')#,marker='o') + plt.plot(x,y,linestyle='None',marker='o') + plt.title('Distribution of Missing Data Gap Length',fontsize=20) + plt.xlabel('Gap Length [days]',fontsize=20) + plt.ylabel('Count',fontsize=20) + # plt.axis([-1,10,0,550]) + plt.show() + + #get fraction dense vs sparse: + dd = df.values[:,1:] + sparse = (dd==-1).sum(axis=1) + Nsparse = float((sparse>0).sum()) + print(Nsparse) + Ntotal = float(dd.shape[0]) + fraction_dense = (Ntotal - Nsparse) / Ntotal + print('Nsparse', Nsparse) + print('Ntotal', Ntotal) + print('fraction_dense', fraction_dense) + + #Look at distribution of INTERMEDIATE gap lengths + #ignore the leading / lagging unfilled since could just be from the series + #not officially starting yet, or it got closed out. + all_gaps = [] + for row in dd: + inds = np.where(row!=-1)[0] + x = np.diff(inds) + t = list(x[x>1]) + if len(t)>0: + all_gaps.extend(t) + make_cdf(all_gaps) + + + + + + def remove_seasonal_blocks(df): """ For places in the data where there are missing gaps of length > 1 seasonality, @@ -189,7 +245,7 @@ def format_like_Kaggle(df, myDataDir, imputation_method, start_date=None, end_da def make_train_csv(df, save_path, imputation_method, start_date, end_date): """ - Make the train_1.csv + Make the train_2.csv """ #Rename columns to be as in Kaggle data: df.rename(columns={'id':'Page'},inplace=True) @@ -235,6 +291,11 @@ def make_train_csv(df, save_path, imputation_method, start_date, end_date): df = df[cols[-1:]+cols[:-1]] df.reset_index(drop=True,inplace=True) + + #Just for analysis: look at kinds of gaps in series + __missing_vals_distribution(df) + + #Imputation, dealing with missing seasonality blocks / out of phase df = do_imputation(df,imputation_method) @@ -253,12 +314,15 @@ def make_key_csv(df): #Make the train csv [for now just do 1, ignore the train 2 part ???] - save_path = os.path.join(os.path.split(myDataDir)[0],'train_1_my_data.csv') + save_path = os.path.join(os.path.split(myDataDir)[0],'train_2[ours].csv') df = make_train_csv(df, save_path, imputation_method, start_date, end_date) #For the prediction phase, need the key ???? # make_key_csv(df) + + + return df @@ -303,4 +367,4 @@ def make_key_csv(df): #Put into same format as used by Kaggle, save out csv's df = format_like_Kaggle(df, myDataDir, imputation_method, start_date=START_DATE, end_date=END_DATE) - + diff --git a/QUICKLOOK.py b/QUICKLOOK.py old mode 100644 new mode 100755 diff --git a/Readme.md b/Readme.md old mode 100644 new mode 100755 index d651938..b8b7841 --- a/Readme.md +++ b/Readme.md @@ -46,11 +46,11 @@ GK modifications for own data: 2. $source activate gktf 3. $cd ..../kaggle-web-traffic 4. $python3 PREPROCESS.py -5. $python3 make_features.py data/vars --add_days=63 +5. $python3 make_features.py data/kaggle/vars kaggle --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full} depending on using default Arturius kaggle vs. own custom for this application 6. $python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 7. 
$python3 PREDICT.py -- confirmed it runs with 2 layers stacked, or with attention mechanism. Performance is worse in both cases, at least initially. +- confirmed it runs with 2 layers stacked GRU (for both encoder and decoder modules), or with attention mechanism. Performance is worse in both cases [SMAPE], at least initially. To do: diff --git a/cocob.py b/cocob.py old mode 100644 new mode 100755 diff --git a/extractor.py b/extractor.py old mode 100644 new mode 100755 diff --git a/feeder.py b/feeder.py old mode 100644 new mode 100755 diff --git a/input_pipe.py b/input_pipe.py old mode 100644 new mode 100755 diff --git a/make_features.py b/make_features.py old mode 100644 new mode 100755 index 23d547f..82664cd --- a/make_features.py +++ b/make_features.py @@ -28,7 +28,7 @@ def read_cached(name) -> pd.DataFrame: return df -def read_all() -> pd.DataFrame: +def read_all(data_type) -> pd.DataFrame: """ Reads source data for training/prediction """ @@ -43,40 +43,43 @@ def read_file(file): df = pd.read_pickle(path) else: # Official data - df = read_file('train_2') - # Scraped data - scraped = read_file('2017-08-15_2017-09-11') - # Update last two days by scraped data - df[pd.Timestamp('2017-09-10')] = scraped['2017-09-10'] - df[pd.Timestamp('2017-09-11')] = scraped['2017-09-11'] + filename = f'train_2[{data_type}]' + df = read_file(filename) + + if data_type=='kaggle': + # Scraped data + scraped = read_file('2017-08-15_2017-09-11') + # Update last two days by scraped data + df[pd.Timestamp('2017-09-10')] = scraped['2017-09-10'] + df[pd.Timestamp('2017-09-11')] = scraped['2017-09-11'] df = df.sort_index() # Cache result df.to_pickle(path) return df -# todo:remove -def make_holidays(tagged, start, end) -> pd.DataFrame: - def read_df(lang): - result = pd.read_pickle('data/holidays/%s.pkl' % lang) - return result[~result.dw].resample('D').size().rename(lang) +## todo:remove +#def make_holidays(tagged, start, end) -> pd.DataFrame: +# def read_df(lang): +# result = pd.read_pickle('data/holidays/%s.pkl' % lang) +# return result[~result.dw].resample('D').size().rename(lang) +# +# holidays = pd.DataFrame([read_df(lang) for lang in ['en']])#['de', 'en', 'es', 'fr', 'ja', 'ru', 'zh']]) #!!!!!!!!!!! can play around with this: english only +# holidays = holidays.loc[:, start:end].fillna(0) +# result =tagged[['country']].join(holidays, on='country').drop('country', axis=1).fillna(0).astype(np.int8) +# result.columns = pd.DatetimeIndex(result.columns.values) +# return result - holidays = pd.DataFrame([read_df(lang) for lang in ['de', 'en', 'es', 'fr', 'ja', 'ru', 'zh']]) - holidays = holidays.loc[:, start:end].fillna(0) - result =tagged[['country']].join(holidays, on='country').drop('country', axis=1).fillna(0).astype(np.int8) - result.columns = pd.DatetimeIndex(result.columns.values) - return result - -def read_x(start, end) -> pd.DataFrame: +def read_x(start, end, data_type) -> pd.DataFrame: """ Gets source data from start to end date. Any date can be None """ - df = read_all() + df = read_all(data_type) # User GoogleAnalitycsRoman has really bad data with huge traffic spikes in all incarnations. 
# Wikipedia banned him, we'll ban it too - bad_roman = df.index.str.startswith("User:GoogleAnalitycsRoman") - df = df[~bad_roman] +# bad_roman = df.index.str.startswith("User:GoogleAnalitycsRoman") +# df = df[~bad_roman] if start and end: return df.loc[:, start:end] elif end: @@ -164,7 +167,7 @@ def find_start_end(data: np.ndarray): return start_idx, end_idx -def prepare_data(start, end, valid_threshold) -> Tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray]: +def prepare_data(start, end, valid_threshold, data_type) -> Tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray]: """ Reads source data, calculates start and end of each series, drops bad series, calculates log1p(series) :param start: start date of effective time interval, can be None to start from beginning @@ -173,7 +176,7 @@ def prepare_data(start, end, valid_threshold) -> Tuple[pd.DataFrame, pd.DataFram ratio is less than threshold :return: tuple(log1p(series), nans, series start, series end) """ - df = read_x(start, end) + df = read_x(start, end, data_type) starts, ends = find_start_end(df.values) # boolean mask for bad (too short) series page_mask = (ends - starts) / df.shape[1] < valid_threshold @@ -262,6 +265,10 @@ def normalize(values: np.ndarray): def run(): parser = argparse.ArgumentParser(description='Prepare data') parser.add_argument('data_dir') + + parser.add_argument('data_type', help="Which data set to use: {'kaggle','ours'}") + parser.add_argument('features_set', help="Which set of features to use. His default for Kaggle vs. one of my custom sets: {'arturius','simple','full'}") + parser.add_argument('--valid_threshold', default=0.0, type=float, help="Series minimal length threshold (pct of data length)") parser.add_argument('--add_days', default=64, type=int, help="Add N days in a future for prediction") parser.add_argument('--start', help="Effective start date. Data before the start is dropped") @@ -269,8 +276,11 @@ def run(): parser.add_argument('--corr_backoffset', default=0, type=int, help='Offset for correlation calculation') args = parser.parse_args() + print(args.data_dir, args.data_type, args.features_set) + # Get the data - df, nans, starts, ends = prepare_data(args.start, args.end, args.valid_threshold) + df, nans, starts, ends = prepare_data(args.start, args.end, args.valid_threshold, args.data_type) + # Our working date range data_start, data_end = df.columns[0], df.columns[-1] @@ -281,7 +291,11 @@ def run(): # Group unique pages by agents assert df.index.is_monotonic_increasing - page_map = uniq_page_map(df.index.values) + #Only do this wikipedia web scraping if doing the kaggle comp. 
Not for ours + if args.data_type=='kaggle': + page_map = uniq_page_map(df.index.values) + + # Yearly(annual) autocorrelation raw_year_autocorr = batch_autocorr(df.values, 365, starts, ends, 1.5, args.corr_backoffset) @@ -298,39 +312,99 @@ def run(): quarter_autocorr = normalize(np.nan_to_num(raw_quarter_autocorr)) # Calculate and encode page features - page_features = make_page_features(df.index.values) - encoded_page_features = encode_page_features(page_features) + if args.data_type=='kaggle': + page_features = make_page_features(df.index.values) + encoded_page_features = encode_page_features(page_features) # Make time-dependent features features_days = pd.date_range(data_start, features_end) #dow = normalize(features_days.dayofweek.values) week_period = 7 / (2 * np.pi) - dow_norm = features_days.dayofweek.values / week_period + dow_norm = features_days.dayofweek.values / week_period #S.dayofweek gives day of the week with Monday=0, Sunday=6 dow = np.stack([np.cos(dow_norm), np.sin(dow_norm)], axis=-1) - + + #index of week number + year_period = 52. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] + woy_norm = features_days.weekofyear.values / year_period #not sure if by default this starts on Monday vs Sunday + woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) + + # Assemble indices for quarterly lagged data lagged_ix = np.stack(lag_indexes(data_start, features_end), axis=-1) - page_popularity = df.median(axis=1) - page_popularity = (page_popularity - page_popularity.mean()) / page_popularity.std() + + count_median = df.median(axis=1) + count_median = normalize(count_median) + + + #Play around w a few other basic summary stats + percentiles = [] + for pctl in [5,25,75,95]: + percentiles.append(normalize(np.percentile(df.values,pctl,axis=1))) + count_variance = normalize(np.var(df.values,axis=1)) + #filled_len = df.values.shape[1] - np.count_nonzero(np.isnan(df.values),axis=1) #non-nans + #series_length = (df.values>0).sum(axis=1) #actually it has already been log transofmred so this is not correct + # Put NaNs back df[nans] = np.NaN - # Assemble final output - tensors = dict( - hits=df, - lagged_ix=lagged_ix, - page_map=page_map, - page_ix=df.index.values, - pf_agent=encoded_page_features['agent'], - pf_country=encoded_page_features['country'], - pf_site=encoded_page_features['site'], - page_popularity=page_popularity, - year_autocorr=year_autocorr, - quarter_autocorr=quarter_autocorr, - dow=dow, - ) + + #Compile the features + print(f'Using {args.features_set} set of features') + + if args.features_set == 'arturius': + tensors = dict( + hits=df, + lagged_ix=lagged_ix, + page_map=page_map, + page_ix=df.index.values, + pf_agent=encoded_page_features['agent'],#ll-access_all-agents all-access_spider desktop_all-agents mobile-web_all-agents + pf_country=encoded_page_features['country'],#de en es fr ja ru zh + pf_site=encoded_page_features['site'], #commons.wikimedia.org wikipedia.org www.mediawiki.org + count_median=count_median, + year_autocorr=year_autocorr, + quarter_autocorr=quarter_autocorr, + dow=dow,#N x 2 array since encoded week periodicity as complex number + ) + + elif args.features_set == 'simple': + tensors = dict( + hits=df, + count_median=count_median,#this is just the median feature, can put in others too + dow=dow, + ) + + elif args.features_set == 'full': + tensors = dict( + hits=df, + page_ix=df.index.values, + + year_autocorr=year_autocorr, + quarter_autocorr=quarter_autocorr, + count_median=count_median,#this is just the median 
feature, can put in others too + count_variance=count_variance,#variance + + #percentiles + count_pctl_5=percentiles[0],#5th percentile + count_pctl_25=percentiles[1],#25th percentile + count_pctl_75=percentiles[2],#75th percentile + count_pctl_95=percentiles[3],#95th percentile +# series_length=series_length,#length of series [number of samples] to get idea of how much history a series has #number nonzero + + #Other time-frequency/scale features + #... + + #N x 2 array since encoded week periodicity as complex number + dow=dow, + woy=woy,#and want want week number too, aggregating last ~10 days into week 52 + ) + else: + raise Exception(f'features_set must be specified\nOne of ["arturius","simple","full"]') + + + + plain = dict( features_days=len(features_days), data_days=len(df.columns), @@ -341,6 +415,10 @@ def run(): ) + + print(tensors) + print(plain) + # Store data to the disk VarFeeder(args.data_dir, tensors, plain) diff --git a/model.py b/model.py old mode 100644 new mode 100755 diff --git a/trainer.py b/trainer.py old mode 100644 new mode 100755 From 6b48e096b8fbac1968807dcf599b2dc7177ac743 Mon Sep 17 00:00:00 2001 From: gk Date: Wed, 27 Jun 2018 18:11:58 -0700 Subject: [PATCH 04/42] sampling_period daily weekly etc --- PREPROCESS.py | 25 +++++++-- Readme.md | 5 +- make_features.py | 130 +++++++++++++++++++++++++++++++++-------------- 3 files changed, 114 insertions(+), 46 deletions(-) diff --git a/PREPROCESS.py b/PREPROCESS.py index 1331230..6c268b8 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -235,7 +235,7 @@ def imputation__simple(df,imputation_method): -def format_like_Kaggle(df, myDataDir, imputation_method, start_date=None, end_date=None): +def format_like_Kaggle(df, myDataDir, imputation_method, sampling_period, start_date=None, end_date=None): """ Take my data and format it exactly as needed to use for the Kaggle seq2seq model [requires making train_1.csv, train_2.csv, key_1.csv, key_2.csv] @@ -243,10 +243,20 @@ def format_like_Kaggle(df, myDataDir, imputation_method, start_date=None, end_da """ - def make_train_csv(df, save_path, imputation_method, start_date, end_date): + def make_train_csv(df, save_path, imputation_method, sampling_period, start_date, end_date): """ Make the train_2.csv """ + + def aggregate(df, sampling_period): + """ + Aggregate the data (average it) to downsample + to desired sample period, e.g. daily measurements -> weekly or monthly. + Should smooth out some noise, and help w seasonality. + """ + return df + + #Rename columns to be as in Kaggle data: df.rename(columns={'id':'Page'},inplace=True) @@ -298,6 +308,10 @@ def make_train_csv(df, save_path, imputation_method, start_date, end_date): #Imputation, dealing with missing seasonality blocks / out of phase df = do_imputation(df,imputation_method) + #Could do impoutation then downsampling, vs. downsampling then imputation ... unclear which is better here in general. + #for now assume we do ipmutation THEN aggregation: + df = aggregate(df,sampling_period) + df.to_csv(save_path,index=False) @@ -314,8 +328,8 @@ def make_key_csv(df): #Make the train csv [for now just do 1, ignore the train 2 part ???] - save_path = os.path.join(os.path.split(myDataDir)[0],'train_2[ours].csv') - df = make_train_csv(df, save_path, imputation_method, start_date, end_date) + save_path = os.path.join(os.path.split(myDataDir)[0],f"train_2[ours_{sampling_period}].csv") + df = make_train_csv(df, save_path, imputation_method, sampling_period, start_date, end_date) #For the prediction phase, need the key ???? 
# make_key_csv(df) @@ -346,6 +360,7 @@ def make_key_csv(df): START_DATE = '2015-01-01' #None END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful + SAMPLING_PERIOD = 'daily' #'daily', 'weekly', 'monthly' @@ -365,6 +380,6 @@ def make_key_csv(df): df = remove_cities(df,REMOVE_ID_LIST) #Put into same format as used by Kaggle, save out csv's - df = format_like_Kaggle(df, myDataDir, imputation_method, start_date=START_DATE, end_date=END_DATE) + df = format_like_Kaggle(df, myDataDir, imputation_method, SAMPLING_PERIOD, start_date=START_DATE, end_date=END_DATE) diff --git a/Readme.md b/Readme.md index b8b7841..2f8e2d2 100755 --- a/Readme.md +++ b/Readme.md @@ -46,7 +46,7 @@ GK modifications for own data: 2. $source activate gktf 3. $cd ..../kaggle-web-traffic 4. $python3 PREPROCESS.py -5. $python3 make_features.py data/kaggle/vars kaggle --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full} depending on using default Arturius kaggle vs. own custom for this application +5. $python3 make_features.py data/vars kaggle daily arturius --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full, full_w_context} depending on using default Arturius kaggle vs. own custom for this application; and specify sampling period 6. $python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 7. $python3 PREDICT.py @@ -55,7 +55,6 @@ GK modifications for own data: To do: 1. finish PREPROCESS.py to do better imputation using basic forecasting method [just use STL or Theta to fill in small gaps; otherwise remove blocks] -2. modify make_features / InputPipeline / VarFeeder etc. to NOT do the lagged autocorrelations [if ts too short], to NOT use lagged_x, to NOT use wikipedia specific features. -Use only features relevant to this data. Still use the (tiled) median series value (before standard scaling), or few other quantiles, too. Keep day of week, add onehot encoded continent or use country like he has it. +2. PREPROCESS.py - allow downsample in time to weekly, monthly 3. Prediction intervals 4. Architecture improvements \ No newline at end of file diff --git a/make_features.py b/make_features.py index 82664cd..9321fe6 100755 --- a/make_features.py +++ b/make_features.py @@ -28,7 +28,7 @@ def read_cached(name) -> pd.DataFrame: return df -def read_all(data_type) -> pd.DataFrame: +def read_all(data_type,sampling_period) -> pd.DataFrame: """ Reads source data for training/prediction """ @@ -43,7 +43,7 @@ def read_file(file): df = pd.read_pickle(path) else: # Official data - filename = f'train_2[{data_type}]' + filename = f'train_2[{data_type}_{sampling_period}]' df = read_file(filename) if data_type=='kaggle': @@ -71,11 +71,11 @@ def read_file(file): # return result -def read_x(start, end, data_type) -> pd.DataFrame: +def read_x(start, end, data_type, sampling_period) -> pd.DataFrame: """ Gets source data from start to end date. Any date can be None """ - df = read_all(data_type) + df = read_all(data_type,sampling_period) # User GoogleAnalitycsRoman has really bad data with huge traffic spikes in all incarnations. 
# Wikipedia banned him, we'll ban it too # bad_roman = df.index.str.startswith("User:GoogleAnalitycsRoman") @@ -167,7 +167,7 @@ def find_start_end(data: np.ndarray): return start_idx, end_idx -def prepare_data(start, end, valid_threshold, data_type) -> Tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray]: +def prepare_data(start, end, valid_threshold, data_type, sampling_period) -> Tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray]: """ Reads source data, calculates start and end of each series, drops bad series, calculates log1p(series) :param start: start date of effective time interval, can be None to start from beginning @@ -176,7 +176,7 @@ def prepare_data(start, end, valid_threshold, data_type) -> Tuple[pd.DataFrame, ratio is less than threshold :return: tuple(log1p(series), nans, series start, series end) """ - df = read_x(start, end, data_type) + df = read_x(start, end, data_type, sampling_period) starts, ends = find_start_end(df.values) # boolean mask for bad (too short) series page_mask = (ends - starts) / df.shape[1] < valid_threshold @@ -267,7 +267,8 @@ def run(): parser.add_argument('data_dir') parser.add_argument('data_type', help="Which data set to use: {'kaggle','ours'}") - parser.add_argument('features_set', help="Which set of features to use. His default for Kaggle vs. one of my custom sets: {'arturius','simple','full'}") + parser.add_argument('sampling_period', help="Sampling period for our data: {'daily','weekly','monthly'}") + parser.add_argument('features_set', help="Which set of features to use. His default for Kaggle vs. one of my custom sets: {'arturius','simple','full','full_w_context'}") parser.add_argument('--valid_threshold', default=0.0, type=float, help="Series minimal length threshold (pct of data length)") parser.add_argument('--add_days', default=64, type=int, help="Add N days in a future for prediction") @@ -279,9 +280,13 @@ def run(): print(args.data_dir, args.data_type, args.features_set) # Get the data - df, nans, starts, ends = prepare_data(args.start, args.end, args.valid_threshold, args.data_type) + df, nans, starts, ends = prepare_data(args.start, args.end, args.valid_threshold, args.data_type, args.sampling_period) + # ============================================================================= + # STATIC FEATURES + # ============================================================================= + # Our working date range data_start, data_end = df.columns[0], df.columns[-1] @@ -316,40 +321,64 @@ def run(): page_features = make_page_features(df.index.values) encoded_page_features = encode_page_features(page_features) - # Make time-dependent features - features_days = pd.date_range(data_start, features_end) - #dow = normalize(features_days.dayofweek.values) - week_period = 7 / (2 * np.pi) - dow_norm = features_days.dayofweek.values / week_period #S.dayofweek gives day of the week with Monday=0, Sunday=6 - dow = np.stack([np.cos(dow_norm), np.sin(dow_norm)], axis=-1) - - #index of week number - year_period = 52. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] 
- woy_norm = features_days.weekofyear.values / year_period #not sure if by default this starts on Monday vs Sunday - woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) - - - # Assemble indices for quarterly lagged data - lagged_ix = np.stack(lag_indexes(data_start, features_end), axis=-1) - + #To get idea of overall scale of a time series, to compare between time series, which would be lost if just used standard scaled values: count_median = df.median(axis=1) count_median = normalize(count_median) - - #Play around w a few other basic summary stats percentiles = [] - for pctl in [5,25,75,95]: + for pctl in [0,5,25,75,95,100]: percentiles.append(normalize(np.percentile(df.values,pctl,axis=1))) count_variance = normalize(np.var(df.values,axis=1)) + #entropy = normalize(entropy(df.values,axis=1)) #filled_len = df.values.shape[1] - np.count_nonzero(np.isnan(df.values),axis=1) #non-nans #series_length = (df.values>0).sum(axis=1) #actually it has already been log transofmred so this is not correct + + # ============================================================================= + # TIME-VARYING FEATURES + # ============================================================================= + + if args.sampling_period=='daily': + + features_days = pd.date_range(data_start, features_end, freq='D') + #dow = normalize(features_days.dayofweek.values) + week_period = 7 / (2 * np.pi) + dow_norm = features_days.dayofweek.values / week_period #S.dayofweek gives day of the week with Monday=0, Sunday=6 + dow = np.stack([np.cos(dow_norm), np.sin(dow_norm)], axis=-1) + + #index of week number, when sampling at DAILY level + year_period = 53. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year + woy_norm = features_days.weekofyear.values / year_period #not sure if by default this starts on Monday vs Sunday + woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) + + + if args.sampling_period=='weekly': + #index of week number, when sampling at WEEKLY level (this is different than above) + fff = pd.date_range(data_start, features_end, freq='W') + #!!!!!!!!!!!!! still need to worry about alignment ... + year_period = 53. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year + woy_norm = fff.weekofyear.values / year_period #not sure if by default this starts on Monday vs Sunday + woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) + + + if args.sampling_period=='monthly': + #month index (only used if sampling monthly) + fff = pd.date_range(data_start, features_end, freq='M') #!!!!! need to think about alignment of starting month on particular dates .... + period = 12. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] 
---> actually in pandas numbering goes to 53, depending on start day of week for that year + moy_norm = fff.month.values / period #not sure if by default this starts on Monday vs Sunday + moy = np.stack([np.cos(moy_norm), np.sin(moy_norm)], axis=-1) + + + + + # Assemble indices for quarterly lagged data + lagged_ix = np.stack(lag_indexes(data_start, features_end), axis=-1) + # Put NaNs back df[nans] = np.NaN - #Compile the features print(f'Using {args.features_set} set of features') @@ -368,6 +397,8 @@ def run(): dow=dow,#N x 2 array since encoded week periodicity as complex number ) + + elif args.features_set == 'simple': tensors = dict( hits=df, @@ -375,7 +406,7 @@ def run(): dow=dow, ) - elif args.features_set == 'full': + elif (args.features_set == 'full') or (args.features_set == 'full_w_context'): tensors = dict( hits=df, page_ix=df.index.values, @@ -384,23 +415,46 @@ def run(): quarter_autocorr=quarter_autocorr, count_median=count_median,#this is just the median feature, can put in others too count_variance=count_variance,#variance + #entropy #percentiles - count_pctl_5=percentiles[0],#5th percentile - count_pctl_25=percentiles[1],#25th percentile - count_pctl_75=percentiles[2],#75th percentile - count_pctl_95=percentiles[3],#95th percentile + count_pctl_0=percentiles[0],#min + count_pctl_5=percentiles[1],#5th percentile + count_pctl_25=percentiles[2],#25th percentile + count_pctl_75=percentiles[3],#75th percentile + count_pctl_95=percentiles[4],#95th percentile + count_pctl_100=percentiles[5],#max # series_length=series_length,#length of series [number of samples] to get idea of how much history a series has #number nonzero #Other time-frequency/scale features #... - #N x 2 array since encoded week periodicity as complex number - dow=dow, - woy=woy,#and want want week number too, aggregating last ~10 days into week 52 - ) + + ) + + if args.sampling_period=='daily': + tensors[dow]=dow + tensors[woy]=woy #and want want week number too, aggregating last ~10 days into week 52 + elif args.sampling_period=='weekly': + tensors[woy]=woy + elif args.sampling_period=='monthly': + tensors[moy]=moy + else: + raise Exception('Must specify correct sampling period') + + + #If provide other info based on e.g. new location (any features that are not derived purely from the time series) + if args.features_set == 'full_w_context': + tensors['country'] = asdasdasd + tensors['region'] = asdasdasd + tensors['city_population'] = asdasdasd + raise Exception('not implemented yet') + #... can write scraper function to get these ... 
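# Aside on the dow/woy/moy encodings above (a sketch, not part of this patch):
# dividing the calendar index by period/(2*pi) and taking cos/sin places each
# value on the unit circle, so the feature wraps smoothly (week 53 lands next
# to week 1 instead of jumping). 'idx' and 'period' stand in for any of the
# dayofweek/weekofyear/month indexes built above.
import numpy as np

def cyclic_encode(idx, period):
    angle = np.asarray(idx) / (period / (2 * np.pi))
    return np.stack([np.cos(angle), np.sin(angle)], axis=-1)  # shape [len(idx), 2]

# e.g. cyclic_encode(features_days.dayofweek.values, 7.) reproduces dow above.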
+ + + else: - raise Exception(f'features_set must be specified\nOne of ["arturius","simple","full"]') + raise Exception(f'features_set must be specified\nOne of ["arturius","simple","full","full_w_context"]') From 2a67dd1b15c3a66a63a871706d5a87590beb3f50 Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 28 Jun 2018 11:10:19 -0700 Subject: [PATCH 05/42] py36 fixes --- PREPROCESS.py | 22 +++++++++++++--------- make_features.py | 8 ++++---- model.py | 2 +- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/PREPROCESS.py b/PREPROCESS.py index 6c268b8..c2385c3 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -98,9 +98,9 @@ def __missing_vals_distribution(df): def make_cdf(v): c = Counter(v) - x = c.keys() - x = np.array(x) - 1 #-1 to go from diff in days from present data -> gap length - y = c.values() + x = list(c.keys()) + x = np.array(x) -1 #-1 to go from diff in days from present data -> gap length + y = list(c.values()) # print(c) plt.figure() #plt.plot(x,y,drawstyle='steps')#,marker='o') @@ -297,13 +297,15 @@ def aggregate(df, sampling_period): df_list.append(dd) df = pd.concat(df_list,axis=0) - cols = df.columns.tolist() - df = df[cols[-1:]+cols[:-1]] + #cols = df.columns.tolist() + #df = df[cols[-1:]+cols[:-1]] df.reset_index(drop=True,inplace=True) - #Just for analysis: look at kinds of gaps in series - __missing_vals_distribution(df) + #Just for analysis: look at kinds of gaps in series + VERBOSE = False + if VERBOSE: + __missing_vals_distribution(df) #Imputation, dealing with missing seasonality blocks / out of phase @@ -313,7 +315,8 @@ def aggregate(df, sampling_period): df = aggregate(df,sampling_period) - + #SHould end up with a csv that is rows are series (each id), cols are dates + #:eftmost col should be "Pages" to be same as Kaggle format df.to_csv(save_path,index=False) return df @@ -328,7 +331,8 @@ def make_key_csv(df): #Make the train csv [for now just do 1, ignore the train 2 part ???] - save_path = os.path.join(os.path.split(myDataDir)[0],f"train_2[ours_{sampling_period}].csv") + #save_path = os.path.join(os.path.split(myDataDir)[0],f"train_2[ours_{sampling_period}].csv") + save_path = os.path.join(os.path.split(myDataDir)[0],"train_2[ours_{}].csv".format(sampling_period)) df = make_train_csv(df, save_path, imputation_method, sampling_period, start_date, end_date) #For the prediction phase, need the key ???? 
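Note: the aggregate() helper introduced above is still a stub (it returns df unchanged). A minimal sketch of what it could do, assuming the wide Kaggle-style layout (a 'Page' column plus one column per date) and simple mean aggregation; the function name and the 'W'/'M' frequency strings are illustrative, not part of the patch:

import pandas as pd

def aggregate_sketch(df, sampling_period):
    """Downsample daily date columns to weekly/monthly means; 'daily' is a no-op."""
    if sampling_period == 'daily':
        return df
    freq = {'weekly': 'W', 'monthly': 'M'}[sampling_period]
    ts = df.set_index('Page').T                     # rows become dates, columns become series
    ts.index = pd.to_datetime(ts.index)
    out = ts.resample(freq).mean().T.reset_index()  # average within each period
    out.columns = ['Page'] + [c.strftime('%Y-%m-%d') for c in out.columns[1:]]
    return out

Whether imputation runs before or after this step (the open question in the comment above) changes what each per-period mean is computed over, so that ordering is worth fixing explicitly.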
diff --git a/make_features.py b/make_features.py index 9321fe6..1acdded 100755 --- a/make_features.py +++ b/make_features.py @@ -433,12 +433,12 @@ def run(): ) if args.sampling_period=='daily': - tensors[dow]=dow - tensors[woy]=woy #and want want week number too, aggregating last ~10 days into week 52 + tensors['dow']=dow + tensors['woy']=woy #and want want week number too, aggregating last ~10 days into week 52 elif args.sampling_period=='weekly': - tensors[woy]=woy + tensors['woy']=woy elif args.sampling_period=='monthly': - tensors[moy]=moy + tensors['moy']=moy else: raise Exception('Must specify correct sampling period') diff --git a/model.py b/model.py index 4d658d8..6b2a8c2 100755 --- a/model.py +++ b/model.py @@ -66,7 +66,7 @@ def make_encoder(time_inputs, encoder_features_depth, is_train, hparams, seed, t def build_rnn(): return RNN(num_layers=hparams.encoder_rnn_layers, num_units=hparams.rnn_depth, input_size=encoder_features_depth, - direction='unidirectional', + direction='unidirectional', #Let's try bidirectional as well, or ,ay as well try keeping unidirectional but with order reversed, just see what happens dropout=hparams.encoder_dropout if is_train else 0, seed=seed) static_p_size = cuda_params_size(build_rnn) From a681e1ee7bdb46df16da13ce182198b46526c9c3 Mon Sep 17 00:00:00 2001 From: gk Date: Fri, 29 Jun 2018 13:38:08 -0700 Subject: [PATCH 06/42] incorporating my modified features --- PREPROCESS.py | 2 +- Readme.md | 7 +++ input_pipe.py | 147 ++++++++++++++++++++++++++++------------------- make_features.py | 14 ++--- trainer.py | 14 ++--- 5 files changed, 109 insertions(+), 75 deletions(-) diff --git a/PREPROCESS.py b/PREPROCESS.py index c2385c3..b1e4ffa 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -332,7 +332,7 @@ def make_key_csv(df): #Make the train csv [for now just do 1, ignore the train 2 part ???] #save_path = os.path.join(os.path.split(myDataDir)[0],f"train_2[ours_{sampling_period}].csv") - save_path = os.path.join(os.path.split(myDataDir)[0],"train_2[ours_{}].csv".format(sampling_period)) + save_path = os.path.join(os.path.split(myDataDir)[0],"train_2_ours_{}.csv".format(sampling_period)) df = make_train_csv(df, save_path, imputation_method, sampling_period, start_date, end_date) #For the prediction phase, need the key ???? diff --git a/Readme.md b/Readme.md index 2f8e2d2..a09eb3d 100755 --- a/Readme.md +++ b/Readme.md @@ -47,13 +47,20 @@ GK modifications for own data: 3. $cd ..../kaggle-web-traffic 4. $python3 PREPROCESS.py 5. $python3 make_features.py data/vars kaggle daily arturius --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full, full_w_context} depending on using default Arturius kaggle vs. own custom for this application; and specify sampling period +python3 make_features.py data/vars kaggle daily full --add_days=63 + + +#no reason to expect 10000 to 11500 is good range to save out. View loss along the way 6. $python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 7. $python3 PREDICT.py - confirmed it runs with 2 layers stacked GRU (for both encoder and decoder modules), or with attention mechanism. Performance is worse in both cases [SMAPE], at least initially. +- tried bidirectional encoder but has input dimension issues, think about that more later. To do: +0. get to work w my features +0. save log files to view SMAPE etc metrics during training 1. 
finish PREPROCESS.py to do better imputation using basic forecasting method [just use STL or Theta to fill in small gaps; otherwise remove blocks] 2. PREPROCESS.py - allow downsample in time to weekly, monthly 3. Prediction intervals diff --git a/input_pipe.py b/input_pipe.py index 7627344..06b8766 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -42,7 +42,7 @@ def __init__(self, tensors: List[tf.Tensor], cluster_indexes: tf.Tensor, n_split self.seed = seed clustered_index = self.cluster_pages(cluster_indexes) index_len = tf.shape(clustered_index)[0] - assert_op = tf.assert_equal(index_len, size, message='n_pages is not equals to size of clustered index') + assert_op = tf.assert_equal(index_len, size, message='N_time_series is not equals to size of clustered index') with tf.control_dependencies([assert_op]): split_nitems = int(round(size / n_splits)) split_size = [split_nitems] * n_splits @@ -71,65 +71,70 @@ def prepare_split(i): class FakeSplitter: def __init__(self, tensors: List[tf.Tensor], n_splits, seed, test_sampling=1.0): - total_pages = tensors[0].shape[0].value - n_pages = int(round(total_pages * test_sampling)) + total_series = tensors[0].shape[0].value + N_time_series = int(round(total_series * test_sampling)) def mk_name(prefix, tensor): return prefix + '_' + tensor.name[:-2] def prepare_split(i): - idx = tf.random_shuffle(tf.range(0, n_pages, dtype=tf.int32), seed + i) + idx = tf.random_shuffle(tf.range(0, N_time_series, dtype=tf.int32), seed + i) train_tensors = [tf.gather(tensor, idx, name=mk_name('shfl', tensor)) for tensor in tensors] if test_sampling < 1.0: - sampled_idx = idx[:n_pages] + sampled_idx = idx[:N_time_series] test_tensors = [tf.gather(tensor, sampled_idx, name=mk_name('shfl_test', tensor)) for tensor in tensors] else: test_tensors = train_tensors - return Split(test_tensors, train_tensors, n_pages, total_pages) + return Split(test_tensors, train_tensors, N_time_series, total_series) self.splits = [prepare_split(i) for i in range(n_splits)] class InputPipe: - def cut(self, hits, start, end): + def cut(self, counts, start, end): """ Cuts [start:end] diapason from input data - :param hits: hits timeseries + :param counts: counts timeseries :param start: start index :param end: end index - :return: tuple (train_hits, test_hits, dow, lagged_hits) + :return: tuple (train_counts, test_counts, dow, lagged_counts) """ - # Pad hits to ensure we have enough array length for prediction - hits = tf.concat([hits, tf.fill([self.predict_window], np.NaN)], axis=0) - cropped_hit = hits[start:end] + # Pad counts to ensure we have enough array length for prediction + counts = tf.concat([counts, tf.fill([self.predict_window], np.NaN)], axis=0) + cropped_hit = counts[start:end] # cut day of week - cropped_dow = self.inp.dow[start:end] - - # Cut lagged hits - # gather() accepts only int32 indexes - cropped_lags = tf.cast(self.inp.lagged_ix[start:end], tf.int32) - # Mask for -1 (no data) lag indexes - lag_mask = cropped_lags < 0 - # Convert -1 to 0 for gather(), it don't accept anything exotic - cropped_lags = tf.maximum(cropped_lags, 0) - # Translate lag indexes to hit values - lagged_hit = tf.gather(hits, cropped_lags) - # Convert masked (see above) or NaN lagged hits to zeros - lag_zeros = tf.zeros_like(lagged_hit) - lagged_hit = tf.where(lag_mask | tf.is_nan(lagged_hit), lag_zeros, lagged_hit) + if self.inp.dow: + cropped_dow = self.inp.dow[start:end] #!!!!!!! only if using dow feature [sampling daily] + #!!!!!!!!!!!! 
do same for moy , woy if using those features + + if self.inp.lagged_ix: + # Cut lagged counts + # gather() accepts only int32 indexes + cropped_lags = tf.cast(self.inp.lagged_ix[start:end], tf.int32) + # Mask for -1 (no data) lag indexes + lag_mask = cropped_lags < 0 + # Convert -1 to 0 for gather(), it don't accept anything exotic + cropped_lags = tf.maximum(cropped_lags, 0) + # Translate lag indexes to count values + lagged_hit = tf.gather(counts, cropped_lags) + # Convert masked (see above) or NaN lagged counts to zeros + lag_zeros = tf.zeros_like(lagged_hit) + lagged_hit = tf.where(lag_mask | tf.is_nan(lagged_hit), lag_zeros, lagged_hit) # Split for train and test - x_hits, y_hits = tf.split(cropped_hit, [self.train_window, self.predict_window], axis=0) + x_counts, y_counts = tf.split(cropped_hit, [self.train_window, self.predict_window], axis=0) # Convert NaN to zero in for train data - x_hits = tf.where(tf.is_nan(x_hits), tf.zeros_like(x_hits), x_hits) - return x_hits, y_hits, cropped_dow, lagged_hit + x_counts = tf.where(tf.is_nan(x_counts), tf.zeros_like(x_counts), x_counts) + return x_counts, y_counts, cropped_dow, lagged_hit #!!!!!!!!!!!! return other cropped time dependent features as well - def cut_train(self, hits, *args): + + + def cut_train(self, counts, *args): """ Cuts a segment of time series for training. Randomly chooses starting point. - :param hits: hits timeseries + :param counts: counts timeseries :param args: pass-through data, will be appended to result :return: result of cut() + args """ @@ -150,56 +155,72 @@ def cut_train(self, hits, *args): offset = tf.random_uniform((), self.start_offset, free_space, dtype=tf.int32, seed=self.rand_seed) end = offset + n_days # Cut all the things - return self.cut(hits, offset, end) + args + return self.cut(counts, offset, end) + args - def cut_eval(self, hits, *args): + def cut_eval(self, counts, *args): """ Cuts segment of time series for evaluation. Always cuts train_window + predict_window length segment beginning at start_offset point - :param hits: hits timeseries + :param counts: counts timeseries :param args: pass-through data, will be appended to result :return: result of cut() + args """ end = self.start_offset + self.train_window + self.predict_window - return self.cut(hits, self.start_offset, end) + args + return self.cut(counts, self.start_offset, end) + args - def reject_filter(self, x_hits, y_hits, *args): + def reject_filter(self, x_counts, y_counts, *args): """ Rejects timeseries having too many zero datapoints (more than self.max_train_empty) """ if self.verbose: print("max empty %d train %d predict" % (self.max_train_empty, self.max_predict_empty)) - zeros_x = tf.reduce_sum(tf.to_int32(tf.equal(x_hits, 0.0))) + zeros_x = tf.reduce_sum(tf.to_int32(tf.equal(x_counts, 0.0))) keep = zeros_x <= self.max_train_empty return keep - def make_features(self, x_hits, y_hits, dow, lagged_hits, pf_agent, pf_country, pf_site, page_ix, - page_popularity, year_autocorr, quarter_autocorr): + def make_features(self, x_counts, y_counts, dow, lagged_counts, pf_agent, pf_country, pf_site, page_ix, + count_median, year_autocorr, quarter_autocorr): #!!!!!!!!!!!! if kaggle feats as is """ Main method. 
Assembles input data into final tensors + + split into 3 sets of features: time-dependent, per series but static, and context features + input as dicts + ts_dynamic : {x_counts, y_counts, dow, woy, moy, lagged} + ts_static: {count_median, other percentiles..., autocorrelations, } + + def make_features(self, ts_dynamic, ts_static, context): + """ + # Split day of week to train and test - x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) - - # Normalize hits - mean = tf.reduce_mean(x_hits) - std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_hits, mean))) - norm_x_hits = (x_hits - mean) / std - norm_y_hits = (y_hits - mean) / std - norm_lagged_hits = (lagged_hits - mean) / std - - # Split lagged hits to train and test - x_lagged, y_lagged = tf.split(norm_lagged_hits, [self.train_window, self.predict_window], axis=0) + if ts_dynamic['dow']: + x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) + if ts_dynamic['woy']: + x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) + if ts_dynamic['moy']: + x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) + + + # Normalize counts + mean = tf.reduce_mean(x_counts) + std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_counts, mean))) + norm_x_counts = (x_counts - mean) / std + norm_y_counts = (y_counts - mean) / std + norm_lagged_counts = (lagged_counts - mean) / std #!!!!!! seems like there is some leakage in time here??? The y lagged are normalized in a way that is a function of the y data ?? + + # Split lagged counts to train and test + if ts_dynamic['lagged_ix']: + x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) # Combine all page features into single tensor - stacked_features = tf.stack([page_popularity, quarter_autocorr, year_autocorr]) - flat_page_features = tf.concat([pf_agent, pf_country, pf_site, stacked_features], axis=0) + stacked_features = tf.stack([count_median, quarter_autocorr, year_autocorr])#!!!!!!! if kaggle feats. Else need also the oher quntiles too + flat_page_features = tf.concat([pf_agent, pf_country, pf_site, stacked_features], axis=0) page_features = tf.expand_dims(flat_page_features, 0) # Train features x_features = tf.concat([ # [n_days] -> [n_days, 1] - tf.expand_dims(norm_x_hits, -1), + tf.expand_dims(norm_x_counts, -1), x_dow, x_lagged, # Stretch page_features to all training days @@ -217,9 +238,15 @@ def make_features(self, x_hits, y_hits, dow, lagged_hits, pf_agent, pf_country, tf.tile(page_features, [self.predict_window, 1]) ], axis=1) - return x_hits, x_features, norm_x_hits, x_lagged, y_hits, y_features, norm_y_hits, mean, std, flat_page_features, page_ix + #!!!!! why no lagged_y alnoe, only in y_features??? + #!!!! why no norm_y_counts ????? 
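# Aside on the normalization above (a sketch, not part of this diff): the mean
# and std come from x_counts (the encoder window) only; y_counts and the lagged
# values are then scaled with those same constants rather than with statistics
# of their own. NumPy illustration with made-up numbers:
import numpy as np
x_window = np.array([3., 4., 5., 6.])   # stand-in for x_counts (train window)
y_window = np.array([7., 8.])           # stand-in for y_counts (predict window)
mean, std = x_window.mean(), x_window.std()
norm_x = (x_window - mean) / std
norm_y = (y_window - mean) / std        # same constants, no statistics taken from y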
+ return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_page_features, page_ix + #later on the above is assigned to: + #self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ + #self.norm_std, self.page_features, self.page_ix = it_tensors + - def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], n_pages: int, mode: ModelMode, n_epoch=None, + def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: int, mode: ModelMode, n_epoch=None, batch_size=127, runs_in_burst=1, verbose=True, predict_window=60, train_window=500, train_completeness_threshold=1, predict_completeness_threshold=1, back_offset=0, train_skip_first=0, rand_seed=None): @@ -227,7 +254,7 @@ def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], n_pages: int, Create data preprocessing pipeline :param inp: Raw input data :param features: Features tensors (subset of data in inp) - :param n_pages: Total number of pages + :param N_time_series: Total number of pages :param mode: Train/Predict/Eval mode selector :param n_epoch: Number of epochs. Generates endless data stream if None :param batch_size: @@ -242,7 +269,7 @@ def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], n_pages: int, :param rand_seed: """ - self.n_pages = n_pages + self.N_time_series = N_time_series self.inp = inp self.batch_size = batch_size self.rand_seed = rand_seed @@ -293,7 +320,7 @@ def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], n_pages: int, # Assign all tensors to class variables self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ - self.norm_std, self.page_features, self.page_ix = it_tensors + self.norm_std, self.page_features, self.page_ix = it_tensors #!!!!!!!!!!!!! names hardcoded ned to change to my fgeatures self.encoder_features_depth = self.time_x.shape[2].value @@ -305,5 +332,5 @@ def init_iterator(self, session): def page_features(inp: VarFeeder): - return (inp.hits, inp.pf_agent, inp.pf_country, inp.pf_site, - inp.page_ix, inp.page_popularity, inp.year_autocorr, inp.quarter_autocorr) + return (inp.counts, inp.pf_agent, inp.pf_country, inp.pf_site,#!!!!!!!!!!!!! 
names hardcoded ned to change to my fgeatures + inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr) diff --git a/make_features.py b/make_features.py index 1acdded..84e37c5 100755 --- a/make_features.py +++ b/make_features.py @@ -43,7 +43,7 @@ def read_file(file): df = pd.read_pickle(path) else: # Official data - filename = f'train_2[{data_type}_{sampling_period}]' + filename = f'train_2_{data_type}_{sampling_period}' df = read_file(filename) if data_type=='kaggle': @@ -383,8 +383,10 @@ def run(): print(f'Using {args.features_set} set of features') if args.features_set == 'arturius': + if args.data_type == 'kaggle': + raise Exception('arturius features can only work with data_type "kaggle" since scrapes wikipedia pages') tensors = dict( - hits=df, + counts=df, lagged_ix=lagged_ix, page_map=page_map, page_ix=df.index.values, @@ -397,19 +399,17 @@ def run(): dow=dow,#N x 2 array since encoded week periodicity as complex number ) - - elif args.features_set == 'simple': tensors = dict( - hits=df, + counts=df, count_median=count_median,#this is just the median feature, can put in others too dow=dow, ) elif (args.features_set == 'full') or (args.features_set == 'full_w_context'): tensors = dict( - hits=df, - page_ix=df.index.values, + counts=df, + page_ix=df.index.values,#!!!!!! year_autocorr=year_autocorr, quarter_autocorr=quarter_autocorr, diff --git a/trainer.py b/trainer.py index 4ecb65b..4d40dbe 100755 --- a/trainer.py +++ b/trainer.py @@ -415,10 +415,10 @@ def train(name, hparams, multi_gpu=False, n_models=1, train_completeness_thresho with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") if side_split: - splitter = Splitter(page_features(inp), inp.page_map, 3, train_sampling=train_sampling, + splitter = Splitter(page_features(inp), inp.page_map, 3, train_sampling=train_sampling,#!!!!!!!!!!!! will need to edit page_features function and get rid of page_map test_sampling=eval_sampling, seed=seed) else: - splitter = FakeSplitter(page_features(inp), 3, seed=seed, test_sampling=eval_sampling) + splitter = FakeSplitter(page_features(inp), 3, seed=seed, test_sampling=eval_sampling) #!!!!!!!!!!!! will need to edit page_features function real_train_pages = splitter.splits[0].train_size real_eval_pages = splitter.splits[0].test_size @@ -440,7 +440,7 @@ def create_model(scope, index, prefix, seed): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): split = splitter.splits[index] - pipe = InputPipe(inp, features=split.train_set, n_pages=split.train_size, + pipe = InputPipe(inp, features=split.train_set, N_time_series=split.train_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.TRAIN, batch_size=batch_size, n_epoch=None, verbose=verbose, train_completeness_threshold=train_completeness_threshold, predict_completeness_threshold=train_completeness_threshold, train_window=train_window, @@ -449,7 +449,7 @@ def create_model(scope, index, prefix, seed): back_offset=predict_window if forward_split else 0) inp_scope.reuse_variables() if side_split: - side_eval_pipe = InputPipe(inp, features=split.test_set, n_pages=split.test_size, + side_eval_pipe = InputPipe(inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! 
page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -458,7 +458,7 @@ def create_model(scope, index, prefix, seed): else: side_eval_pipe = None if forward_split: - forward_eval_pipe = InputPipe(inp, features=split.test_set, n_pages=split.test_size, + forward_eval_pipe = InputPipe(inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -581,7 +581,7 @@ def ema_vars(model): for epoch in range(max_epoch): - # n_steps = pusher.n_pages // batch_size + # n_steps = pusher.N_time_series // batch_size if tqdm: tqr = trange(steps_per_epoch, desc="%2d" % (epoch + 1), leave=False) else: @@ -665,7 +665,7 @@ def predict(checkpoints, hparams, return_x=False, verbose=False, predict_window= with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") - pipe = InputPipe(inp, page_features(inp), inp.n_pages, mode=ModelMode.PREDICT, batch_size=batch_size, + pipe = InputPipe(inp, page_features(inp), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, #!!!!!!!!!!!!!!!! page_features n_epoch=1, verbose=verbose, train_completeness_threshold=0.01, predict_window=predict_window, From 0e7b6282988cf407952bbb2b6f933c1c0d799a91 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 2 Jul 2018 09:48:07 -0700 Subject: [PATCH 07/42] continue putting in my features --- PREDICT.py | 6 ++- input_pipe.py | 119 ++++++++++++++++++++++++++++++++++++++--------- make_features.py | 16 +++---- trainer.py | 19 ++++---- 4 files changed, 120 insertions(+), 40 deletions(-) diff --git a/PREDICT.py b/PREDICT.py index 0617d65..e8adbb6 100755 --- a/PREDICT.py +++ b/PREDICT.py @@ -25,6 +25,10 @@ +FEATURES_SET = 'arturius'# 'arturius' 'simple' 'full' + + + # ============================================================================= # Performance Metrics @@ -65,7 +69,7 @@ def mean_smape(true, pred): t_preds = [] for tm in range(3): tf.reset_default_graph() - t_preds.append(predict(paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63, + t_preds.append(predict(FEATURES_SET, paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63, n_models=3, target_model=tm, seed=2, batch_size=2048, asgd=True)) diff --git a/input_pipe.py b/input_pipe.py index 06b8766..ce9c59c 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -190,16 +190,27 @@ def make_features(self, x_counts, y_counts, dow, lagged_counts, pf_agent, pf_cou def make_features(self, ts_dynamic, ts_static, context): - """ - + # Split day of week to train and test if ts_dynamic['dow']: x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) if ts_dynamic['woy']: x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) if ts_dynamic['moy']: - x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) - + x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) + + """ + + + + if self.sampling_period == 'daily': + x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) + x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func + elif self.sampling_period == 
'weekly': + x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) + elif self.sampling_period == 'monthly': + x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) + # Normalize counts mean = tf.reduce_mean(x_counts) @@ -208,14 +219,37 @@ def make_features(self, ts_dynamic, ts_static, context): norm_y_counts = (y_counts - mean) / std norm_lagged_counts = (lagged_counts - mean) / std #!!!!!! seems like there is some leakage in time here??? The y lagged are normalized in a way that is a function of the y data ?? - # Split lagged counts to train and test - if ts_dynamic['lagged_ix']: + + if self.features_set == 'arturius': + # Split lagged counts to train and test x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) + + # Combine all page features into single tensor + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr])#!!!!!!! if kaggle feats. Else need also the oher quntiles too + flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) + series_features = tf.expand_dims(flat_features, 0) + - # Combine all page features into single tensor - stacked_features = tf.stack([count_median, quarter_autocorr, year_autocorr])#!!!!!!! if kaggle feats. Else need also the oher quntiles too - flat_page_features = tf.concat([pf_agent, pf_country, pf_site, stacked_features], axis=0) - page_features = tf.expand_dims(flat_page_features, 0) + + if self.features_set == 'full': + # Split lagged counts to train and test + x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) + + # Combine all page features into single tensor + + scalar_features = tf.stack([count_median, count_variance, \ + count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, \ + quarter_autocorr, year_autocorr]) + flat_features = tf.concat([scalar_features], axis=0) + series_features = tf.expand_dims(flat_features, 0) + + #!!!!!!! also do for simple, full w context + #.... + + + + #Any time dependent feature need to be split into x [train] and y [test] + #the time INdependent features [constant per fixture] will just be tiled same way either way except diff lengths # Train features x_features = tf.concat([ @@ -223,9 +257,9 @@ def make_features(self, ts_dynamic, ts_static, context): tf.expand_dims(norm_x_counts, -1), x_dow, x_lagged, - # Stretch page_features to all training days + # Stretch series_features to all training days # [1, features] -> [n_days, features] - tf.tile(page_features, [self.train_window, 1]) + tf.tile(series_features, [self.train_window, 1]) ], axis=1) # Test features @@ -233,20 +267,20 @@ def make_features(self, ts_dynamic, ts_static, context): # [n_days] -> [n_days, 1] y_dow, y_lagged, - # Stretch page_features to all testing days + # Stretch series_features to all testing days # [1, features] -> [n_days, features] - tf.tile(page_features, [self.predict_window, 1]) + tf.tile(series_features, [self.predict_window, 1]) ], axis=1) #!!!!! why no lagged_y alnoe, only in y_features??? #!!!! why no norm_y_counts ????? 
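# Aside (sketch, not part of this diff): the tf.tile/tf.concat pattern above
# gives every timestep its own copy of the per-series static vector alongside
# the time-varying columns. NumPy equivalent with made-up shapes:
import numpy as np
train_window, n_static = 280, 10
series_static = np.random.rand(1, n_static)   # per-series scalar features
norm_x = np.random.rand(train_window, 1)      # normalized counts
dow = np.random.rand(train_window, 2)         # cyclic day-of-week
x_feats = np.concatenate([norm_x, dow, np.tile(series_static, (train_window, 1))], axis=1)
assert x_feats.shape == (train_window, 1 + 2 + n_static)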
- return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_page_features, page_ix + return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_features, page_ix #later on the above is assigned to: #self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ - #self.norm_std, self.page_features, self.page_ix = it_tensors + #self.norm_std, self.series_features, self.page_ix = it_tensors - def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: int, mode: ModelMode, n_epoch=None, + def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: int, mode: ModelMode, n_epoch=None, batch_size=127, runs_in_burst=1, verbose=True, predict_window=60, train_window=500, train_completeness_threshold=1, predict_completeness_threshold=1, back_offset=0, train_skip_first=0, rand_seed=None): @@ -269,6 +303,10 @@ def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: :param rand_seed: """ + + self.features_set = features_set + self.sampling_period = sampling_period + self.N_time_series = N_time_series self.inp = inp self.batch_size = batch_size @@ -319,8 +357,16 @@ def __init__(self, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: it_tensors = self.iterator.get_next() # Assign all tensors to class variables - self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ - self.norm_std, self.page_features, self.page_ix = it_tensors #!!!!!!!!!!!!! names hardcoded ned to change to my fgeatures + if self.features_set=='arturius': + self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ + self.norm_std, self.series_features, self.page_ix = it_tensors #!!!!!!!!!!!!! names hardcoded ned to change to my fgeatures + if self.features_set=='simple': + pass + if self.features_set=='full': + pass + if self.features_set=='full_w_context': + pass + self.encoder_features_depth = self.time_x.shape[2].value @@ -331,6 +377,35 @@ def init_iterator(self, session): session.run(self.iterator.initializer) -def page_features(inp: VarFeeder): - return (inp.counts, inp.pf_agent, inp.pf_country, inp.pf_site,#!!!!!!!!!!!!! names hardcoded ned to change to my fgeatures - inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr) +def page_features(inp: VarFeeder, features_set): + + if features_set=='arturius': + d = (inp.counts, inp.pf_agent, inp.pf_country, inp.pf_site, + inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr) + + elif features_set=='simple': + raise Exception('not ready yet') + elif features_set=='full': + d = (inp.counts, + inp.count_median, inp.count_variance, + inp.count_pctl_0, + inp.count_pctl_5, + inp.count_pctl_25, + inp.count_pctl_75, + inp.count_pctl_95, + inp.count_pctl_100, + inp.page_ix, inp.year_autocorr, inp.quarter_autocorr) + elif features_set=='full_w_context': + raise Exception('not ready yet') + + + #!!!! does it actually need the dow, moy features??? 
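# Aside (sketch, not part of this diff): if the calendar tensors do need to be
# returned here, the selection could mirror make_features.py (daily -> dow+woy,
# weekly -> woy, monthly -> moy). Illustrative helper; attribute names follow
# the tensors dict built in make_features.py:
def calendar_features(inp, sampling_period):
    if sampling_period == 'daily':
        return (inp.dow, inp.woy)
    elif sampling_period == 'weekly':
        return (inp.woy,)
    elif sampling_period == 'monthly':
        return (inp.moy,)
    raise ValueError("sampling_period must be 'daily', 'weekly' or 'monthly'")
# usage would be something like: d += calendar_features(inp, sampling_period)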
+ #if this is required then would need the sample_period as an input to this function [follw pattern of features_set] + """if sample_period=='daily': + d += (inp.dow,inp.woy) + elif sample_period=='weekly': + d += (inp.dow,inp.woy) + elif sample_period=='monthly': + d += (inp.dow,inp.woy)""" + + return d \ No newline at end of file diff --git a/make_features.py b/make_features.py index 84e37c5..c4fe151 100755 --- a/make_features.py +++ b/make_features.py @@ -110,7 +110,7 @@ def single_autocorr(series, lag): def batch_autocorr(data, lag, starts, ends, threshold, backoffset=0): """ Calculate autocorrelation for batch (many time series at once) - :param data: Time series, shape [n_pages, n_days] + :param data: Time series, shape [N_time_series, n_days] :param lag: Autocorrelation lag :param starts: Start index for each series :param ends: End index for each series @@ -146,14 +146,14 @@ def find_start_end(data: np.ndarray): """ Calculates start and end of real traffic data. Start is an index of first non-zero, non-NaN value, end is index of last non-zero, non-NaN value - :param data: Time series, shape [n_pages, n_days] + :param data: Time series, shape [N_time_series, n_days] :return: """ - n_pages = data.shape[0] + N_time_series = data.shape[0] n_days = data.shape[1] - start_idx = np.full(n_pages, -1, dtype=np.int32) - end_idx = np.full(n_pages, -1, dtype=np.int32) - for page in range(n_pages): + start_idx = np.full(N_time_series, -1, dtype=np.int32) + end_idx = np.full(N_time_series, -1, dtype=np.int32) + for page in range(N_time_series): # scan from start to the end for day in range(n_days): if not np.isnan(data[page, day]) and data[page, day] > 0: @@ -248,7 +248,7 @@ def encode_page_features(df) -> Dict[str, pd.DataFrame]: """ Applies one-hot encoding to page features and normalises result :param df: page features DataFrame (one column per feature) - :return: dictionary feature_name:encoded_values. Encoded values is [n_pages,n_values] array + :return: dictionary feature_name:encoded_values. Encoded values is [N_time_series,n_values] array """ def encode(column) -> pd.DataFrame: one_hot = pd.get_dummies(df[column], drop_first=False) @@ -462,7 +462,7 @@ def run(): plain = dict( features_days=len(features_days), data_days=len(df.columns), - n_pages=len(df), + N_time_series=len(df), data_start=data_start, data_end=data_end, features_end=features_end diff --git a/trainer.py b/trainer.py index 4d40dbe..2c33773 100755 --- a/trainer.py +++ b/trainer.py @@ -396,7 +396,7 @@ def process_eval_results(self, run_results, offset, global_step, epoch): return mae, smape, new_best, smooth_mae, smooth_smape -def train(name, hparams, multi_gpu=False, n_models=1, train_completeness_threshold=0.01, +def train(features_set, name, hparams, multi_gpu=False, n_models=1, train_completeness_threshold=0.01, seed=None, logdir='data/logs', max_epoch=100, patience=2, train_sampling=1.0, eval_sampling=1.0, eval_memsize=5, gpu=0, gpu_allow_growth=False, save_best_model=False, forward_split=False, write_summaries=False, verbose=False, asgd_decay=None, tqdm=True, @@ -415,10 +415,10 @@ def train(name, hparams, multi_gpu=False, n_models=1, train_completeness_thresho with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") if side_split: - splitter = Splitter(page_features(inp), inp.page_map, 3, train_sampling=train_sampling,#!!!!!!!!!!!! will need to edit page_features function and get rid of page_map + splitter = Splitter(page_features(inp, features_set), inp.page_map, 3, train_sampling=train_sampling,#!!!!!!!!!!!! 
will need to edit page_features function and get rid of page_map test_sampling=eval_sampling, seed=seed) else: - splitter = FakeSplitter(page_features(inp), 3, seed=seed, test_sampling=eval_sampling) #!!!!!!!!!!!! will need to edit page_features function + splitter = FakeSplitter(page_features(inp, features_set), 3, seed=seed, test_sampling=eval_sampling) #!!!!!!!!!!!! will need to edit page_features function real_train_pages = splitter.splits[0].train_size real_eval_pages = splitter.splits[0].test_size @@ -440,7 +440,7 @@ def create_model(scope, index, prefix, seed): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): split = splitter.splits[index] - pipe = InputPipe(inp, features=split.train_set, N_time_series=split.train_size,#!!!!!!!!!!!!!!!! page_features + pipe = InputPipe(features_set, inp, features=split.train_set, N_time_series=split.train_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.TRAIN, batch_size=batch_size, n_epoch=None, verbose=verbose, train_completeness_threshold=train_completeness_threshold, predict_completeness_threshold=train_completeness_threshold, train_window=train_window, @@ -449,7 +449,7 @@ def create_model(scope, index, prefix, seed): back_offset=predict_window if forward_split else 0) inp_scope.reuse_variables() if side_split: - side_eval_pipe = InputPipe(inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + side_eval_pipe = InputPipe(features_set, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -458,7 +458,7 @@ def create_model(scope, index, prefix, seed): else: side_eval_pipe = None if forward_split: - forward_eval_pipe = InputPipe(inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + forward_eval_pipe = InputPipe(features_set, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -660,12 +660,12 @@ def ema_vars(model): return np.mean(best_epoch_smape, dtype=np.float64) -def predict(checkpoints, hparams, return_x=False, verbose=False, predict_window=6, back_offset=0, n_models=1, +def predict(features_set, checkpoints, hparams, return_x=False, verbose=False, predict_window=6, back_offset=0, n_models=1, target_model=0, asgd=False, seed=1, batch_size=1024): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") - pipe = InputPipe(inp, page_features(inp), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, #!!!!!!!!!!!!!!!! page_features + pipe = InputPipe(features_set, inp, page_features(inp, features_set), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, #!!!!!!!!!!!!!!!! page_features n_epoch=1, verbose=verbose, train_completeness_threshold=0.01, predict_window=predict_window, @@ -744,6 +744,7 @@ def predict(checkpoints, hparams, return_x=False, verbose=False, predict_window= if __name__ == '__main__': parser = argparse.ArgumentParser(description='Train the model') + parser.add_argument('features_set', help="Which set of features to use. His default for Kaggle vs. 
one of my custom sets: {'arturius','simple','full','full_w_context'}") parser.add_argument('--name', default='s32', help='Model name to identify different logs/checkpoints') parser.add_argument('--hparam_set', default='s32', help="Hyperparameters set to use (see hparams.py for available sets)") parser.add_argument('--n_models', default=1, type=int, help="Jointly train n models with different seeds") @@ -782,5 +783,5 @@ def predict(checkpoints, hparams, return_x=False, verbose=False, predict_window= # save_from_step=10500) # print("Training result:", result) - # preds = predict('data/cpt/fair_365-15428', 380, hparams, verbose=True, back_offset=60, n_models=3) + # preds = PREDICT('data/cpt/fair_365-15428', 380, hparams, verbose=True, back_offset=60, n_models=3) # print(preds) From ad816133dfcc12811ae815296caf61bf7c9f47e0 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 2 Jul 2018 12:01:31 -0700 Subject: [PATCH 08/42] working with few example our features --- Readme.md | 10 ++++++++-- input_pipe.py | 32 ++++++++++++++++++++++---------- make_features.py | 5 ++++- trainer.py | 19 ++++++++++--------- 4 files changed, 44 insertions(+), 22 deletions(-) diff --git a/Readme.md b/Readme.md index a09eb3d..21af290 100755 --- a/Readme.md +++ b/Readme.md @@ -51,15 +51,21 @@ python3 make_features.py data/vars kaggle daily full --add_days=63 #no reason to expect 10000 to 11500 is good range to save out. View loss along the way -6. $python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 +python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 +--name TEST_attn_head --hparam_set=TEST_attn_head +--name TEST_stacked --hparam_set=TEST_stacked + + 7. $python3 PREDICT.py - confirmed it runs with 2 layers stacked GRU (for both encoder and decoder modules), or with attention mechanism. Performance is worse in both cases [SMAPE], at least initially. - tried bidirectional encoder but has input dimension issues, think about that more later. + + To do: -0. get to work w my features +0. -- got working with few examples of our added features (one static, one time varying 2D), now just organize programmatically 0. save log files to view SMAPE etc metrics during training 1. finish PREPROCESS.py to do better imputation using basic forecasting method [just use STL or Theta to fill in small gaps; otherwise remove blocks] 2. PREPROCESS.py - allow downsample in time to weekly, monthly diff --git a/input_pipe.py b/input_pipe.py index ce9c59c..9459f9d 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -105,8 +105,11 @@ def cut(self, counts, start, end): # cut day of week if self.inp.dow: - cropped_dow = self.inp.dow[start:end] #!!!!!!! only if using dow feature [sampling daily] - #!!!!!!!!!!!! do same for moy , woy if using those features + cropped_dow = self.inp.dow[start:end] + if self.inp.woy: + cropped_woy = self.inp.woy[start:end] + + if self.inp.lagged_ix: # Cut lagged counts @@ -127,7 +130,7 @@ def cut(self, counts, start, end): # Convert NaN to zero in for train data x_counts = tf.where(tf.is_nan(x_counts), tf.zeros_like(x_counts), x_counts) - return x_counts, y_counts, cropped_dow, lagged_hit #!!!!!!!!!!!! return other cropped time dependent features as well + return x_counts, y_counts, cropped_dow, lagged_hit, cropped_woy #!!!!!!!!!!!! 
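The cut() change above crops a week-of-year feature with the same [start:end] window as the counts and the day-of-week feature. Both calendar features are built in make_features.py as cos/sin pairs so that the encoding wraps around at the end of each cycle. A minimal sketch of that encoding and of the cropping, using placeholder window bounds and the 7-step and 53-step periods from the surrounding code:

import numpy as np
import pandas as pd

dates = pd.date_range('2017-01-01', '2017-12-31', freq='D')
dow_norm = dates.dayofweek.values / (7.0 / (2 * np.pi))
dow = np.stack([np.cos(dow_norm), np.sin(dow_norm)], axis=-1)     # [n_days, 2]
woy_norm = dates.weekofyear.values / (53.0 / (2 * np.pi))
woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1)     # [n_days, 2]

start, end = 100, 200                            # same bounds used to crop the counts
cropped_dow, cropped_woy = dow[start:end], woy[start:end]
print(cropped_dow.shape, cropped_woy.shape)      # (100, 2) (100, 2)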
return other cropped time dependent features as well #added cropped_woy @@ -178,8 +181,8 @@ def reject_filter(self, x_counts, y_counts, *args): keep = zeros_x <= self.max_train_empty return keep - def make_features(self, x_counts, y_counts, dow, lagged_counts, pf_agent, pf_country, pf_site, page_ix, - count_median, year_autocorr, quarter_autocorr): #!!!!!!!!!!!! if kaggle feats as is + def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, + count_median, year_autocorr, quarter_autocorr, count_pctl_100): #!!!!!!!!!!!! if kaggle feats as is #!!!! added woy, count_pctl_100 """ Main method. Assembles input data into final tensors @@ -204,6 +207,9 @@ def make_features(self, ts_dynamic, ts_static, context): if self.sampling_period == 'daily': + print(dow) + print() + print(woy) x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func elif self.sampling_period == 'weekly': @@ -225,13 +231,13 @@ def make_features(self, ts_dynamic, ts_static, context): x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) # Combine all page features into single tensor - scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr])#!!!!!!! if kaggle feats. Else need also the oher quntiles too + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100])#!!!!!!! if kaggle feats. Else need also the oher quntiles too flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) series_features = tf.expand_dims(flat_features, 0) - if self.features_set == 'full': + """if self.features_set == 'full': # Split lagged counts to train and test x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) @@ -242,9 +248,8 @@ def make_features(self, ts_dynamic, ts_static, context): quarter_autocorr, year_autocorr]) flat_features = tf.concat([scalar_features], axis=0) series_features = tf.expand_dims(flat_features, 0) - #!!!!!!! also do for simple, full w context - #.... + #....""" @@ -256,6 +261,7 @@ def make_features(self, ts_dynamic, ts_static, context): # [n_days] -> [n_days, 1] tf.expand_dims(norm_x_counts, -1), x_dow, + x_woy, #!!!!!! added x_lagged, # Stretch series_features to all training days # [1, features] -> [n_days, features] @@ -266,6 +272,7 @@ def make_features(self, ts_dynamic, ts_static, context): y_features = tf.concat([ # [n_days] -> [n_days, 1] y_dow, + y_woy, #!!!!!! added y_lagged, # Stretch series_features to all testing days # [1, features] -> [n_days, features] @@ -286,6 +293,8 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter train_skip_first=0, rand_seed=None): """ Create data preprocessing pipeline + features_set - arturius, simple, full, full_w_context + sampling_period - daily, weekly, monthly :param inp: Raw input data :param features: Features tensors (subset of data in inp) :param N_time_series: Total number of pages @@ -381,7 +390,10 @@ def page_features(inp: VarFeeder, features_set): if features_set=='arturius': d = (inp.counts, inp.pf_agent, inp.pf_country, inp.pf_site, - inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr) + inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr, +# inp.woy, + inp.count_pctl_100 + )#!!!!!!!!!!!! 
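The x_features concatenation above is what sets encoder_features_depth: one column for the normalized counts, two each for the day-of-week and week-of-year cos/sin pairs, the lagged-count columns, and the tiled per-series features. A shape-only NumPy sketch, where every size is a placeholder rather than the repo's actual dimensions:

import numpy as np

train_window, n_lags, n_static = 283, 4, 18
norm_x_counts = np.random.randn(train_window)          # normalized counts, encoder part
x_dow = np.random.randn(train_window, 2)               # cos/sin day of week
x_woy = np.random.randn(train_window, 2)               # cos/sin week of year
x_lagged = np.random.randn(train_window, n_lags)       # lagged count columns
series_features = np.random.randn(1, n_static)         # one-hot page features plus scalars

x_features = np.concatenate([norm_x_counts[:, None], x_dow, x_woy, x_lagged,
                             np.tile(series_features, (train_window, 1))], axis=1)
print(x_features.shape)                                # (283, 1 + 2 + 2 + 4 + 18) = (283, 27)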
ading 2 more elif features_set=='simple': raise Exception('not ready yet') diff --git a/make_features.py b/make_features.py index c4fe151..d13d0cc 100755 --- a/make_features.py +++ b/make_features.py @@ -383,7 +383,7 @@ def run(): print(f'Using {args.features_set} set of features') if args.features_set == 'arturius': - if args.data_type == 'kaggle': + if args.data_type != 'kaggle': raise Exception('arturius features can only work with data_type "kaggle" since scrapes wikipedia pages') tensors = dict( counts=df, @@ -397,6 +397,9 @@ def run(): year_autocorr=year_autocorr, quarter_autocorr=quarter_autocorr, dow=dow,#N x 2 array since encoded week periodicity as complex number + + woy=woy,#!!!!!!!! + count_pctl_100=percentiles[5],#max #!!!!!!!!!!!!!!!! just to see what happens: apend one of my features. ) elif args.features_set == 'simple': diff --git a/trainer.py b/trainer.py index 2c33773..6f61da1 100755 --- a/trainer.py +++ b/trainer.py @@ -396,7 +396,7 @@ def process_eval_results(self, run_results, offset, global_step, epoch): return mae, smape, new_best, smooth_mae, smooth_smape -def train(features_set, name, hparams, multi_gpu=False, n_models=1, train_completeness_threshold=0.01, +def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_models=1, train_completeness_threshold=0.01, seed=None, logdir='data/logs', max_epoch=100, patience=2, train_sampling=1.0, eval_sampling=1.0, eval_memsize=5, gpu=0, gpu_allow_growth=False, save_best_model=False, forward_split=False, write_summaries=False, verbose=False, asgd_decay=None, tqdm=True, @@ -435,12 +435,12 @@ def train(features_set, name, hparams, multi_gpu=False, n_models=1, train_comple all_models: List[ModelTrainerV2] = [] - def create_model(scope, index, prefix, seed): + def create_model(features_set, sampling_period, scope, index, prefix, seed): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): split = splitter.splits[index] - pipe = InputPipe(features_set, inp, features=split.train_set, N_time_series=split.train_size,#!!!!!!!!!!!!!!!! page_features + pipe = InputPipe(features_set, sampling_period, inp, features=split.train_set, N_time_series=split.train_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.TRAIN, batch_size=batch_size, n_epoch=None, verbose=verbose, train_completeness_threshold=train_completeness_threshold, predict_completeness_threshold=train_completeness_threshold, train_window=train_window, @@ -449,7 +449,7 @@ def create_model(scope, index, prefix, seed): back_offset=predict_window if forward_split else 0) inp_scope.reuse_variables() if side_split: - side_eval_pipe = InputPipe(features_set, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + side_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -458,7 +458,7 @@ def create_model(scope, index, prefix, seed): else: side_eval_pipe = None if forward_split: - forward_eval_pipe = InputPipe(features_set, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! 
page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -504,14 +504,14 @@ def create_model(scope, index, prefix, seed): if n_models == 1: with tf.device(f"/gpu:{gpu}"): scope = tf.get_variable_scope() - all_models = [create_model(scope, 0, None, seed=seed)] + all_models = [create_model(features_set, sampling_period, scope, 0, None, seed=seed)] else: for i in range(n_models): device = f"/gpu:{i}" if multi_gpu else f"/gpu:{gpu}" with tf.device(device): prefix = f"m_{i}" with tf.variable_scope(prefix) as scope: - all_models.append(create_model(scope, i, prefix=prefix, seed=seed + i)) + all_models.append(create_model(features_set, sampling_period, scope, i, prefix=prefix, seed=seed + i)) trainer = MultiModelTrainer(all_models, inc_step) if save_best_model or save_from_step: saver_path = f'data/cpt/{name}' @@ -660,12 +660,12 @@ def ema_vars(model): return np.mean(best_epoch_smape, dtype=np.float64) -def predict(features_set, checkpoints, hparams, return_x=False, verbose=False, predict_window=6, back_offset=0, n_models=1, +def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, verbose=False, predict_window=6, back_offset=0, n_models=1, target_model=0, asgd=False, seed=1, batch_size=1024): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") - pipe = InputPipe(features_set, inp, page_features(inp, features_set), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, #!!!!!!!!!!!!!!!! page_features + pipe = InputPipe(features_set, sampling_period, inp, page_features(inp, features_set), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, #!!!!!!!!!!!!!!!! page_features n_epoch=1, verbose=verbose, train_completeness_threshold=0.01, predict_window=predict_window, @@ -745,6 +745,7 @@ def predict(features_set, checkpoints, hparams, return_x=False, verbose=False, p if __name__ == '__main__': parser = argparse.ArgumentParser(description='Train the model') parser.add_argument('features_set', help="Which set of features to use. His default for Kaggle vs. one of my custom sets: {'arturius','simple','full','full_w_context'}") + parser.add_argument('sampling_period', help="{'daily','weekly','monthly'}") parser.add_argument('--name', default='s32', help='Model name to identify different logs/checkpoints') parser.add_argument('--hparam_set', default='s32', help="Hyperparameters set to use (see hparams.py for available sets)") parser.add_argument('--n_models', default=1, type=int, help="Jointly train n models with different seeds") From 858ff2aeed37e182fd7280a993f1d3452be0415d Mon Sep 17 00:00:00 2001 From: gk Date: Tue, 3 Jul 2018 02:36:11 -0700 Subject: [PATCH 09/42] working w our data and features --- Readme.md | 2 +- input_pipe.py | 236 +++++++++++++++++++++++++++-------------------- make_features.py | 2 + model.py | 2 + trainer.py | 10 +- 5 files changed, 147 insertions(+), 105 deletions(-) diff --git a/Readme.md b/Readme.md index 21af290..1e11cb9 100755 --- a/Readme.md +++ b/Readme.md @@ -48,7 +48,7 @@ GK modifications for own data: 4. $python3 PREPROCESS.py 5. $python3 make_features.py data/vars kaggle daily arturius --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full, full_w_context} depending on using default Arturius kaggle vs. 
own custom for this application; and specify sampling period python3 make_features.py data/vars kaggle daily full --add_days=63 - +python3 make_features.py data/vars ours daily full --add_days=63 #no reason to expect 10000 to 11500 is good range to save out. View loss along the way python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 diff --git a/input_pipe.py b/input_pipe.py index 9459f9d..617caf4 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -101,16 +101,35 @@ def cut(self, counts, start, end): """ # Pad counts to ensure we have enough array length for prediction counts = tf.concat([counts, tf.fill([self.predict_window], np.NaN)], axis=0) - cropped_hit = counts[start:end] + cropped_count = counts[start:end] - # cut day of week - if self.inp.dow: + + # ============================================================================= + # Ordinal periodic variables + # which features are here depends on what the sampling period is for the data + # ============================================================================= + if self.sampling_period=='daily': cropped_dow = self.inp.dow[start:end] - if self.inp.woy: cropped_woy = self.inp.woy[start:end] + cropped_moy = 0*cropped_dow + elif self.sampling_period=='weekly': + cropped_woy = self.inp.woy[start:end] + cropped_dow = 0*cropped_woy + cropped_moy = 0*cropped_woy + elif self.sampling_period=='monthly': + cropped_moy = self.inp.moy[start:end] + cropped_dow = 0*cropped_moy + cropped_woy = 0*cropped_moy + - - + + # ============================================================================= + # Other features that are also time-varying + # that can be used, which depend on the choice of feature_set + # self.features_set = features_set + # ============================================================================= + + #If used Arturius' original feature set then will include the lagged data: if self.inp.lagged_ix: # Cut lagged counts # gather() accepts only int32 indexes @@ -120,20 +139,30 @@ def cut(self, counts, start, end): # Convert -1 to 0 for gather(), it don't accept anything exotic cropped_lags = tf.maximum(cropped_lags, 0) # Translate lag indexes to count values - lagged_hit = tf.gather(counts, cropped_lags) + lagged_count = tf.gather(counts, cropped_lags) # Convert masked (see above) or NaN lagged counts to zeros - lag_zeros = tf.zeros_like(lagged_hit) - lagged_hit = tf.where(lag_mask | tf.is_nan(lagged_hit), lag_zeros, lagged_hit) + lag_zeros = tf.zeros_like(lagged_count) + lagged_count = tf.where(lag_mask | tf.is_nan(lagged_count), lag_zeros, lagged_count) + + + #Will always have the count series (the series we predict on): # Split for train and test - x_counts, y_counts = tf.split(cropped_hit, [self.train_window, self.predict_window], axis=0) + x_counts, y_counts = tf.split(cropped_count, [self.train_window, self.predict_window], axis=0) # Convert NaN to zero in for train data x_counts = tf.where(tf.is_nan(x_counts), tf.zeros_like(x_counts), x_counts) - return x_counts, y_counts, cropped_dow, lagged_hit, cropped_woy #!!!!!!!!!!!! 
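The lagged-count handling in cut() gathers past values by precomputed index, replaces negative indices before the gather, and zeroes out the masked or NaN entries afterwards. The same logic in plain NumPy on a toy series, for illustration only:

import numpy as np

counts = np.arange(10, dtype=np.float32)                   # toy series
offsets = np.array([3, 7])                                 # stand-ins for the real quarterly/yearly lags
lagged_ix = np.arange(len(counts))[:, None] - offsets      # [n_steps, n_lags] indices into the past
lag_mask = lagged_ix < 0                                   # lag reaches before the series start
safe_ix = np.maximum(lagged_ix, 0)                         # gather needs non-negative indices
lagged_count = counts[safe_ix]
lagged_count[lag_mask | np.isnan(lagged_count)] = 0.0      # masked or NaN lags become zero
print(lagged_count[8])                                     # [5. 1.]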
return other cropped time dependent features as well #added cropped_woy + if self.features_set=='arturius' or self.features_set=='full':#for now, for full just do sam [include lagged] + return x_counts, y_counts, cropped_dow, lagged_count, cropped_woy, cropped_moy +# elif self.features_set=='full': +# return aaaaaaaaaaa #can drop lagged + else: + raise Exception('problem with features_set') + + def cut_train(self, counts, *args): """ Cuts a segment of time series for training. Randomly chooses starting point. @@ -181,35 +210,18 @@ def reject_filter(self, x_counts, y_counts, *args): keep = zeros_x <= self.max_train_empty return keep - def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, - count_median, year_autocorr, quarter_autocorr, count_pctl_100): #!!!!!!!!!!!! if kaggle feats as is #!!!! added woy, count_pctl_100 + + + def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, + count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, + count_pctl_75, count_pctl_95, count_pctl_100, count_variance): + +# def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, +# count_median, year_autocorr, quarter_autocorr, count_pctl_100): """ Main method. Assembles input data into final tensors - - split into 3 sets of features: time-dependent, per series but static, and context features - input as dicts - ts_dynamic : {x_counts, y_counts, dow, woy, moy, lagged} - ts_static: {count_median, other percentiles..., autocorrelations, } - - def make_features(self, ts_dynamic, ts_static, context): - - - # Split day of week to train and test - if ts_dynamic['dow']: - x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) - if ts_dynamic['woy']: - x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) - if ts_dynamic['moy']: - x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) - """ - - - if self.sampling_period == 'daily': - print(dow) - print() - print(woy) x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func elif self.sampling_period == 'weekly': @@ -217,7 +229,6 @@ def make_features(self, ts_dynamic, ts_static, context): elif self.sampling_period == 'monthly': x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) - # Normalize counts mean = tf.reduce_mean(x_counts) std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_counts, mean))) @@ -225,66 +236,86 @@ def make_features(self, ts_dynamic, ts_static, context): norm_y_counts = (y_counts - mean) / std norm_lagged_counts = (lagged_counts - mean) / std #!!!!!! seems like there is some leakage in time here??? The y lagged are normalized in a way that is a function of the y data ?? + # Split lagged counts to train and test + x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) + if self.features_set == 'arturius': - # Split lagged counts to train and test - x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) - + # Combine all page features into single tensor scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100])#!!!!!!! if kaggle feats. 
Else need also the oher quntiles too flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) series_features = tf.expand_dims(flat_features, 0) - - - - """if self.features_set == 'full': - # Split lagged counts to train and test - x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) - - # Combine all page features into single tensor - scalar_features = tf.stack([count_median, count_variance, \ - count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, \ - quarter_autocorr, year_autocorr]) +# print(scalar_features) #4 +# print(flat_features) #18 +# print(series_features) +# print([pf_agent, pf_country, pf_site]) #4, 7, 3 #the one hot encoded features + + + elif self.features_set == 'full': + # Combine all page features into single tensor + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, + count_pctl_0, + count_pctl_5, + count_pctl_25, + count_pctl_75, + count_pctl_95, + count_pctl_100, + count_variance]) flat_features = tf.concat([scalar_features], axis=0) series_features = tf.expand_dims(flat_features, 0) - #!!!!!!! also do for simple, full w context - #....""" - - - + + #Any time dependent feature need to be split into x [train] and y [test] #the time INdependent features [constant per fixture] will just be tiled same way either way except diff lengths - # Train features - x_features = tf.concat([ - # [n_days] -> [n_days, 1] - tf.expand_dims(norm_x_counts, -1), - x_dow, - x_woy, #!!!!!! added - x_lagged, - # Stretch series_features to all training days - # [1, features] -> [n_days, features] - tf.tile(series_features, [self.train_window, 1]) - ], axis=1) + # Train features, depending on measurement frequency + x_features = tf.expand_dims(norm_x_counts, -1) # [n_days] -> [n_days, 1] + if self.sampling_period == 'daily': + x_features = tf.concat([x_features, x_dow, x_woy], axis=1) + elif self.sampling_period == 'weekly': + x_features = tf.concat([x_features, x_woy], axis=1) + elif self.sampling_period == 'monthly': + x_features = tf.concat([x_features, x_moy], axis=1) + #Regardess of period/frequency will have below features: + x_features = tf.concat([x_features, x_lagged, + # Stretch series_features to all training days + # [1, features] -> [n_days, features] + tf.tile(series_features, [self.train_window, 1])], axis=1) # Test features - y_features = tf.concat([ - # [n_days] -> [n_days, 1] - y_dow, - y_woy, #!!!!!! added - y_lagged, - # Stretch series_features to all testing days - # [1, features] -> [n_days, features] - tf.tile(series_features, [self.predict_window, 1]) - ], axis=1) - + if self.sampling_period == 'daily': + y_features = tf.concat([y_dow, y_woy], axis=1) + elif self.sampling_period == 'weekly': + y_features = y_woy + 0 + elif self.sampling_period == 'monthly': + y_features = y_moy + 0 + #Regardess of period/frequency will have below features: + y_features = tf.concat([y_features, y_lagged, + # Stretch series_features to all testing days + # [1, features] -> [n_days, features] + tf.tile(series_features, [self.predict_window, 1]) + ], axis=1) + +# print(x_features) + #!!!!! why no lagged_y alnoe, only in y_features??? #!!!! why no norm_y_counts ????? 
return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_features, page_ix - #later on the above is assigned to: - #self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ - #self.norm_std, self.series_features, self.page_ix = it_tensors + #Must match up with setting self.XYZ = it_tensors below in __init__. + + + + +# def make_features__full(self, x_counts, y_counts, dow, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, +# count_median, year_autocorr, quarter_autocorr, count_pctl_100): #!!!!!!!!!!!! if kaggle feats as is #!!!! added woy, count_pctl_100 +# """ +# Using different features than the arturius default set +# """ + + + def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: int, mode: ModelMode, n_epoch=None, @@ -352,12 +383,15 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter # Choose right cutter function for current ModelMode cutter = {ModelMode.TRAIN: self.cut_train, ModelMode.EVAL: self.cut_eval, ModelMode.PREDICT: self.cut_eval} + #Choose the right feature maker function, depending on feature_set used: + #feature_maker = {'arturius': self.make_features, 'full': self.make_features__full} + feature_maker = {'arturius': self.make_features, 'full': self.make_features}#!!!!!!just for now always use art # Create dataset, transform features and assemble batches root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) batch = (root_ds .map(cutter[mode]) .filter(self.reject_filter) - .map(self.make_features, num_parallel_calls=num_threads) + .map(feature_maker[self.features_set], num_parallel_calls=num_threads) .batch(batch_size) .prefetch(runs_in_burst * 2) ) @@ -366,18 +400,19 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter it_tensors = self.iterator.get_next() # Assign all tensors to class variables - if self.features_set=='arturius': + if self.features_set=='arturius' or self.features_set=='full': self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ self.norm_std, self.series_features, self.page_ix = it_tensors #!!!!!!!!!!!!! names hardcoded ned to change to my fgeatures if self.features_set=='simple': pass - if self.features_set=='full': - pass +# if self.features_set=='full': +# pass if self.features_set=='full_w_context': pass self.encoder_features_depth = self.time_x.shape[2].value + print('self.encoder_features_depth',self.encoder_features_depth) def load_vars(self, session): self.inp.restore(session) @@ -387,37 +422,40 @@ def init_iterator(self, session): def page_features(inp: VarFeeder, features_set): + """ + Other than inp.counts, these features are the static features. + So do not need to pass in here the time-varying ones like day of week, + month of year, lagged, etc. + + DO NOT return dow, woy, moy + """ if features_set=='arturius': d = (inp.counts, inp.pf_agent, inp.pf_country, inp.pf_site, inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr, -# inp.woy, inp.count_pctl_100 - )#!!!!!!!!!!!! 
ading 2 more + ) elif features_set=='simple': raise Exception('not ready yet') + elif features_set=='full': - d = (inp.counts, - inp.count_median, inp.count_variance, +# print(inp.counts) + dummy = tf.zeros_like(inp.counts) +# print(dummy) + d = (inp.counts, dummy, dummy, dummy, + inp.page_ix, + inp.count_median, + inp.year_autocorr, inp.quarter_autocorr, inp.count_pctl_0, inp.count_pctl_5, inp.count_pctl_25, inp.count_pctl_75, inp.count_pctl_95, inp.count_pctl_100, - inp.page_ix, inp.year_autocorr, inp.quarter_autocorr) + inp.count_variance) + elif features_set=='full_w_context': raise Exception('not ready yet') - - #!!!! does it actually need the dow, moy features??? - #if this is required then would need the sample_period as an input to this function [follw pattern of features_set] - """if sample_period=='daily': - d += (inp.dow,inp.woy) - elif sample_period=='weekly': - d += (inp.dow,inp.woy) - elif sample_period=='monthly': - d += (inp.dow,inp.woy)""" - return d \ No newline at end of file diff --git a/make_features.py b/make_features.py index d13d0cc..772ad8f 100755 --- a/make_features.py +++ b/make_features.py @@ -412,6 +412,8 @@ def run(): elif (args.features_set == 'full') or (args.features_set == 'full_w_context'): tensors = dict( counts=df, + lagged_ix=lagged_ix, + page_map=np.zeros(len(df)),#just set to a dummy all 0's page_ix=df.index.values,#!!!!!! year_autocorr=year_autocorr, diff --git a/model.py b/model.py index 6b2a8c2..6145305 100755 --- a/model.py +++ b/model.py @@ -67,6 +67,8 @@ def build_rnn(): return RNN(num_layers=hparams.encoder_rnn_layers, num_units=hparams.rnn_depth, input_size=encoder_features_depth, direction='unidirectional', #Let's try bidirectional as well, or ,ay as well try keeping unidirectional but with order reversed, just see what happens + #assume merge mode default is concat?? + #need to fix dimensions error. If could change merge mode to sum or mean or something then at least output dimension is same so might be easiest way to avoid error ? dropout=hparams.encoder_dropout if is_train else 0, seed=seed) static_p_size = cuda_params_size(build_rnn) diff --git a/trainer.py b/trainer.py index 6f61da1..2bface6 100755 --- a/trainer.py +++ b/trainer.py @@ -415,10 +415,10 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") if side_split: - splitter = Splitter(page_features(inp, features_set), inp.page_map, 3, train_sampling=train_sampling,#!!!!!!!!!!!! will need to edit page_features function and get rid of page_map + splitter = Splitter(page_features(inp, features_set), inp.page_map, 3, train_sampling=train_sampling, test_sampling=eval_sampling, seed=seed) else: - splitter = FakeSplitter(page_features(inp, features_set), 3, seed=seed, test_sampling=eval_sampling) #!!!!!!!!!!!! will need to edit page_features function + splitter = FakeSplitter(page_features(inp, features_set), 3, seed=seed, test_sampling=eval_sampling) real_train_pages = splitter.splits[0].train_size real_eval_pages = splitter.splits[0].test_size @@ -440,7 +440,7 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): split = splitter.splits[index] - pipe = InputPipe(features_set, sampling_period, inp, features=split.train_set, N_time_series=split.train_size,#!!!!!!!!!!!!!!!! 
page_features + pipe = InputPipe(features_set, sampling_period, inp, features=split.train_set, N_time_series=split.train_size, mode=ModelMode.TRAIN, batch_size=batch_size, n_epoch=None, verbose=verbose, train_completeness_threshold=train_completeness_threshold, predict_completeness_threshold=train_completeness_threshold, train_window=train_window, @@ -449,7 +449,7 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): back_offset=predict_window if forward_split else 0) inp_scope.reuse_variables() if side_split: - side_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + side_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, @@ -458,7 +458,7 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): else: side_eval_pipe = None if forward_split: - forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, From 983fb9c035ae9c7c516c34793c50aef7f1cc5b03 Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 5 Jul 2018 16:33:53 -0700 Subject: [PATCH 10/42] finished weekly aggregation' --- PREPROCESS.py | 122 ++++++++++++++++++++++++++++++++++++++--------- hparams.py | 3 +- input_pipe.py | 109 +++++++++++++++++++++++++++++++----------- make_features.py | 32 +++++++++---- trainer.py | 2 +- 5 files changed, 207 insertions(+), 61 deletions(-) diff --git a/PREPROCESS.py b/PREPROCESS.py index b1e4ffa..6d4b1cd 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -22,7 +22,7 @@ from sklearn.preprocessing import Imputer from collections import Counter - +from copy import deepcopy def load_my_data(myDataDir): @@ -248,15 +248,75 @@ def make_train_csv(df, save_path, imputation_method, sampling_period, start_date Make the train_2.csv """ - def aggregate(df, sampling_period): + def aggregate_to_weekly(df, aggregation_type): """ Aggregate the data (average it) to downsample to desired sample period, e.g. daily measurements -> weekly or monthly. Should smooth out some noise, and help w seasonality. + + **ASSUMES WE HAVE DAILY DATA TO START. 
""" - return df + dfc = deepcopy(df) + dfc['month-day'] = dfc['date'].apply(lambda x: str(x)[5:]) + + #Differentiate by year + years = pd.DatetimeIndex(dfc['date']).year + #years -= years.min() + dfc['year'] = years + + #Manually define as below, as generated by pd.date_range('2015-01-01','2015-12-24',freq='W-THU') + fixed_start_dates = ['01-01','01-08','01-15','01-22', + '01-29','02-05','02-12','02-19', + '02-26','03-05','03-12','03-19', + '03-26','04-02','04-09','04-16', + '04-23','04-30','05-07','05-14', + '05-21','05-28','06-04','06-11', + '06-18','06-25','07-02','07-09', + '07-16','07-23','07-30','08-06', + '08-13','08-20','08-27','09-03', + '09-10','09-17','09-24','10-01', + '10-08','10-15','10-22','10-29', + '11-05','11-12','11-19','11-26', + '12-03','12-10','12-17','12-24']#This combines last ~10 days of year together + + + _ = [np.searchsorted(fixed_start_dates,str(x),side='right') - 1 for x in dfc['month-day'].values] + _ = np.clip(_,0,51).astype(int) #clip 52 to 51. This means lumping last few days of year into 2nd last week of year starting 12/24. + _ = [fixed_start_dates[i] for i in _] + #Overwrite the actual date with the predefined week start date: + dfc['week_start_date'] = dfc['year'].map(str) + '-' + _ + + #For each page-year-week, aggregte over the N<=7 days of that week to get the aggregted value: +# _ = dfc.groupby(['Page','year','week_start_date']).agg({'y': [aggregation_type,'size'], 'year':'min', 'date':'min', 'Page':'min', 'week_start_date':'min'}) + _ = dfc.groupby(['Page','week_start_date']).agg({'y': [aggregation_type,'size'], 'date':'min', 'Page':'min', 'week_start_date':'min'}) + new_df = pd.DataFrame({'Page': _['Page']['min'].values, + 'date': _['date']['min'].values, + 'y': _['y'][aggregation_type].values, #This is no longer necessarily an int + 'week_start_date': _['week_start_date']['min'].values + }) + + #After above process, can still have missing blocks for a given time series, so will deal with them later. + + #now that done, delete uneeded columns + new_df.drop(columns=['date'],inplace=True) + new_df.rename(columns={'week_start_date':'date'},inplace=True) + + return new_df + def remove_downsample_columns(df, out_of_range_fill_value): + """ + When doing any kind of daily --> weekly or monthly aggregation, + will have many days that are now empty (all data aggregated to single + date marking 1st date of week / month) + + So remove those obsolete columns + """ + bad_cols = [i for i in df.columns if np.alltrue(df[i].values==out_of_range_fill_value)] + df.drop(columns=bad_cols,inplace=True) + return df + + #Rename columns to be as in Kaggle data: df.rename(columns={'id':'Page'},inplace=True) @@ -269,14 +329,16 @@ def aggregate(df, sampling_period): if end_date: latest = min(latest,end_date) - idx = pd.date_range(earliest,latest) - OUT_OF_RANGE_FILL_VALUE = -1 #np.NaN #0 #puttign as nan casts to float and cannot convert to int + idx = pd.date_range(earliest,latest) #!!!!!! fro now doing daily. When doing weekly also keep with default freq='D' . If change to 'W' alignment gets messed up. Just do daily 'D', then later can correct easily. + OUT_OF_RANGE_FILL_VALUE = -1. 
#np.NaN #0 #puttign as nan casts to float and cannot convert to int + #Do aggregation from DAILY --> WEEKLY before doing any kind of imputation + if sampling_period=='weekly': + AGGREGATION_TYPE = 'median' + df = aggregate_to_weekly(df, AGGREGATION_TYPE) - - #Reorganize data for each id (->"Page") unique_ids = pd.unique(df['Page']) df_list = [] @@ -284,7 +346,7 @@ def aggregate(df, sampling_period): d = df.loc[df['Page']==u] #Nan / zero pad start and end date range if needed {end missing} dates = pd.Series(d['y'].values,index=d['date']) - dates.index = pd.DatetimeIndex(dates.index) + dates.index = pd.DatetimeIndex(dates.index) dates = dates.reindex(idx, fill_value=OUT_OF_RANGE_FILL_VALUE) dates.index = pd.to_datetime(dates.index).strftime('%Y-%m-%d') dd = pd.DataFrame(dates).T @@ -302,18 +364,33 @@ def aggregate(df, sampling_period): df.reset_index(drop=True,inplace=True) - #Just for analysis: look at kinds of gaps in series - VERBOSE = False - if VERBOSE: - __missing_vals_distribution(df) + #If we did aggregation, then above reogranization will have many of the columns Nan / -1, + #since e.g. went from daily to weekly, then 6 days of the week will look empty. So remove them. + if sampling_period=='weekly': + AGGREGATION_TYPE = 'median' + df = remove_downsample_columns(df, OUT_OF_RANGE_FILL_VALUE) + + + + + # ============================================================================= + # Just for analysis: look at kinds of gaps in series, for DAILY data + # ============================================================================= + #VERBOSE = False + #if VERBOSE: + # __missing_vals_distribution(df) + + #Imputation, dealing with missing seasonality blocks / out of phase - df = do_imputation(df,imputation_method) - #Could do impoutation then downsampling, vs. downsampling then imputation ... unclear which is better here in general. - #for now assume we do ipmutation THEN aggregation: - df = aggregate(df,sampling_period) + if imputation_method: + df = do_imputation(df,imputation_method) + #Could do impoutation then downsampling, vs. downsampling then imputation ... unclear which is better here in general. 
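A compact pandas sketch of the two steps above: binning daily rows onto the 52 fixed week-start dates and then reindexing the aggregated series onto the full daily grid with the out-of-range fill value. The toy frame below is hypothetical; only the binning and reindexing logic mirrors the code.

import numpy as np
import pandas as pd

# 52 fixed week starts ('MM-DD'), every 7 days from Jan 1; the last bin absorbs the final days of the year
week_starts = pd.date_range('2015-01-01', periods=52, freq='7D').strftime('%m-%d').tolist()

df = pd.DataFrame({'Page': 1,
                   'date': pd.date_range('2016-12-20', '2017-01-10', freq='D'),
                   'y': np.arange(22.0)})
bin_ix = np.clip(np.searchsorted(week_starts, df['date'].dt.strftime('%m-%d'), side='right') - 1, 0, 51)
df['week_start'] = df['date'].dt.year.astype(str) + '-' + pd.Series([week_starts[i] for i in bin_ix])
weekly = df.groupby(['Page', 'week_start'], as_index=False)['y'].median()

# Reindex onto a full daily grid, as in the loop above; week-start dates keep their value, all other days get -1
idx = pd.date_range('2016-12-01', '2017-01-31', freq='D')
s = pd.Series(weekly['y'].values, index=pd.DatetimeIndex(weekly['week_start']))
s = s.reindex(idx, fill_value=-1)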
+ #for now assume we do ipmutation THEN aggregation: + #df = aggregate(df,sampling_period) + print(df) #SHould end up with a csv that is rows are series (each id), cols are dates #:eftmost col should be "Pages" to be same as Kaggle format @@ -321,6 +398,8 @@ def aggregate(df, sampling_period): return df + + def make_key_csv(df): """ Make the key_1.csv, key_2.csv @@ -360,12 +439,11 @@ def make_key_csv(df): # ============================================================================= # TOTAL COMPLETED TRIPS: myDataDir = r"/Users/kocher/Desktop/forecasting/exData/totalCompletedTripsDaily" - imputation_method = 'median' #'STL' + IMPUTATION_METHOD = None #'median' #'STL' #None START_DATE = '2015-01-01' #None END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful - SAMPLING_PERIOD = 'daily' #'daily', 'weekly', 'monthly' - + SAMPLING_PERIOD = 'weekly' #'daily', 'weekly', 'monthly' # ============================================================================= @@ -374,8 +452,9 @@ def make_key_csv(df): print('START_DATE',START_DATE) print('END_DATE',END_DATE) print('REMOVE_ID_LIST',REMOVE_ID_LIST) - print('imputation_method',imputation_method) + print('IMPUTATION_METHOD',IMPUTATION_METHOD) print('myDataDir',myDataDir) + print('SAMPLING_PERIOD',SAMPLING_PERIOD) #Load df = load_my_data(myDataDir) @@ -384,6 +463,5 @@ def make_key_csv(df): df = remove_cities(df,REMOVE_ID_LIST) #Put into same format as used by Kaggle, save out csv's - df = format_like_Kaggle(df, myDataDir, imputation_method, SAMPLING_PERIOD, start_date=START_DATE, end_date=END_DATE) - + df = format_like_Kaggle(df, myDataDir, IMPUTATION_METHOD, SAMPLING_PERIOD, start_date=START_DATE, end_date=END_DATE) diff --git a/hparams.py b/hparams.py index 8edf66e..dfab587 100755 --- a/hparams.py +++ b/hparams.py @@ -5,7 +5,8 @@ params_s32 = dict( batch_size=256, #train_window=380, - train_window=283, + #train_window=283, + train_window=65,#try 65 w our data to see if allows more samples through filter train_skip_first=0, rnn_depth=267, use_attn=False, diff --git a/input_pipe.py b/input_pipe.py index 617caf4..b86707c 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -212,7 +212,7 @@ def reject_filter(self, x_counts, y_counts, *args): - def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, + def make_features__arturius(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance): @@ -240,33 +240,17 @@ def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_age x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) - if self.features_set == 'arturius': - - # Combine all page features into single tensor - scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100])#!!!!!!! if kaggle feats. Else need also the oher quntiles too - flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) - series_features = tf.expand_dims(flat_features, 0) - + # Combine all page features into single tensor + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100])#!!!!!!! if kaggle feats. 
Else need also the oher quntiles too + flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) + series_features = tf.expand_dims(flat_features, 0) + # print(scalar_features) #4 # print(flat_features) #18 # print(series_features) # print([pf_agent, pf_country, pf_site]) #4, 7, 3 #the one hot encoded features - - - elif self.features_set == 'full': - # Combine all page features into single tensor - scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, - count_pctl_0, - count_pctl_5, - count_pctl_25, - count_pctl_75, - count_pctl_95, - count_pctl_100, - count_variance]) - flat_features = tf.concat([scalar_features], axis=0) - series_features = tf.expand_dims(flat_features, 0) - - + + #Any time dependent feature need to be split into x [train] and y [test] #the time INdependent features [constant per fixture] will just be tiled same way either way except diff lengths @@ -308,13 +292,80 @@ def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_age -# def make_features__full(self, x_counts, y_counts, dow, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, -# count_median, year_autocorr, quarter_autocorr, count_pctl_100): #!!!!!!!!!!!! if kaggle feats as is #!!!! added woy, count_pctl_100 -# """ -# Using different features than the arturius default set -# """ + def make_features__full(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, + count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, + count_pctl_75, count_pctl_95, count_pctl_100, count_variance): + """ + Main method. Assembles input data into final tensors + """ + if self.sampling_period == 'daily': + x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) + x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func + elif self.sampling_period == 'weekly': + x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) + elif self.sampling_period == 'monthly': + x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) + # Normalize counts + mean = tf.reduce_mean(x_counts) + std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_counts, mean))) + norm_x_counts = (x_counts - mean) / std + norm_y_counts = (y_counts - mean) / std + norm_lagged_counts = (lagged_counts - mean) / std #!!!!!! seems like there is some leakage in time here??? The y lagged are normalized in a way that is a function of the y data ?? 
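On the leakage worry in the comment above: as the code reads, mean and std are computed from x_counts alone, so the decoder targets and the lagged values are rescaled with statistics taken from the training window only; nothing is estimated from the prediction window. A toy NumPy illustration of that convention:

import numpy as np

x_counts = np.array([3., 5., 4., 6., 8.])                  # encoder (training) window
y_counts = np.array([7., 9.])                              # decoder (prediction) window
lagged_counts = np.array([2., 4., 3., 5., 7., 6., 8.])     # covers both windows

mean = x_counts.mean()
std = x_counts.std()                                       # population std, matching sqrt(mean(squared_difference))
norm_x_counts = (x_counts - mean) / std
norm_y_counts = (y_counts - mean) / std                    # same statistics reused, not recomputed from y
norm_lagged_counts = (lagged_counts - mean) / std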
+ + # Split lagged counts to train and test + x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) + + # Combine all page features into single tensor + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, + count_pctl_0, + count_pctl_5, + count_pctl_25, + count_pctl_75, + count_pctl_95, + count_pctl_100, + count_variance]) + flat_features = tf.concat([scalar_features], axis=0) + series_features = tf.expand_dims(flat_features, 0) + + + #Any time dependent feature need to be split into x [train] and y [test] + #the time INdependent features [constant per fixture] will just be tiled same way either way except diff lengths + # Train features, depending on measurement frequency + x_features = tf.expand_dims(norm_x_counts, -1) # [n_days] -> [n_days, 1] + if self.sampling_period == 'daily': + x_features = tf.concat([x_features, x_dow, x_woy], axis=1) + elif self.sampling_period == 'weekly': + x_features = tf.concat([x_features, x_woy], axis=1) + elif self.sampling_period == 'monthly': + x_features = tf.concat([x_features, x_moy], axis=1) + #Regardess of period/frequency will have below features: + x_features = tf.concat([x_features, x_lagged, + # Stretch series_features to all training days + # [1, features] -> [n_days, features] + tf.tile(series_features, [self.train_window, 1])], axis=1) + + # Test features + if self.sampling_period == 'daily': + y_features = tf.concat([y_dow, y_woy], axis=1) + elif self.sampling_period == 'weekly': + y_features = y_woy + 0 + elif self.sampling_period == 'monthly': + y_features = y_moy + 0 + #Regardess of period/frequency will have below features: + y_features = tf.concat([y_features, y_lagged, + # Stretch series_features to all testing days + # [1, features] -> [n_days, features] + tf.tile(series_features, [self.predict_window, 1]) + ], axis=1) + +# print(x_features) + + #!!!!! why no lagged_y alnoe, only in y_features??? + #!!!! why no norm_y_counts ????? + return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_features, page_ix + #Must match up with setting self.XYZ = it_tensors below in __init__. diff --git a/make_features.py b/make_features.py index 772ad8f..b6fc82f 100755 --- a/make_features.py +++ b/make_features.py @@ -339,6 +339,11 @@ def run(): # ============================================================================= # TIME-VARYING FEATURES # ============================================================================= + #Could determine week of year number in several ways: 1) as in Pandas as starting on a particular day of week, + # 2. just use day of year / 365 + WEEK_NUMBER_METHOD = 'floor7'#'pandas' #'floor7' + WEEK_NUMBER_MAX = 53. #52. + if args.sampling_period=='daily': @@ -349,19 +354,30 @@ def run(): dow = np.stack([np.cos(dow_norm), np.sin(dow_norm)], axis=-1) #index of week number, when sampling at DAILY level - year_period = 53. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year - woy_norm = features_days.weekofyear.values / year_period #not sure if by default this starts on Monday vs Sunday + if WEEK_NUMBER_METHOD=='pandas': + week = features_days.weekofyear.values + elif WEEK_NUMBER_METHOD=='floor7': + week = np.floor((features_days.dayofyear.values - 1.) /7.) + year_period = WEEK_NUMBER_MAX / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] 
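The two WEEK_NUMBER_METHOD options behave differently around the year boundary: pandas weekofyear follows ISO numbering, so the first days of January can land in week 52 or 53 of the previous year, while 'floor7' always restarts at 0 on January 1. A small comparison on illustrative dates:

import numpy as np
import pandas as pd

dates = pd.date_range('2016-12-28', '2017-01-05', freq='D')
pandas_week = dates.weekofyear.values                        # ISO week number
floor7_week = np.floor((dates.dayofyear.values - 1) / 7.0)   # restarts at 0 every January 1st
for d, pw, fw in zip(dates.strftime('%Y-%m-%d'), pandas_week, floor7_week):
    print(d, pw, fw)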
---> actually in pandas numbering goes to 53, depending on start day of week for that year + woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) + #To catch longer term trending data, can also include year number. [depending on size of train / prediction windows and random sampling boundaries could be same value over whole series] + year_nmumber = features_days.year + if args.sampling_period=='weekly': #index of week number, when sampling at WEEKLY level (this is different than above) fff = pd.date_range(data_start, features_end, freq='W') #!!!!!!!!!!!!! still need to worry about alignment ... - year_period = 53. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year - woy_norm = fff.weekofyear.values / year_period #not sure if by default this starts on Monday vs Sunday - woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) - + if WEEK_NUMBER_METHOD=='pandas': + week = fff.weekofyear.values + elif WEEK_NUMBER_METHOD=='floor7': + week = np.floor((fff.dayofyear.values - 1.) /7.) + year_period = WEEK_NUMBER_MAX / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year + woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday + woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) + year_nmumber = features_days.year if args.sampling_period=='monthly': #month index (only used if sampling monthly) @@ -369,8 +385,8 @@ def run(): period = 12. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year moy_norm = fff.month.values / period #not sure if by default this starts on Monday vs Sunday moy = np.stack([np.cos(moy_norm), np.sin(moy_norm)], axis=-1) - - + year_nmumber = features_days.year + # Assemble indices for quarterly lagged data diff --git a/trainer.py b/trainer.py index 2bface6..fae64c1 100755 --- a/trainer.py +++ b/trainer.py @@ -458,7 +458,7 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): else: side_eval_pipe = None if forward_split: - forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, + forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! 
page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, train_completeness_threshold=0.01, predict_completeness_threshold=0, From 147245795b9a238a635862ac346ee3ad4a0d85f7 Mon Sep 17 00:00:00 2001 From: gk Date: Sat, 7 Jul 2018 17:55:42 -0700 Subject: [PATCH 11/42] removed dummy tensors, now only return exact needed --- input_pipe.py | 155 +++++++++++++++++++---------------------------- make_features.py | 2 + 2 files changed, 66 insertions(+), 91 deletions(-) diff --git a/input_pipe.py b/input_pipe.py index b86707c..b0989d5 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -111,7 +111,7 @@ def cut(self, counts, start, end): if self.sampling_period=='daily': cropped_dow = self.inp.dow[start:end] cropped_woy = self.inp.woy[start:end] - cropped_moy = 0*cropped_dow + cropped_moy = 0*cropped_dow #Month information is alreayd contained in week information. COuld incude anyway to be explicit, but for now do not use as a feature elif self.sampling_period=='weekly': cropped_woy = self.inp.woy[start:end] cropped_dow = 0*cropped_woy @@ -130,6 +130,7 @@ def cut(self, counts, start, end): # ============================================================================= #If used Arturius' original feature set then will include the lagged data: +# if self.features_set == 'arturius': if self.inp.lagged_ix: # Cut lagged counts # gather() accepts only int32 indexes @@ -156,10 +157,17 @@ def cut(self, counts, start, end): if self.features_set=='arturius' or self.features_set=='full':#for now, for full just do sam [include lagged] - return x_counts, y_counts, cropped_dow, lagged_count, cropped_woy, cropped_moy + if self.sampling_period=='daily': + return x_counts, y_counts, lagged_count, cropped_dow, cropped_woy + if self.sampling_period=='weekly': + return x_counts, y_counts, lagged_count, cropped_woy + if self.sampling_period=='monthly': + return x_counts, y_counts, lagged_count, cropped_moy + # elif self.features_set=='full': # return aaaaaaaaaaa #can drop lagged else: + print(self.features_set) raise Exception('problem with features_set') @@ -212,15 +220,51 @@ def reject_filter(self, x_counts, y_counts, *args): - def make_features__arturius(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, - count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, - count_pctl_75, count_pctl_95, count_pctl_100, count_variance): - -# def make_features(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, -# count_median, year_autocorr, quarter_autocorr, count_pctl_100): + + + def make_features(self, *args): """ Main method. Assembles input data into final tensors """ +# def make_features__arturius(self, x_counts, y_counts, lagged_counts, dow, woy, moy, pf_agent, pf_country, pf_site, page_ix, +# count_median, year_autocorr, quarter_autocorr, count_pctl_100): +# +# def make_features__arturius(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, +# count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, +# count_pctl_75, count_pctl_95, count_pctl_100, count_variance): + + + # ============================================================================= + # Unpack the vars depending on which features_set - sampling_period + # The order needs to match the output of the cut method. 
+ # cut_train and cut_eval return args + cut_output + # the args are things like pf_agent, p + # the cut_output is the same order as the return of the cut method. + # ============================================================================= + print(args) + if self.features_set == 'arturius': + if self.sampling_period == 'daily': + x_counts, y_counts, lagged_counts, dow, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + elif self.sampling_period == 'weekly': + x_counts, y_counts, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + elif self.sampling_period == 'monthly': + x_counts, y_counts, lagged_counts, moy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + #For now just use the same ... +# count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance) + elif self.features_set == 'full': + f = ooooooooo + if self.sampling_period == 'daily': + x_counts, y_counts, lagged_counts, dow, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + elif self.sampling_period == 'weekly': + x_counts, y_counts, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + elif self.sampling_period == 'monthly': + x_counts, y_counts, lagged_counts, moy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + + + + # ============================================================================= + # Do train - predict splits + # ============================================================================= if self.sampling_period == 'daily': x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func @@ -241,6 +285,14 @@ def make_features__arturius(self, x_counts, y_counts, dow, lagged_counts, woy, m # Combine all page features into single tensor +# scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, +# count_pctl_0, +# count_pctl_5, +# count_pctl_25, +# count_pctl_75, +# count_pctl_95, +# count_pctl_100, +# count_variance]) scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100])#!!!!!!! if kaggle feats. Else need also the oher quntiles too flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) series_features = tf.expand_dims(flat_features, 0) @@ -291,84 +343,6 @@ def make_features__arturius(self, x_counts, y_counts, dow, lagged_counts, woy, m - - def make_features__full(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, - count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, - count_pctl_75, count_pctl_95, count_pctl_100, count_variance): - """ - Main method. 
Assembles input data into final tensors - """ - if self.sampling_period == 'daily': - x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) - x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func - elif self.sampling_period == 'weekly': - x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) - elif self.sampling_period == 'monthly': - x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) - - # Normalize counts - mean = tf.reduce_mean(x_counts) - std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_counts, mean))) - norm_x_counts = (x_counts - mean) / std - norm_y_counts = (y_counts - mean) / std - norm_lagged_counts = (lagged_counts - mean) / std #!!!!!! seems like there is some leakage in time here??? The y lagged are normalized in a way that is a function of the y data ?? - - # Split lagged counts to train and test - x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) - - # Combine all page features into single tensor - scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, - count_pctl_0, - count_pctl_5, - count_pctl_25, - count_pctl_75, - count_pctl_95, - count_pctl_100, - count_variance]) - flat_features = tf.concat([scalar_features], axis=0) - series_features = tf.expand_dims(flat_features, 0) - - - #Any time dependent feature need to be split into x [train] and y [test] - #the time INdependent features [constant per fixture] will just be tiled same way either way except diff lengths - - # Train features, depending on measurement frequency - x_features = tf.expand_dims(norm_x_counts, -1) # [n_days] -> [n_days, 1] - if self.sampling_period == 'daily': - x_features = tf.concat([x_features, x_dow, x_woy], axis=1) - elif self.sampling_period == 'weekly': - x_features = tf.concat([x_features, x_woy], axis=1) - elif self.sampling_period == 'monthly': - x_features = tf.concat([x_features, x_moy], axis=1) - #Regardess of period/frequency will have below features: - x_features = tf.concat([x_features, x_lagged, - # Stretch series_features to all training days - # [1, features] -> [n_days, features] - tf.tile(series_features, [self.train_window, 1])], axis=1) - - # Test features - if self.sampling_period == 'daily': - y_features = tf.concat([y_dow, y_woy], axis=1) - elif self.sampling_period == 'weekly': - y_features = y_woy + 0 - elif self.sampling_period == 'monthly': - y_features = y_moy + 0 - #Regardess of period/frequency will have below features: - y_features = tf.concat([y_features, y_lagged, - # Stretch series_features to all testing days - # [1, features] -> [n_days, features] - tf.tile(series_features, [self.predict_window, 1]) - ], axis=1) - -# print(x_features) - - #!!!!! why no lagged_y alnoe, only in y_features??? - #!!!! why no norm_y_counts ????? - return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_features, page_ix - #Must match up with setting self.XYZ = it_tensors below in __init__. 
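# =============================================================================
# Minimal NumPy sketch of the normalization pattern used in make_features above:
# the mean and std are computed on the encoder (train) window only and then reused
# for the train counts, the prediction-window counts and the lagged counts, so no
# statistic is derived from the values the decoder has to predict.
# =============================================================================
import numpy as np

def normalize_window(x_counts, y_counts, lagged_counts):
    mean = x_counts.mean()
    std = np.sqrt(np.mean((x_counts - mean) ** 2))
    return (x_counts - mean) / std, (y_counts - mean) / std, (lagged_counts - mean) / std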
- - - def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: int, mode: ModelMode, n_epoch=None, batch_size=127, runs_in_burst=1, verbose=True, predict_window=60, train_window=500, train_completeness_threshold=1, predict_completeness_threshold=1, back_offset=0, @@ -435,18 +409,17 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter # Choose right cutter function for current ModelMode cutter = {ModelMode.TRAIN: self.cut_train, ModelMode.EVAL: self.cut_eval, ModelMode.PREDICT: self.cut_eval} #Choose the right feature maker function, depending on feature_set used: - #feature_maker = {'arturius': self.make_features, 'full': self.make_features__full} - feature_maker = {'arturius': self.make_features, 'full': self.make_features}#!!!!!!just for now always use art +# feature_maker = {'arturius': self.make_features__arturius, 'full': self.make_features__full} # Create dataset, transform features and assemble batches root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) batch = (root_ds .map(cutter[mode]) .filter(self.reject_filter) - .map(feature_maker[self.features_set], num_parallel_calls=num_threads) + #.map(feature_maker[self.features_set], num_parallel_calls=num_threads) + .map(self.make_features, num_parallel_calls=num_threads) .batch(batch_size) .prefetch(runs_in_burst * 2) ) - self.iterator = batch.make_initializable_iterator() it_tensors = self.iterator.get_next() @@ -464,7 +437,7 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter self.encoder_features_depth = self.time_x.shape[2].value print('self.encoder_features_depth',self.encoder_features_depth) - + def load_vars(self, session): self.inp.restore(session) diff --git a/make_features.py b/make_features.py index b6fc82f..61cb2e4 100755 --- a/make_features.py +++ b/make_features.py @@ -493,6 +493,8 @@ def run(): print(tensors) print(plain) + print(tensors.keys()) + print(plain.keys()) # Store data to the disk VarFeeder(args.data_dir, tensors, plain) From a395049be10621fca8367fe0d26c596f8bdea0d0 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 9 Jul 2018 09:48:48 -0700 Subject: [PATCH 12/42] input pipe cleanup --- input_pipe.py | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/input_pipe.py b/input_pipe.py index b0989d5..bb827f8 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -226,14 +226,6 @@ def make_features(self, *args): """ Main method. Assembles input data into final tensors """ -# def make_features__arturius(self, x_counts, y_counts, lagged_counts, dow, woy, moy, pf_agent, pf_country, pf_site, page_ix, -# count_median, year_autocorr, quarter_autocorr, count_pctl_100): -# -# def make_features__arturius(self, x_counts, y_counts, dow, lagged_counts, woy, moy, pf_agent, pf_country, pf_site, page_ix, -# count_median, year_autocorr, quarter_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, -# count_pctl_75, count_pctl_95, count_pctl_100, count_variance): - - # ============================================================================= # Unpack the vars depending on which features_set - sampling_period # The order needs to match the output of the cut method. 
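# =============================================================================
# Distilled shape of the tf.data pipeline assembled in __init__ above (TF 1.x style,
# as used in this repo): crop a random window from each series, drop windows that are
# too sparse, build the feature tensors, then batch and prefetch.
# =============================================================================
import tensorflow as tf

def build_batches(feature_tensors, cut_fn, keep_fn, make_features_fn,
                  n_epoch=None, batch_size=128, num_threads=3, prefetch=2):
    ds = tf.data.Dataset.from_tensor_slices(tuple(feature_tensors)).repeat(n_epoch)
    ds = (ds.map(cut_fn)
            .filter(keep_fn)
            .map(make_features_fn, num_parallel_calls=num_threads)
            .batch(batch_size)
            .prefetch(prefetch))
    return ds.make_initializable_iterator()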
@@ -260,8 +252,6 @@ def make_features(self, *args): elif self.sampling_period == 'monthly': x_counts, y_counts, lagged_counts, moy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args - - # ============================================================================= # Do train - predict splits # ============================================================================= @@ -408,14 +398,11 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter # Choose right cutter function for current ModelMode cutter = {ModelMode.TRAIN: self.cut_train, ModelMode.EVAL: self.cut_eval, ModelMode.PREDICT: self.cut_eval} - #Choose the right feature maker function, depending on feature_set used: -# feature_maker = {'arturius': self.make_features__arturius, 'full': self.make_features__full} # Create dataset, transform features and assemble batches root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) batch = (root_ds .map(cutter[mode]) .filter(self.reject_filter) - #.map(feature_maker[self.features_set], num_parallel_calls=num_threads) .map(self.make_features, num_parallel_calls=num_threads) .batch(batch_size) .prefetch(runs_in_burst * 2) @@ -424,16 +411,17 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter it_tensors = self.iterator.get_next() # Assign all tensors to class variables - if self.features_set=='arturius' or self.features_set=='full': - self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ - self.norm_std, self.series_features, self.page_ix = it_tensors #!!!!!!!!!!!!! names hardcoded ned to change to my fgeatures - if self.features_set=='simple': +# if self.features_set=='arturius' or self.features_set=='full': + #self.time_x is the tensor of features, regardless of which feature set, so this can stay same. + #But if not doing lagged then can return None for that ??? + self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ + self.norm_std, self.series_features, self.page_ix = it_tensors #!!!!!!!!!!!!! 
names hardcoded ned to change to my fgeatures + """if self.features_set=='simple': pass # if self.features_set=='full': # pass if self.features_set=='full_w_context': - pass - + pass""" self.encoder_features_depth = self.time_x.shape[2].value print('self.encoder_features_depth',self.encoder_features_depth) From a9066aea0a9c690f4a1c8012b14e7d0890ccc213 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 9 Jul 2018 12:11:52 -0700 Subject: [PATCH 13/42] input pipe features, arturius and full features all samplingperiods work on kaggle data --- hparams.py | 4 +-- input_pipe.py | 49 ++++++++++++++++++----------- make_features.py | 82 ++++++++++++++++++++++++++---------------------- 3 files changed, 78 insertions(+), 57 deletions(-) diff --git a/hparams.py b/hparams.py index dfab587..24e66b6 100755 --- a/hparams.py +++ b/hparams.py @@ -5,8 +5,8 @@ params_s32 = dict( batch_size=256, #train_window=380, - #train_window=283, - train_window=65,#try 65 w our data to see if allows more samples through filter + train_window=283, + #train_window=65,#try 65 w our data to see if allows more samples through filter train_skip_first=0, rnn_depth=267, use_attn=False, diff --git a/input_pipe.py b/input_pipe.py index bb827f8..e2b8e5a 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -111,15 +111,15 @@ def cut(self, counts, start, end): if self.sampling_period=='daily': cropped_dow = self.inp.dow[start:end] cropped_woy = self.inp.woy[start:end] - cropped_moy = 0*cropped_dow #Month information is alreayd contained in week information. COuld incude anyway to be explicit, but for now do not use as a feature +# cropped_moy = 0*cropped_dow #Month information is alreayd contained in week information. COuld incude anyway to be explicit, but for now do not use as a feature elif self.sampling_period=='weekly': cropped_woy = self.inp.woy[start:end] - cropped_dow = 0*cropped_woy - cropped_moy = 0*cropped_woy +# cropped_dow = 0*cropped_woy +# cropped_moy = 0*cropped_woy elif self.sampling_period=='monthly': cropped_moy = self.inp.moy[start:end] - cropped_dow = 0*cropped_moy - cropped_woy = 0*cropped_moy +# cropped_dow = 0*cropped_moy +# cropped_woy = 0*cropped_moy @@ -244,13 +244,15 @@ def make_features(self, *args): #For now just use the same ... 
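# =============================================================================
# Hedged sketch of the extra per-series scalars the 'full' feature set carries
# (count_pctl_* and count_variance). The exact definitions live in make_features.py;
# the helper below only assumes they are plain percentiles/variance of each series.
# =============================================================================
import numpy as np

def full_scalar_features(counts):
    counts = np.asarray(counts, dtype=float)
    counts = counts[~np.isnan(counts)]
    pctls = np.percentile(counts, [0, 5, 25, 75, 95, 100])
    names = ['count_pctl_0', 'count_pctl_5', 'count_pctl_25',
             'count_pctl_75', 'count_pctl_95', 'count_pctl_100']
    feats = dict(zip(names, pctls))
    feats['count_variance'] = counts.var()
    return feats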
# count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance) elif self.features_set == 'full': - f = ooooooooo if self.sampling_period == 'daily': - x_counts, y_counts, lagged_counts, dow, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, dow, woy, page_ix, count_median, year_autocorr, quarter_autocorr,\ + count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args elif self.sampling_period == 'weekly': - x_counts, y_counts, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, woy, page_ix, count_median, year_autocorr, quarter_autocorr,\ + count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args elif self.sampling_period == 'monthly': - x_counts, y_counts, lagged_counts, moy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, moy, page_ix, count_median, year_autocorr, quarter_autocorr,\ + count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args # ============================================================================= # Do train - predict splits @@ -282,10 +284,19 @@ def make_features(self, *args): # count_pctl_75, # count_pctl_95, # count_pctl_100, -# count_variance]) - scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100])#!!!!!!! if kaggle feats. Else need also the oher quntiles too - flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) - series_features = tf.expand_dims(flat_features, 0) +# count_variance]) + if self.features_set == 'arturius': + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_100]) + flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) + series_features = tf.expand_dims(flat_features, 0) + elif self.features_set == 'full': + scalar_features = tf.stack([count_median, quarter_autocorr, year_autocorr, count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance]) + #flat_features = tf.concat([pf_agent, pf_country, pf_site, scalar_features], axis=0) + flat_features = tf.concat([scalar_features], axis=0) + series_features = tf.expand_dims(flat_features, 0) + + + # print(scalar_features) #4 # print(flat_features) #18 @@ -328,6 +339,9 @@ def make_features(self, *args): #!!!!! why no lagged_y alnoe, only in y_features??? #!!!! why no norm_y_counts ????? + + print('x_features') + print(x_features) return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_features, page_ix #Must match up with setting self.XYZ = it_tensors below in __init__. 
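# =============================================================================
# Shape sketch (NumPy, illustrative sizes only) of how x_features is assembled above:
# time-varying columns are concatenated along the feature axis and the per-series
# scalar features are tiled across every timestep of the train window.
# =============================================================================
import numpy as np

train_window, n_lags, n_scalar = 283, 4, 4            # illustrative sizes
norm_x = np.random.randn(train_window, 1)             # normalized counts
x_dow = np.random.randn(train_window, 2)              # cos/sin day of week
x_woy = np.random.randn(train_window, 2)              # cos/sin week of year
x_lagged = np.random.randn(train_window, n_lags)      # normalized lagged counts
series_features = np.random.randn(1, n_scalar)        # median, autocorrs, pctl_100

x_features = np.concatenate(
    [norm_x, x_dow, x_woy, x_lagged, np.tile(series_features, (train_window, 1))],
    axis=1)
print(x_features.shape)                               # (283, 13)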
@@ -407,6 +421,8 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter .batch(batch_size) .prefetch(runs_in_burst * 2) ) + print('---------------- Done batching ----------------') + print(batch) self.iterator = batch.make_initializable_iterator() it_tensors = self.iterator.get_next() @@ -452,10 +468,7 @@ def page_features(inp: VarFeeder, features_set): raise Exception('not ready yet') elif features_set=='full': -# print(inp.counts) - dummy = tf.zeros_like(inp.counts) -# print(dummy) - d = (inp.counts, dummy, dummy, dummy, + d = (inp.counts, inp.page_ix, inp.count_median, inp.year_autocorr, inp.quarter_autocorr, @@ -465,7 +478,7 @@ def page_features(inp: VarFeeder, features_set): inp.count_pctl_75, inp.count_pctl_95, inp.count_pctl_100, - inp.count_variance) + inp.count_variance) elif features_set=='full_w_context': raise Exception('not ready yet') diff --git a/make_features.py b/make_features.py index 61cb2e4..3845f34 100755 --- a/make_features.py +++ b/make_features.py @@ -33,7 +33,10 @@ def read_all(data_type,sampling_period) -> pd.DataFrame: Reads source data for training/prediction """ def read_file(file): - df = read_cached(file).set_index('Page') + try: + df = read_cached(file).set_index('Page') + except AttributeError: + raise Exception('File not exist, did you specify correct sampling_period?') df.columns = df.columns.astype('M8[D]') return df @@ -345,47 +348,47 @@ def run(): WEEK_NUMBER_MAX = 53. #52. + features_times = pd.date_range(data_start, features_end, freq='D') + if args.sampling_period=='daily': - - features_days = pd.date_range(data_start, features_end, freq='D') - #dow = normalize(features_days.dayofweek.values) + #dow = normalize(features_times.dayofweek.values) week_period = 7 / (2 * np.pi) - dow_norm = features_days.dayofweek.values / week_period #S.dayofweek gives day of the week with Monday=0, Sunday=6 + dow_norm = features_times.dayofweek.values / week_period #S.dayofweek gives day of the week with Monday=0, Sunday=6 dow = np.stack([np.cos(dow_norm), np.sin(dow_norm)], axis=-1) #index of week number, when sampling at DAILY level if WEEK_NUMBER_METHOD=='pandas': - week = features_days.weekofyear.values + week = features_times.weekofyear.values elif WEEK_NUMBER_METHOD=='floor7': - week = np.floor((features_days.dayofyear.values - 1.) /7.) + week = np.floor((features_times.dayofyear.values - 1.) /7.) year_period = WEEK_NUMBER_MAX / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) #To catch longer term trending data, can also include year number. [depending on size of train / prediction windows and random sampling boundaries could be same value over whole series] - year_nmumber = features_days.year + year_number = features_times.year if args.sampling_period=='weekly': #index of week number, when sampling at WEEKLY level (this is different than above) - fff = pd.date_range(data_start, features_end, freq='W') +# features_times = pd.date_range(data_start, features_end, freq='W') #!!!!!!!!!!!!! still need to worry about alignment ... if WEEK_NUMBER_METHOD=='pandas': - week = fff.weekofyear.values + week = features_times.weekofyear.values elif WEEK_NUMBER_METHOD=='floor7': - week = np.floor((fff.dayofyear.values - 1.) /7.) + week = np.floor((features_times.dayofyear.values - 1.) 
/7.) year_period = WEEK_NUMBER_MAX / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) - year_nmumber = features_days.year + year_number = features_times.year if args.sampling_period=='monthly': #month index (only used if sampling monthly) - fff = pd.date_range(data_start, features_end, freq='M') #!!!!! need to think about alignment of starting month on particular dates .... +# features_times = pd.date_range(data_start, features_end, freq='M') #!!!!! need to think about alignment of starting month on particular dates .... period = 12. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year - moy_norm = fff.month.values / period #not sure if by default this starts on Monday vs Sunday + moy_norm = features_times.month.values / period #not sure if by default this starts on Monday vs Sunday moy = np.stack([np.cos(moy_norm), np.sin(moy_norm)], axis=-1) - year_nmumber = features_days.year + year_number = features_times.year @@ -412,9 +415,9 @@ def run(): count_median=count_median, year_autocorr=year_autocorr, quarter_autocorr=quarter_autocorr, - dow=dow,#N x 2 array since encoded week periodicity as complex number + #dow=dow,#N x 2 array since encoded week periodicity as complex number - woy=woy,#!!!!!!!! + #woy=woy,#!!!!!!!! count_pctl_100=percentiles[5],#max #!!!!!!!!!!!!!!!! just to see what happens: apend one of my features. ) @@ -422,7 +425,7 @@ def run(): tensors = dict( counts=df, count_median=count_median,#this is just the median feature, can put in others too - dow=dow, + #dow=dow, ) elif (args.features_set == 'full') or (args.features_set == 'full_w_context'): @@ -453,35 +456,40 @@ def run(): ) - if args.sampling_period=='daily': - tensors['dow']=dow - tensors['woy']=woy #and want want week number too, aggregating last ~10 days into week 52 - elif args.sampling_period=='weekly': - tensors['woy']=woy - elif args.sampling_period=='monthly': - tensors['moy']=moy - else: - raise Exception('Must specify correct sampling period') + else: + raise Exception(f'features_set must be specified\nOne of ["arturius","simple","full","full_w_context"]') + + + + + if args.sampling_period=='daily': + tensors['dow']=dow + tensors['woy']=woy #and want want week number too, aggregating last ~10 days into week 52 + elif args.sampling_period=='weekly': + tensors['woy']=woy + elif args.sampling_period=='monthly': + tensors['moy']=moy + else: + raise Exception('Must specify correct sampling period') - #If provide other info based on e.g. new location (any features that are not derived purely from the time series) - if args.features_set == 'full_w_context': - tensors['country'] = asdasdasd - tensors['region'] = asdasdasd - tensors['city_population'] = asdasdasd - raise Exception('not implemented yet') - #... can write scraper function to get these ... + """#If provide other info based on e.g. new location (any features that are not derived purely from the time series) + if args.features_set == 'full_w_context': + tensors['country'] = asdasdasd + tensors['region'] = asdasdasd + tensors['city_population'] = asdasdasd + raise Exception('not implemented yet') + #... 
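# =============================================================================
# NumPy sketch of the cyclical time encodings built above: a periodic index (day of
# week, week of year, month of year) is mapped to a point on the unit circle so the
# last and first values of each period end up adjacent instead of far apart.
# =============================================================================
import numpy as np

def cyclic_encode(index_values, period):
    """Encode a periodic integer index as (cos, sin) pairs, shape (N, 2)."""
    angle = np.asarray(index_values, dtype=float) / (period / (2 * np.pi))
    return np.stack([np.cos(angle), np.sin(angle)], axis=-1)

# e.g. dow = cyclic_encode(features_times.dayofweek.values, 7)
#      woy = cyclic_encode(week, WEEK_NUMBER_MAX)
#      moy = cyclic_encode(features_times.month.values, 12)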
can write scraper function to get these ...""" - else: - raise Exception(f'features_set must be specified\nOne of ["arturius","simple","full","full_w_context"]') + plain = dict( - features_days=len(features_days), + features_times=len(features_times), data_days=len(df.columns), N_time_series=len(df), data_start=data_start, From 06eacdabed45debdc25c3021387548940e7b8458 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 9 Jul 2018 14:12:01 -0700 Subject: [PATCH 14/42] predict script add arguments needed; trainer minor updates --- PREDICT.py | 8 ++++---- Readme.md | 14 ++++++++++++-- hparams.py | 6 +++--- trainer.py | 21 +++++++++++++++------ 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/PREDICT.py b/PREDICT.py index e8adbb6..ca3c071 100755 --- a/PREDICT.py +++ b/PREDICT.py @@ -26,8 +26,8 @@ FEATURES_SET = 'arturius'# 'arturius' 'simple' 'full' - - +SAMPLING_PERIOD = 'daily' +DATA_TYPE = 'ours' #'kaggle' #'ours' # ============================================================================= @@ -52,7 +52,7 @@ def mean_smape(true, pred): #read_all funcion loads the (hardcoded) file "data/all.pkl", or otherwise train2.csv print('loading data...') from make_features import read_all -df_all = read_all() +df_all = read_all(DATA_TYPE,SAMPLING_PERIOD) print('df_all.columns') print(df_all.columns) @@ -69,7 +69,7 @@ def mean_smape(true, pred): t_preds = [] for tm in range(3): tf.reset_default_graph() - t_preds.append(predict(FEATURES_SET, paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63, + t_preds.append(predict(FEATURES_SET, SAMPLING_PERIOD, paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63, n_models=3, target_model=tm, seed=2, batch_size=2048, asgd=True)) diff --git a/Readme.md b/Readme.md index 1e11cb9..6ccd8e4 100755 --- a/Readme.md +++ b/Readme.md @@ -50,8 +50,19 @@ GK modifications for own data: python3 make_features.py data/vars kaggle daily full --add_days=63 python3 make_features.py data/vars ours daily full --add_days=63 +#Just in case making new features +cd data +rm -R vars/ +rm -R cpt/ +rm -R cpt_tmp/ +rm -R logs/ +rm *.pkl +cd .. +ll data/ + #no reason to expect 10000 to 11500 is good range to save out. View loss along the way python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --name TEST_attn_head --hparam_set=TEST_attn_head --name TEST_stacked --hparam_set=TEST_stacked @@ -65,9 +76,8 @@ python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asg To do: -0. -- got working with few examples of our added features (one static, one time varying 2D), now just organize programmatically 0. save log files to view SMAPE etc metrics during training 1. finish PREPROCESS.py to do better imputation using basic forecasting method [just use STL or Theta to fill in small gaps; otherwise remove blocks] -2. PREPROCESS.py - allow downsample in time to weekly, monthly +2. for weekly. monthly inputs, will there be issue in Kaggle code??? 3. Prediction intervals 4. 
Architecture improvements \ No newline at end of file diff --git a/hparams.py b/hparams.py index 24e66b6..901200c 100755 --- a/hparams.py +++ b/hparams.py @@ -3,10 +3,10 @@ # Manually selected params params_s32 = dict( - batch_size=256, + batch_size=64,#256, #train_window=380, - train_window=283, - #train_window=65,#try 65 w our data to see if allows more samples through filter + #train_window=283, + train_window=30,#try 65 w our data to see if allows more samples through filter train_skip_first=0, rnn_depth=267, use_attn=False, diff --git a/trainer.py b/trainer.py index fae64c1..d98ffd4 100755 --- a/trainer.py +++ b/trainer.py @@ -422,7 +422,7 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model real_train_pages = splitter.splits[0].train_size real_eval_pages = splitter.splits[0].test_size - + items_per_eval = real_eval_pages * eval_pct eval_batches = int(np.ceil(items_per_eval / eval_batch_size)) steps_per_epoch = real_train_pages // batch_size @@ -432,9 +432,17 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model global_step = tf.train.get_or_create_global_step() inc_step = tf.assign_add(global_step, 1) - all_models: List[ModelTrainerV2] = [] + print('real_train_pages', real_train_pages) + print('real_eval_pages', real_eval_pages) + print('batch_size', batch_size) + print('items_per_eval', items_per_eval) + print('eval_batches', eval_batches) + print('steps_per_epoch', steps_per_epoch) + print('eval_every_step', eval_every_step) + + def create_model(features_set, sampling_period, scope, index, prefix, seed): with tf.variable_scope('input') as inp_scope: @@ -448,11 +456,12 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): rand_seed=seed, train_skip_first=hparams.train_skip_first, back_offset=predict_window if forward_split else 0) inp_scope.reuse_variables() + TCT = .3 #0.01 if side_split: side_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, - train_completeness_threshold=0.01, predict_completeness_threshold=0, + train_completeness_threshold=TCT, predict_completeness_threshold=0, train_window=train_window, rand_seed=seed, runs_in_burst=eval_batches, back_offset=predict_window * (2 if forward_split else 1)) else: @@ -461,7 +470,7 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! 
page_features mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, verbose=verbose, predict_window=predict_window, - train_completeness_threshold=0.01, predict_completeness_threshold=0, + train_completeness_threshold=TCT, predict_completeness_threshold=0, train_window=train_window, rand_seed=seed, runs_in_burst=eval_batches, back_offset=predict_window) else: @@ -637,12 +646,12 @@ def ema_vars(model): has_best_indicator = '↑' else: has_best_indicator = ' ' - status = "%2d: Best top SMAPE=%.3f%s (%s)" % ( + status = "%2d: Best top %.3f%s (%s)" % ( epoch + 1, current_top, has_best_indicator, ",".join(["%.3f" % m.top for m in eval_smape.metrics])) if trainer.has_active(): - status += ", frwd/side best MAE=%.3f/%.3f, SMAPE=%.3f/%.3f; avg MAE=%.3f/%.3f, SMAPE=%.3f/%.3f, %d am" % \ + status += ", frwd/side best MAE=%.3f/%.3f, SMAPE=%.3f/%.3f; avg MAE=%.3f/%.3f, SMAPE=%.3f/%.3f, %d active models" % \ (eval_mae.best_epoch, eval_mae_side.best_epoch, eval_smape.best_epoch, eval_smape_side.best_epoch, eval_mae.avg_epoch, eval_mae_side.avg_epoch, eval_smape.avg_epoch, eval_smape_side.avg_epoch, trainer.has_active()) From c1c6f370d8dedc5ce2222993ba77ef4a583308d5 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 9 Jul 2018 17:20:22 -0700 Subject: [PATCH 15/42] -- --- PREDICT.py | 7 ++++--- PREPROCESS.py | 4 +++- Readme.md | 2 +- hparams.py | 2 +- trainer.py | 5 ++++- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/PREDICT.py b/PREDICT.py index ca3c071..28ca514 100755 --- a/PREDICT.py +++ b/PREDICT.py @@ -28,6 +28,7 @@ FEATURES_SET = 'arturius'# 'arturius' 'simple' 'full' SAMPLING_PERIOD = 'daily' DATA_TYPE = 'ours' #'kaggle' #'ours' +Nmodels = 3. # ============================================================================= @@ -67,16 +68,16 @@ def mean_smape(true, pred): #preds = predict(paths, default_hparams(), back_offset=0, # n_models=3, target_model=0, seed=2, batch_size=2048, asgd=True) t_preds = [] -for tm in range(3): +for tm in range(3): #!!!!!!!! Nmodels tf.reset_default_graph() t_preds.append(predict(FEATURES_SET, SAMPLING_PERIOD, paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63, n_models=3, target_model=tm, seed=2, batch_size=2048, asgd=True)) # ============================================================================= -# average the 3 models predictions +# average the N models predictions # ============================================================================= -preds = sum(t_preds)/3. +preds = sum(t_preds)/Nmodels # ============================================================================= diff --git a/PREPROCESS.py b/PREPROCESS.py index 6d4b1cd..bc1c8fb 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -390,6 +390,8 @@ def remove_downsample_columns(df, out_of_range_fill_value): #for now assume we do ipmutation THEN aggregation: #df = aggregate(df,sampling_period) + df*= 0. + df += 237. print(df) #SHould end up with a csv that is rows are series (each id), cols are dates @@ -443,7 +445,7 @@ def make_key_csv(df): START_DATE = '2015-01-01' #None END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful - SAMPLING_PERIOD = 'weekly' #'daily', 'weekly', 'monthly' + SAMPLING_PERIOD = 'daily' #'daily', 'weekly', 'monthly' # ============================================================================= diff --git a/Readme.md b/Readme.md index 6ccd8e4..719bae9 100755 --- a/Readme.md +++ b/Readme.md @@ -62,7 +62,7 @@ ll data/ #no reason to expect 10000 to 11500 is good range to save out. 
View loss along the way python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 -python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 --name TEST_attn_head --hparam_set=TEST_attn_head --name TEST_stacked --hparam_set=TEST_stacked diff --git a/hparams.py b/hparams.py index 901200c..21600cd 100755 --- a/hparams.py +++ b/hparams.py @@ -8,7 +8,7 @@ #train_window=283, train_window=30,#try 65 w our data to see if allows more samples through filter train_skip_first=0, - rnn_depth=267, + rnn_depth=27,#267, use_attn=False, attention_depth=64, attention_heads=1, diff --git a/trainer.py b/trainer.py index d98ffd4..fd5d3d5 100755 --- a/trainer.py +++ b/trainer.py @@ -434,6 +434,9 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model all_models: List[ModelTrainerV2] = [] + print('eval_pct', eval_pct) + print('eval_k', eval_k) + print('eval_batch_size', eval_batch_size) print('real_train_pages', real_train_pages) print('real_eval_pages', real_eval_pages) print('batch_size', batch_size) @@ -646,7 +649,7 @@ def ema_vars(model): has_best_indicator = '↑' else: has_best_indicator = ' ' - status = "%2d: Best top %.3f%s (%s)" % ( + status = "%2d: Best top SMAPE=%.3f%s (%s)" % ( epoch + 1, current_top, has_best_indicator, ",".join(["%.3f" % m.top for m in eval_smape.metrics])) From c68ad1e4989a12ededb7400055353829c7fd0fda Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 12 Jul 2018 15:11:02 -0700 Subject: [PATCH 16/42] debugging, cleanup, adding other optimizers --- Adam_HD_optimizer.py | 92 ++++++++++++++++++++++++++++++ PREPROCESS.py | 8 ++- SGDN_HD_optimizer.py | 76 +++++++++++++++++++++++++ classification_models.py | 120 +++++++++++++++++++++++++++++++++++++++ drnn.py | 101 ++++++++++++++++++++++++++++++++ hparams.py | 15 +++-- input_pipe.py | 67 ++++++++++++++++++---- make_features.py | 17 +++--- model.py | 92 ++++++++++++++++++++++++++---- trainer.py | 4 +- 10 files changed, 550 insertions(+), 42 deletions(-) create mode 100644 Adam_HD_optimizer.py create mode 100644 SGDN_HD_optimizer.py create mode 100644 classification_models.py create mode 100644 drnn.py diff --git a/Adam_HD_optimizer.py b/Adam_HD_optimizer.py new file mode 100644 index 0000000..341f44b --- /dev/null +++ b/Adam_HD_optimizer.py @@ -0,0 +1,92 @@ +#Copy paste from https://github.com/zadaianchuk/HyperGradientDescent/blob/master/Adam_HD_optimizer.py +#Hypergradient Descent Optimizer + + + + +from __future__ import division + +import tensorflow as tf + +class AdamHDOptimizer(tf.train.GradientDescentOptimizer): + + def __init__(self, alpha_0, beta =10**(-7), name="HGD", mu=0.99, eps = 10**(-8),type_of_learning_rate ="global"): + super(AdamHDOptimizer, self).__init__(beta, name=name) + + self._mu = mu + self._alpha_0 = alpha_0 + self._beta = beta + self._eps = eps + self._type = type_of_learning_rate + + + def minimize(self, loss, global_step): + + # Algo params as constant tensors + mu = tf.convert_to_tensor(self._mu, dtype=tf.float32) + alpha_0=tf.convert_to_tensor(self._alpha_0, dtype=tf.float32) + beta=tf.convert_to_tensor(self._beta, dtype=tf.float32) + eps = tf.convert_to_tensor(self._eps, dtype=tf.float32) + + var_list = tf.trainable_variables() + + # create and retrieve slot variables for: + # 
direction of previous step + ds = [self._get_or_make_slot(var, + tf.constant(0.0, tf.float32, var.get_shape()), "direction", "direction") + for var in var_list] + # current learning_rate alpha + if self._type == "global": + alpha = self._get_or_make_slot(alpha_0, alpha_0, "learning_rate", "learning_rate") + else: + alphas = [self._get_or_make_slot(var, + tf.constant(self._alpha_0, tf.float32, var.get_shape()), "learning_rates", "learning_rates") + for var in var_list] + # moving average estimation + ms = [self._get_or_make_slot(var, + tf.constant(0.0, tf.float32, var.get_shape()), "m", "m") + for var in var_list] + vs = [self._get_or_make_slot(var, + tf.constant(0.0, tf.float32, var.get_shape()), "v", "v") + for var in var_list] + # power of mu for bias-corrected first and second moment estimate + mu_power = tf.get_variable("mu_power", shape=(), dtype=tf.float32, trainable=False, initializer=tf.constant_initializer(1.0)) + + # update moving averages of first and second moment: + grads = tf.gradients(loss, var_list) + grads_squared = [tf.square(g) for g in grads] + m_updates = [m.assign(mu*m + (1.0-mu)*g) for m, g in zip(ms, grads)] #new means + v_updates = [v.assign(mu*v + (1.0-mu)*g2) for v, g2 in zip(vs, grads_squared)] + mu_power_update = [tf.assign(mu_power,tf.multiply(mu_power,mu))] + # bais correction of the estimates + with tf.control_dependencies(v_updates+m_updates+mu_power_update): + ms_hat = [tf.divide(m,tf.constant(1.0) - mu_power) for m in ms] + vs_hat = [tf.divide(v,tf.constant(1.0) - mu_power) for v in vs] + + #update of learning rate alpha, main difference between ADAM and ADAM-HD + if self._type == "global": + hypergrad = sum([tf.reduce_sum(tf.multiply(d,g)) for d,g in zip(ds, grads)]) + alphas_update = [alpha.assign(alpha-beta*hypergrad)] + else: + hypergrads = [tf.multiply(d,g) for d,g in zip(ds, grads)] + alphas_update = [alpha.assign(alpha-beta*hypergrad) for alpha,hypergrad in zip(alphas,hypergrads)] + + # update step directions + with tf.control_dependencies(alphas_update): #we want to be sure that alphas calculated using previous step directions + ds_updates=[d.assign(-tf.divide(m, tf.sqrt(v) + self._eps)) for (m,v,d) in zip(ms_hat,vs_hat,ds)] + + # update parameters of the model + with tf.control_dependencies(ds_updates): + if self._type == "global": + dirs = [alpha*d for d in ds] + alpha_norm = alpha + else: + dirs = [alpha*d for d, alpha in zip(ds,alphas)] + alpha_norm = sum([tf.reduce_mean(alpha**2) for alpha in alphas]) + variable_updates = [v.assign_add(d) for v, d in zip(var_list, dirs)] + global_step.assign_add(1) + # add summaries (track alphas changes) + with tf.name_scope("summaries"): + with tf.name_scope("per_iteration"): + alpha_norm_sum=tf.summary.scalar("alpha", alpha_norm, collections=[tf.GraphKeys.SUMMARIES, "per_iteration"]) + return tf.group(*variable_updates) \ No newline at end of file diff --git a/PREPROCESS.py b/PREPROCESS.py index bc1c8fb..0f0c29e 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -352,6 +352,11 @@ def remove_downsample_columns(df, out_of_range_fill_value): dd = pd.DataFrame(dates).T dd['Page'] = u + #Make a good eay cae to overfit + dd*= 0. + dd += u + + #If doing imputation / other #for each series individually #... @@ -390,8 +395,7 @@ def remove_downsample_columns(df, out_of_range_fill_value): #for now assume we do ipmutation THEN aggregation: #df = aggregate(df,sampling_period) - df*= 0. - df += 237. 
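# =============================================================================
# Context for the overfit sanity check above (hedged sketch, not part of the patch):
# turning every series into a constant equal to its own id gives the model a target
# it should fit almost perfectly, which is a quick way to verify the input pipeline
# and loss wiring before training on real data.
# =============================================================================
import numpy as np
import pandas as pd

def make_overfit_frame(ids, dates):
    """One constant series per id; any working model should drive the loss near zero."""
    data = np.tile(np.asarray(ids, dtype=float)[:, None], (1, len(dates)))
    return pd.DataFrame(data, index=ids, columns=dates)

# e.g. make_overfit_frame([1, 2, 3], pd.date_range('2017-01-01', periods=30, freq='D'))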
+ print(df) #SHould end up with a csv that is rows are series (each id), cols are dates diff --git a/SGDN_HD_optimizer.py b/SGDN_HD_optimizer.py new file mode 100644 index 0000000..549e42b --- /dev/null +++ b/SGDN_HD_optimizer.py @@ -0,0 +1,76 @@ +#Copy paste from https://github.com/zadaianchuk/HyperGradientDescent/blob/master/SGDN_HD_optimizer.py +#Hypergradient Descent Optimizer + + +from __future__ import division + +import tensorflow as tf + +class MomentumSGDHDOptimizer(tf.train.GradientDescentOptimizer): + + def __init__(self, alpha_0, beta =10**(-7), name="HGD", mu=0.95, type_of_learning_rate ="global"): + super(MomentumSGDHDOptimizer, self).__init__(beta, name=name) + self._mu = mu + self._alpha_0 = alpha_0 + self._beta = beta + self._type = type_of_learning_rate + + + def minimize(self, loss, global_step): + + # Algo params as constant tensors + mu = tf.convert_to_tensor(self._mu, dtype=tf.float32) + alpha_0=tf.convert_to_tensor(self._alpha_0, dtype=tf.float32) + beta=tf.convert_to_tensor(self._beta, dtype=tf.float32) + + var_list = tf.trainable_variables() + + # create and retrieve slot variables for: + # direction of previous step + ds = [self._get_or_make_slot(var, + tf.constant(0.0, tf.float32, var.get_shape()), "direction", "direction") + for var in var_list] + # current learning_rate alpha + if self._type == "global": + alpha = self._get_or_make_slot(alpha_0, alpha_0, "learning_rate", "learning_rate") + else: + alphas = [self._get_or_make_slot(var, + tf.constant(self._alpha_0, tf.float32, var.get_shape()), "learning_rates", "learning_rates") + for var in var_list] + # moving average estimation + ms = [self._get_or_make_slot(var, + tf.constant(0.0, tf.float32, var.get_shape()), "m", "m") + for var in var_list] + + # update moving averages of the stochastic gradient: + grads = tf.gradients(loss, var_list) + m_updates = [m.assign(mu*m + (1.0-mu)*g) for m, g in zip(ms, grads)] + + #update of learning rate alpha, it is the main difference between SGD with Nesterov momentum + #and its hypergradient version + if self._type == "global": + hypergrad = sum([tf.reduce_sum(tf.multiply(d,g)) for d,g in zip(ds, grads)]) + alphas_update = [alpha.assign(alpha-beta*hypergrad)] + else: + hypergrads = [tf.multiply(d,g) for d,g in zip(ds, grads)] + alphas_update = [alpha.assign(alpha-beta*hypergrad) for alpha,hypergrad in zip(alphas,hypergrads)] + + # update step directions + with tf.control_dependencies(m_updates+alphas_update): #we want to be sure that alphas calculated using previous step directions + ds_updates=[d.assign(-(mu*m + (1.0-mu)*g)) for (m,d,g) in zip(ms,ds,grads)] + + # update parameters of the model + with tf.control_dependencies(ds_updates): + if self._type == "global": + alpha_norm = alpha + variable_updates = [v.assign_add(alpha*d) for v, d in zip(var_list, ds)] + else: + alpha_norm = sum([tf.reduce_mean(alpha**2) for alpha in alphas]) + variable_updates = [v.assign_add(alpha*d) for v,d,alpha in zip(var_list, ds,alphas)] + global_step.assign_add(1) + + #add summuries (track alphas changes) + with tf.name_scope("summaries"): + with tf.name_scope("per_iteration"): + alpha_sum=tf.summary.scalar("alpha", alpha_norm, collections=[tf.GraphKeys.SUMMARIES, "per_iteration"]) + return tf.group(*variable_updates) \ No newline at end of file diff --git a/classification_models.py b/classification_models.py new file mode 100644 index 0000000..e6b633b --- /dev/null +++ b/classification_models.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +""" +Dilated LSTM Cells in Tensorflow + +From github 
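# =============================================================================
# Plain-NumPy sketch of the hypergradient update both HD optimizers above implement
# (global variant): the learning rate itself takes a gradient step, using the inner
# product of the current gradient with the previous update direction, and the
# parameter update then proceeds as usual with the adjusted rate.
# =============================================================================
import numpy as np

def sgd_hd_step(params, grad, prev_direction, alpha, beta=1e-7):
    alpha = alpha - beta * float(np.dot(grad.ravel(), prev_direction.ravel()))
    direction = -grad   # plain SGD direction; SGDN-HD / Adam-HD use momentum / Adam directions
    params = params + alpha * direction
    return params, direction, alpha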
user "code-terminator" +https://github.com/code-terminator/DilatedRNN/blob/master/models/drnn.py + +based on the paper +Dilated Recurrent Neural Networks +Nov 2017 +Chang et al. +https://arxiv.org/pdf/1710.02224.pdf +""" + +import tensorflow as tf +from drnn import multi_dRNN_with_dilations + +def _contruct_cells(hidden_structs, cell_type): + """ + This function contructs a list of cells. + """ + # error checking + if cell_type not in ["RNN", "LSTM", "GRU"]: + raise ValueError("The cell type is not currently supported.") + + # define cells + cells = [] + for hidden_dims in hidden_structs: + if cell_type == "RNN": + cell = tf.contrib.rnn.BasicRNNCell(hidden_dims) + elif cell_type == "LSTM": + cell = tf.contrib.rnn.BasicLSTMCell(hidden_dims) + elif cell_type == "GRU": + cell = tf.contrib.rnn.GRUCell(hidden_dims) + cells.append(cell) + + return cells + + +def _rnn_reformat(x, input_dims, n_steps): + """ + This function reformat input to the shape that standard RNN can take. + + Inputs: + x -- a tensor of shape (batch_size, n_steps, input_dims). + Outputs: + x_reformat -- a list of 'n_steps' tenosrs, each has shape (batch_size, input_dims). + """ + # permute batch_size and n_steps + x_ = tf.transpose(x, [1, 0, 2]) + # reshape to (n_steps*batch_size, input_dims) + x_ = tf.reshape(x_, [-1, input_dims]) + # split to get a list of 'n_steps' tensors of shape (batch_size, input_dims) + x_reformat = tf.split(x_, n_steps, 0) + + return x_reformat + + +def drnn_classification(x, + hidden_structs, + dilations, + n_steps, + n_classes, + input_dims=1, + cell_type="RNN"): + """ + This function construct a multilayer dilated RNN for classifiction. + Inputs: + x -- a tensor of shape (batch_size, n_steps, input_dims). + hidden_structs -- a list, each element indicates the hidden node dimension of each layer. + dilations -- a list, each element indicates the dilation of each layer. + n_steps -- the length of the sequence. + n_classes -- the number of classes for the classification. + input_dims -- the input dimension. + cell_type -- the type of the RNN cell, should be in ["RNN", "LSTM", "GRU"]. + + Outputs: + pred -- the prediction logits at the last timestamp and the last layer of the RNN. + 'pred' does not pass any output activation functions. 
+ """ + # error checking + assert (len(hidden_structs) == len(dilations)) + + # reshape inputs + x_reformat = _rnn_reformat(x, input_dims, n_steps) + + # construct a list of cells + cells = _contruct_cells(hidden_structs, cell_type) + + # define dRNN structures + layer_outputs = multi_dRNN_with_dilations(cells, x_reformat, dilations) + + if dilations[0] == 1: + # dilation starts at 1, no data dependency lost + # define the output layer + weights = tf.Variable(tf.random_normal(shape=[hidden_structs[-1], + n_classes])) + bias = tf.Variable(tf.random_normal(shape=[n_classes])) + # define prediction + pred = tf.add(tf.matmul(layer_outputs[-1], weights), bias) + else: + # dilation starts not at 1, needs to fuse the output + + # define output layer + weights = tf.Variable(tf.random_normal(shape=[hidden_structs[ + -1] * dilations[0], n_classes])) + bias = tf.Variable(tf.random_normal(shape=[n_classes])) + + # concat hidden_outputs + for idx, i in enumerate(range(-dilations[0], 0, 1)): + if idx == 0: + hidden_outputs_ = layer_outputs[i] + else: + hidden_outputs_ = tf.concat( + [hidden_outputs_, layer_outputs[i]], + axis=1) + + pred = tf.add(tf.matmul(hidden_outputs_, weights), bias) + + return pred \ No newline at end of file diff --git a/drnn.py b/drnn.py new file mode 100644 index 0000000..3fb5799 --- /dev/null +++ b/drnn.py @@ -0,0 +1,101 @@ +""" +Dilated LSTM Cells in Tensorflow + +From github user "code-terminator" +https://github.com/code-terminator/DilatedRNN/blob/master/models/drnn.py + +based on the paper +Dilated Recurrent Neural Networks +Nov 2017 +Chang et al. +https://arxiv.org/pdf/1710.02224.pdf +""" + +import copy +import itertools +import numpy as np +import tensorflow as tf + +def dRNN(cell, inputs, rate, scope='default'): + """ + This function constructs a layer of dilated RNN. + Inputs: + cell -- the dilation operations is implemented independent of the RNN cell. + In theory, any valid tensorflow rnn cell should work. + inputs -- the input for the RNN. inputs should be in the form of + a list of 'n_steps' tenosrs. Each has shape (batch_size, input_dims) + rate -- the rate here refers to the 'dilations' in the orginal WaveNet paper. + scope -- variable scope. + Outputs: + outputs -- the outputs from the RNN. + """ + n_steps = len(inputs) + if rate < 0 or rate >= n_steps: + raise ValueError('The \'rate\' variable needs to be adjusted.') + print "Building layer: %s, input length: %d, dilation rate: %d, input dim: %d." % ( + scope, n_steps, rate, inputs[0].get_shape()[1]) + + # make the length of inputs divide 'rate', by using zero-padding + EVEN = (n_steps % rate) == 0 + if not EVEN: + # Create a tensor in shape (batch_size, input_dims), which all elements are zero. + # This is used for zero padding + zero_tensor = tf.zeros_like(inputs[0]) + dialated_n_steps = n_steps // rate + 1 + print "=====> %d time points need to be padded. 
" % ( + dialated_n_steps * rate - n_steps) + print "=====> Input length for sub-RNN: %d" % (dialated_n_steps) + for i_pad in xrange(dialated_n_steps * rate - n_steps): + inputs.append(zero_tensor) + else: + dialated_n_steps = n_steps // rate + print "=====> Input length for sub-RNN: %d" % (dialated_n_steps) + + # now the length of 'inputs' divide rate + # reshape it in the format of a list of tensors + # the length of the list is 'dialated_n_steps' + # the shape of each tensor is [batch_size * rate, input_dims] + # by stacking tensors that "colored" the same + + # Example: + # n_steps is 5, rate is 2, inputs = [x1, x2, x3, x4, x5] + # zero-padding --> [x1, x2, x3, x4, x5, 0] + # we want to have --> [[x1; x2], [x3; x4], [x_5; 0]] + # which the length is the ceiling of n_steps/rate + dilated_inputs = [tf.concat(inputs[i * rate:(i + 1) * rate], + axis=0) for i in range(dialated_n_steps)] + + # building a dialated RNN with reformated (dilated) inputs + dilated_outputs, _ = tf.contrib.rnn.static_rnn( + cell, dilated_inputs, + dtype=tf.float32, scope=scope) + + # reshape output back to the input format as a list of tensors with shape [batch_size, input_dims] + # split each element of the outputs from size [batch_size*rate, input_dims] to + # [[batch_size, input_dims], [batch_size, input_dims], ...] with length = rate + splitted_outputs = [tf.split(output, rate, axis=0) + for output in dilated_outputs] + unrolled_outputs = [output + for sublist in splitted_outputs for output in sublist] + # remove padded zeros + outputs = unrolled_outputs[:n_steps] + + return outputs + + +def multi_dRNN_with_dilations(cells, inputs, dilations): + """ + This function constucts a multi-layer dilated RNN. + Inputs: + cells -- A list of RNN cells. + inputs -- A list of 'n_steps' tensors, each has shape (batch_size, input_dims). + dilations -- A list of integers with the same length of 'cells' indicates the dilations for each layer. + Outputs: + x -- A list of 'n_steps' tensors, as the outputs for the top layer of the multi-dRNN. + """ + assert (len(cells) == len(dilations)) + x = copy.copy(inputs) + for cell, dilation in zip(cells, dilations): + scope_name = "multi_dRNN_dilation_%d" % dilation + x = dRNN(cell, x, dilation, scope=scope_name) + return x \ No newline at end of file diff --git a/hparams.py b/hparams.py index 21600cd..5eaeeef 100755 --- a/hparams.py +++ b/hparams.py @@ -3,12 +3,12 @@ # Manually selected params params_s32 = dict( - batch_size=64,#256, + batch_size=123,#256, #train_window=380, - #train_window=283, - train_window=30,#try 65 w our data to see if allows more samples through filter + train_window=283, + #train_window=30,#try 65 w our data to see if allows more samples through filter train_skip_first=0, - rnn_depth=27,#267, + rnn_depth=267, use_attn=False, attention_depth=64, attention_heads=1, @@ -49,14 +49,13 @@ # Test setting with multiple attention heads #python3 trainer.py --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --no_eval --no_forward_split --asgd_decay=0.99 --max_steps=11500 --save_from_step=200 params_TEST_attn_head = dict( - batch_size=256, - #train_window=380, + batch_size=64,#256, train_window=283, train_skip_first=0, - rnn_depth=267, + rnn_depth=27,#267, use_attn=True,#!!!!!!!!!!!!!!!! Set True attention_depth=64, - attention_heads=2,#!!!!!!!!!!!!!!!! Set True + attention_heads=1,#!!!!!! 
encoder_readout_dropout=0.4768781146510798, encoder_rnn_layers=1, diff --git a/input_pipe.py b/input_pipe.py index e2b8e5a..4e8b3c2 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -22,6 +22,12 @@ def __init__(self, test_set: List[tf.Tensor], train_set: List[tf.Tensor], test_s class Splitter: + """ + This is the splitter used when side_split + (vs. FakeSplitter when not side_split [when forward_split]) + + Is typical train-test split + """ def cluster_pages(self, cluster_idx: tf.Tensor): """ Shuffles pages so all user_agents of each unique pages stays together in a shuffled list @@ -62,14 +68,42 @@ def prepare_split(i): train_sampled_size = int(round(train_size * train_sampling)) test_idx = splits[i][:test_sampled_size] train_idx = complements[i][:train_sampled_size] + +# print(test_size) +# print(train_size) +# print(test_sampled_size) +# print(train_sampled_size) +# print(test_idx) +# print(train_idx) + #When doing --side_split validation option, was getting a type error + #when creating test_set, tran_set list comprehensions: change dtype here for idx + test_idx = tf.cast(test_idx, tf.int32) + train_idx = tf.cast(train_idx, tf.int32) + + test_idx = tf.Print(test_idx, ['test_idx',tf.shape(test_idx),test_idx]) + train_idx = tf.Print(train_idx, ['train_idx',tf.shape(train_idx),train_idx]) + """48354 + 96709 + 48354 + 96709 + Tensor("strided_slice_1:0", shape=(48354,), dtype=float32, device=/device:CPU:0) + Tensor("strided_slice_2:0", shape=(96709,), dtype=float32, device=/device:CPU:0)""" test_set = [tf.gather(tensor, test_idx, name=mk_name('test', tensor)) for tensor in tensors] tran_set = [tf.gather(tensor, train_idx, name=mk_name('train', tensor)) for tensor in tensors] +# print(test_set) +# print(tran_set) return Split(test_set, tran_set, test_sampled_size, train_sampled_size) self.splits = [prepare_split(i) for i in range(n_splits)] class FakeSplitter: + """ + This is the splitter used when forward_split + (vs. 
Splitter when not forward_split [when side_split]) + + Is typical train-test split + """ def __init__(self, tensors: List[tf.Tensor], n_splits, seed, test_sampling=1.0): total_series = tensors[0].shape[0].value N_time_series = int(round(total_series * test_sampling)) @@ -80,7 +114,7 @@ def mk_name(prefix, tensor): def prepare_split(i): idx = tf.random_shuffle(tf.range(0, N_time_series, dtype=tf.int32), seed + i) train_tensors = [tf.gather(tensor, idx, name=mk_name('shfl', tensor)) for tensor in tensors] - if test_sampling < 1.0: + if test_sampling < 1.0: #Only use subset of time series = test_sampling sampled_idx = idx[:N_time_series] test_tensors = [tf.gather(tensor, sampled_idx, name=mk_name('shfl_test', tensor)) for tensor in tensors] else: @@ -97,7 +131,7 @@ def cut(self, counts, start, end): :param counts: counts timeseries :param start: start index :param end: end index - :return: tuple (train_counts, test_counts, dow, lagged_counts) + :return: tuple (train_counts, test_counts, lagged_counts, [dow,woy,moy]) """ # Pad counts to ensure we have enough array length for prediction counts = tf.concat([counts, tf.fill([self.predict_window], np.NaN)], axis=0) @@ -178,22 +212,24 @@ def cut_train(self, counts, *args): :param args: pass-through data, will be appended to result :return: result of cut() + args """ - n_days = self.predict_window + self.train_window + n_timesteps = self.predict_window + self.train_window # How much free space we have to choose starting day - free_space = self.inp.data_days - n_days - self.back_offset - self.start_offset + free_space = self.inp.data_days - n_timesteps - self.back_offset - self.start_offset if self.verbose: + #!!!!!! doesn't really matter since this is just printout, but would need to change for WEEKLY / MONTHLY lower_train_start = self.inp.data_start + pd.Timedelta(self.start_offset, 'D') - lower_test_end = lower_train_start + pd.Timedelta(n_days, 'D') + lower_test_end = lower_train_start + pd.Timedelta(n_timesteps, 'D') lower_test_start = lower_test_end - pd.Timedelta(self.predict_window, 'D') upper_train_start = self.inp.data_start + pd.Timedelta(free_space - 1, 'D') - upper_test_end = upper_train_start + pd.Timedelta(n_days, 'D') + upper_test_end = upper_train_start + pd.Timedelta(n_timesteps, 'D') upper_test_start = upper_test_end - pd.Timedelta(self.predict_window, 'D') print(f"Free space for training: {free_space} days.") print(f" Lower train {lower_train_start}, prediction {lower_test_start}..{lower_test_end}") print(f" Upper train {upper_train_start}, prediction {upper_test_start}..{upper_test_end}") # Random starting point offset = tf.random_uniform((), self.start_offset, free_space, dtype=tf.int32, seed=self.rand_seed) - end = offset + n_days +# offset = tf.Print(offset,['offset',tf.shape(offset),offset]) + end = offset + n_timesteps # Cut all the things return self.cut(counts, offset, end) + args @@ -211,6 +247,7 @@ def cut_eval(self, counts, *args): def reject_filter(self, x_counts, y_counts, *args): """ Rejects timeseries having too many zero datapoints (more than self.max_train_empty) + [by this point, NANs would have already been converted to 0's, this is is NAN's U 0's] """ if self.verbose: print("max empty %d train %d predict" % (self.max_train_empty, self.max_predict_empty)) @@ -270,7 +307,7 @@ def make_features(self, *args): std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_counts, mean))) norm_x_counts = (x_counts - mean) / std norm_y_counts = (y_counts - mean) / std - norm_lagged_counts = (lagged_counts - mean) / std #!!!!!! 
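# =============================================================================
# Rough sketch of the check reject_filter performs (inferred from its docstring; the
# real implementation is in input_pipe.py): keep a cropped window only if the number
# of empty (zero) timesteps in its train part stays within max_train_empty.
# =============================================================================
import tensorflow as tf

def keep_window(x_counts, max_train_empty):
    n_empty = tf.reduce_sum(tf.cast(tf.equal(x_counts, 0.0), tf.int32))
    return n_empty <= max_train_empty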
seems like there is some leakage in time here??? The y lagged are normalized in a way that is a function of the y data ?? + norm_lagged_counts = (lagged_counts - mean) / std # Split lagged counts to train and test x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) @@ -308,7 +345,7 @@ def make_features(self, *args): #the time INdependent features [constant per fixture] will just be tiled same way either way except diff lengths # Train features, depending on measurement frequency - x_features = tf.expand_dims(norm_x_counts, -1) # [n_days] -> [n_days, 1] + x_features = tf.expand_dims(norm_x_counts, -1) # [n_timesteps] -> [n_timesteps, 1] if self.sampling_period == 'daily': x_features = tf.concat([x_features, x_dow, x_woy], axis=1) elif self.sampling_period == 'weekly': @@ -318,7 +355,7 @@ def make_features(self, *args): #Regardess of period/frequency will have below features: x_features = tf.concat([x_features, x_lagged, # Stretch series_features to all training days - # [1, features] -> [n_days, features] + # [1, features] -> [n_timesteps, features] tf.tile(series_features, [self.train_window, 1])], axis=1) # Test features @@ -331,7 +368,7 @@ def make_features(self, *args): #Regardess of period/frequency will have below features: y_features = tf.concat([y_features, y_lagged, # Stretch series_features to all testing days - # [1, features] -> [n_days, features] + # [1, features] -> [n_timesteps, features] tf.tile(series_features, [self.predict_window, 1]) ], axis=1) @@ -407,6 +444,14 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter self.mode = mode self.verbose = verbose + + print('max_train_empty',self.max_train_empty) + print('max_predict_empty',self.max_predict_empty) + print('train_window',self.train_window) + print('predict_window',self.predict_window) + print('attn_window',self.attn_window) + + # Reserve more processing threads for eval/predict because of larger batches num_threads = 3 if mode == ModelMode.TRAIN else 6 diff --git a/make_features.py b/make_features.py index 3845f34..5eb8642 100755 --- a/make_features.py +++ b/make_features.py @@ -113,7 +113,7 @@ def single_autocorr(series, lag): def batch_autocorr(data, lag, starts, ends, threshold, backoffset=0): """ Calculate autocorrelation for batch (many time series at once) - :param data: Time series, shape [N_time_series, n_days] + :param data: Time series, shape [N_time_series, n_timesteps] :param lag: Autocorrelation lag :param starts: Start index for each series :param ends: End index for each series @@ -123,8 +123,8 @@ def batch_autocorr(data, lag, starts, ends, threshold, backoffset=0): autocorrelation value is NaN """ n_series = data.shape[0] - n_days = data.shape[1] - max_end = n_days - backoffset + n_timesteps = data.shape[1] + max_end = n_timesteps - backoffset corr = np.empty(n_series, dtype=np.float64) support = np.empty(n_series, dtype=np.float64) for i in range(n_series): @@ -149,21 +149,21 @@ def find_start_end(data: np.ndarray): """ Calculates start and end of real traffic data. 
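# =============================================================================
# Equivalent vectorized form of find_start_end above (NumPy sketch), as a reference
# for what the per-page loops compute: the index of the first and last timestep that
# is neither NaN nor zero, or -1 when a series has no such timestep. Expects a 2-D
# float array of shape [N_time_series, n_timesteps].
# =============================================================================
import numpy as np

def find_start_end_vectorized(data):
    valid = np.where(np.isnan(data), 0.0, data) > 0
    has_any = valid.any(axis=1)
    start_idx = np.where(has_any, valid.argmax(axis=1), -1).astype(np.int32)
    end_idx = np.where(has_any,
                       data.shape[1] - 1 - valid[:, ::-1].argmax(axis=1),
                       -1).astype(np.int32)
    return start_idx, end_idx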
Start is an index of first non-zero, non-NaN value, end is index of last non-zero, non-NaN value - :param data: Time series, shape [N_time_series, n_days] + :param data: Time series, shape [N_time_series, n_timesteps] :return: """ N_time_series = data.shape[0] - n_days = data.shape[1] + n_timesteps = data.shape[1] start_idx = np.full(N_time_series, -1, dtype=np.int32) end_idx = np.full(N_time_series, -1, dtype=np.int32) for page in range(N_time_series): # scan from start to the end - for day in range(n_days): + for day in range(n_timesteps): if not np.isnan(data[page, day]) and data[page, day] > 0: start_idx[page] = day break # reverse scan, from end to start - for day in range(n_days - 1, -1, -1): + for day in range(n_timesteps - 1, -1, -1): if not np.isnan(data[page, day]) and data[page, day] > 0: end_idx[page] = day break @@ -284,7 +284,6 @@ def run(): # Get the data df, nans, starts, ends = prepare_data(args.start, args.end, args.valid_threshold, args.data_type, args.sampling_period) - # ============================================================================= # STATIC FEATURES @@ -294,7 +293,7 @@ def run(): data_start, data_end = df.columns[0], df.columns[-1] # We have to project some date-dependent features (day of week, etc) to the future dates for prediction - features_end = data_end + pd.Timedelta(args.add_days, unit='D') + features_end = data_end + pd.Timedelta(args.add_days, unit='D') #!!!!!!!!!!! will need to change for WEEKLY MONTHLY sampled print(f"start: {data_start}, end:{data_end}, features_end:{features_end}") # Group unique pages by agents diff --git a/model.py b/model.py index 6145305..faf9fae 100755 --- a/model.py +++ b/model.py @@ -15,6 +15,17 @@ # RNN = tf.contrib.cudnn_rnn.CudnnRNNRelu + +def debug_tensor_print(tensor): + """ + Debugging mode: + Print info about a tensor in realtime + """ + tensor_list = [tensor.name, tf.shape(tensor), tensor] + tensor = tf.Print(tensor, tensor_list) + return tensor + + def default_init(seed): # replica of tf.glorot_uniform_initializer(seed=seed) return layers.variance_scaling_initializer(factor=1.0, @@ -237,7 +248,10 @@ def decode_predictions(decoder_readout, inp: InputPipe): batch_readout = tf.transpose(decoder_readout) batch_std = tf.expand_dims(inp.norm_std, -1) batch_mean = tf.expand_dims(inp.norm_mean, -1) - return batch_readout * batch_std + batch_mean + + ret = batch_readout * batch_std + batch_mean +# ret = tf.Print(ret, ['ret:',tf.shape(ret),ret, 'batch_readout:',batch_readout, 'batch_std:',batch_std, 'batch_mean',batch_mean]) + return ret def calc_loss(predictions, true_y, additional_mask=None): @@ -263,7 +277,11 @@ def calc_loss(predictions, true_y, additional_mask=None): def make_train_op(loss, ema_decay=None, prefix=None): - optimizer = COCOB() + #optimizer = COCOB() + ##train.AdamOptimizer train.GradientDescentOptimizer + optimizer = tf.train.AdamOptimizer() #!!!!!try simpler optimizer on our data. +# optimizer = tf.train.GradientDescentOptimizer(1e-9) #!!!!!try simpler optimizer on our data. + glob_step = tf.train.get_global_step() # Add regularization losses @@ -391,6 +409,9 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a fingerprint, seed=seed) # Run decoder + #... 
= decoder(encoder_state, attn_features, prediction_inputs, previous_y) + print('inp.norm_x[:, -1]',inp.norm_x[:, -1]) + print('inp.time_y',inp.time_y) decoder_targets, decoder_outputs = self.decoder(encoder_state, attn_features if hparams.use_attn else None, inp.time_y, inp.norm_x[:, -1]) @@ -399,7 +420,13 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a dec_activation_loss = rnn_activation_loss(decoder_outputs, hparams.decoder_activation_loss / inp.predict_window) # Get final denormalized predictions - self.predictions = decode_predictions(decoder_targets, inp) + vv = decode_predictions(decoder_targets, inp) + vv = tf.Print(vv, ['decode_predictions',vv,tf.shape(vv)]) + self.predictions = vv +# print('self.predictions (still log1p(counts))') +# print(self.predictions) + + # Calculate losses and build training op if inp.mode == ModelMode.PREDICT: @@ -416,9 +443,12 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a else: self.mae, smape_loss, self.smape, self.loss_item_count = calc_loss(self.predictions, inp.true_y, additional_mask=loss_mask) + #from calc_loss: + #mae_loss, smape_loss(true_y, predictions, weights), calc_smape_rounded(true_y, predictions, weights), tf.size(true_y) + if is_train: # Sum all losses - total_loss = smape_loss + enc_stab_loss + dec_stab_loss + enc_activation_loss + dec_activation_loss + total_loss = smape_loss + enc_stab_loss + dec_stab_loss + enc_activation_loss + dec_activation_loss #!!!!!!!! put in pinball loss instead of SMAPE when doing quantiles self.train_op, self.glob_norm, self.ema = make_train_op(total_loss, asgd_decay, prefix=graph_prefix) @@ -458,6 +488,20 @@ def build_cell(idx): else: cell = build_cell(0) + + #!!!!!! on our data, when doing side_split, encoder_state is fine [no NANs], + #but when doing walk_forward, some rows (instances) are all NANs (and the others all defined), + #then eventually every instance becomes NANs + N_nans = tf.reduce_sum(tf.cast(tf.is_nan(encoder_state), tf.float32)) + tt = tf.cast(tf.is_nan(encoder_state), tf.float32) + ff = tf.reduce_sum(tt,axis=1) + ggg = tf.cast(tf.equal(ff, ff*0.+267.), tf.float32) + N_all_NAN_encoder_states = tf.reduce_sum(ggg) + total = tf.reduce_prod(tf.shape(encoder_state)) + encoder_state = tf.Print(encoder_state,['encoder_state', tf.shape(encoder_state), encoder_state, 'N_nans', N_nans, 'total', total, 'N_all_NAN_encoder_states', N_all_NAN_encoder_states]) + + + nest.assert_same_structure(encoder_state, cell.state_size) predict_days = self.inp.predict_window assert prediction_inputs.shape[1] == predict_days @@ -470,11 +514,13 @@ def build_cell(idx): # Stop condition for decoding loop def cond_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): + #!!!!! ???? Need to change when doing as weekly data??? return time < predict_days # FC projecting layer to get single predicted value from RNN output def project_output(tensor): - return tf.layers.dense(tensor, 1, name='decoder_output_proj', kernel_initializer=self.default_init()) + N_pctls=1 #!!!!!!!!!! 
quantiles + return tf.layers.dense(tensor, N_pctls, name='decoder_output_proj', kernel_initializer=self.default_init()) def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): """ @@ -496,13 +542,15 @@ def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_ # Append previous predicted value + attention vector to input features next_input = tf.concat([prev_output, features, attn], axis=1) else: - # Append previous predicted value to input features next_input = tf.concat([prev_output, features], axis=1) + # Append previous predicted value to input features # Run RNN cell output, state = cell(next_input, prev_state) # Make prediction from RNN outputs - projected_output = project_output(output) + projected_output = project_output(output) #!!!!!!!!!! quantiles + projected_output = tf.Print(projected_output, ['time',time,'projected_output',projected_output,tf.shape(projected_output),'output',output,tf.shape(output),'state',state,tf.shape(state) ,'prev_output',prev_output,tf.shape(prev_output) ,'features',features,tf.shape(features),features[1,:18]]) + # Append step results to the buffer arrays if return_raw_outputs: array_outputs = array_outputs.write(time, output) @@ -515,13 +563,37 @@ def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_ tf.expand_dims(previous_y, -1), encoder_state, tf.TensorArray(dtype=tf.float32, size=predict_days), - tf.TensorArray(dtype=tf.float32, size=predict_days) if return_raw_outputs else tf.constant(0)] + tf.TensorArray(dtype=tf.float32, size=predict_days) if return_raw_outputs else tf.constant(0)] #!!!!!!! size= ... x N_pctls # Run the loop - _, _, _, targets_ta, outputs_ta = tf.while_loop(cond_fn, loop_fn, loop_init) - + _time, _projected_output, _state, targets_ta, outputs_ta = tf.while_loop(cond_fn, loop_fn, loop_init) + + + print('decoder') +# print('_time',_time) +# _time = debug_tensor_print(_time) +# print('_projected_output',_projected_output) +# _projected_output = debug_tensor_print(_projected_output) +# print('_state',_state) +# _state = debug_tensor_print(_state) + + +# targets_ta_tensor = tf.convert_to_tensor(targets_ta) +# targets_ta_tensor = tf.Print(targets_ta_tensor,[targets_ta_tensor]) +# print('targets_ta',targets_ta) +# print('outputs_ta',outputs_ta) # Get final tensors from buffer arrays targets = targets_ta.stack() # [time, batch_size, 1] -> [time, batch_size] targets = tf.squeeze(targets, axis=-1) raw_outputs = outputs_ta.stack() if return_raw_outputs else None + +# print('targets',targets) + #!!!!!!!!!!! why targets becomes NANs ????? +# why targets NANs? 
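One way to chase the "why targets NaNs" question above (an editorial debugging sketch, not part of the patch): in TF 1.x, the version noted in the Readme, tf.check_numerics raises InvalidArgumentError at the first evaluated tensor containing NaN/Inf, which usually localizes the source faster than tf.Print on downstream values such as the stacked targets. A minimal, self-contained illustration:

import tensorflow as tf

def guard(tensor, label):
    # Hypothetical helper: fail fast if `tensor` ever evaluates to NaN/Inf.
    return tf.check_numerics(tensor, message='NaN/Inf in ' + label)

x = tf.constant([1.0, float('nan')])
with tf.Session() as sess:
    try:
        sess.run(guard(x, 'example tensor'))
    except tf.errors.InvalidArgumentError:
        print('caught NaN/Inf')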
+ targets = debug_tensor_print(targets) #63 x 245, except for first 2 prints for each new iteration it is 63 x 64 +# raw_outputs = debug_tensor_print(raw_outputs) #is 63 x 64 x 267 + +# print_list = ['_time', _time.name, tf.shape(_time), _time] +# raw_outputs = tf.Print(raw_outputs, print_list) + return targets, raw_outputs diff --git a/trainer.py b/trainer.py index fd5d3d5..470bb50 100755 --- a/trainer.py +++ b/trainer.py @@ -459,7 +459,7 @@ def create_model(features_set, sampling_period, scope, index, prefix, seed): rand_seed=seed, train_skip_first=hparams.train_skip_first, back_offset=predict_window if forward_split else 0) inp_scope.reuse_variables() - TCT = .3 #0.01 + TCT = 0.01 if side_split: side_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, @@ -740,7 +740,7 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, else: predictions += cp_predictions predictions /= len(checkpoints) - offset = pd.Timedelta(back_offset, 'D') + offset = pd.Timedelta(back_offset, 'D') #!!!!!!!!!!!! need to change these lines when sampling WEEKLY MONTHLY start_prediction = inp.data_end + pd.Timedelta('1D') - offset end_prediction = start_prediction + pd.Timedelta(predict_window - 1, 'D') predictions.columns = pd.date_range(start_prediction, end_prediction) From e33979ec1d0a534b9c33d8c99416bdb1a01b224e Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 16 Jul 2018 10:51:08 -0700 Subject: [PATCH 17/42] doing median of last 4 weeks imputation for daily sampled data --- PREPROCESS.py | 113 ++++++++++++++++++++++++++++++++++++++++++++++---- Readme.md | 29 +++++++++---- 2 files changed, 128 insertions(+), 14 deletions(-) diff --git a/PREPROCESS.py b/PREPROCESS.py index 0f0c29e..0388596 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -206,12 +206,22 @@ def imputation__simple(df,imputation_method): cols = new_df.columns.tolist() new_df = new_df[cols[-1:]+cols[:-1]] new_df.reset_index(drop=True,inplace=True) - return new_df + return new_df + + + if (imputation_method == 'median') or (imputation_method == 'mean'): df = imputation__simple(df,imputation_method) - + +# if imputation_method == 'lagKmedian': +# #First get rid of the big blocks of mising values [more than 1 seasonality long] +## df = imputation_big_gaps(df) +# #Then deal with the short missing holes +# N_seasons = 4 +# df = imputation_lagKmedian(df,N_seasons) + else: raise Exception('not implemented other methods yet') @@ -233,6 +243,79 @@ def imputation__simple(df,imputation_method): +def imputation_lagKmedian_single_series(df,seasonality,N_seasons): + """ + Fill in short missing gaps by replacing missing value with: + median over last K weeks for that day. + E.g. Monday is missing, so use median count over 4 previous Mondays + + Intended for short holes. Remove longer ones in chunks of length seasonality. + + For now assuming that big chunk removal is done AFTER this step. 
+ """ + #If the whole series is empty (all -1): + if np.alltrue(df.drop(columns='Page').values==-1): + return df + + max_block_length = seasonality - 1 + offsets = np.arange(-max_block_length,1,1) + + cols = list(df.columns) + cols = cols[:-1]#only the date cols., not the "Page" col +# N_timesteps = len(cols) +# print(cols) +# print(N_timesteps) + c = df['Page'].values + _ = df.drop(columns=['Page']) +# print(_.values) + missing_inds = np.where(_<0.)[1] + + + if missing_inds.size > 0: + #Means there are some missing values + #So scan through the data and fill in bad values, + #starting after the first real data [ignore all -1's that occur before + #time series starts for real] + first_real_ind = np.where(_>=0.)[1][0] + missing_inds = missing_inds[missing_inds>first_real_ind] +# print(missing_inds) + + for mi in missing_inds: + #Only fill in those gaps that are small holes (less than 1 seasonality) + #Check that this particular missing val is not in a missing block + #that has >= 1 seasonality size: +# print(mi) + in_block = False + + for off in offsets: +# print(_.values) + block_inds = np.arange(mi+off,mi+off+seasonality,1) +# print(block_inds) +# print(block_inds, [i in missing_inds for i in block_inds]) + if np.alltrue([i in missing_inds for i in block_inds]): + in_block = True + break +# x = _.values[0][mi+off : mi+off+seasonality] +# if np.alltrue(x==-1): + if in_block: + continue + #If it is not in a completely missing block [at least 1 value is recorded], then do lag K median: + prev_K_inds = np.arange(mi-seasonality, max(0, mi - N_seasons*(seasonality+1)), -seasonality).tolist() + t = _[_.columns[prev_K_inds]].values + t = t[t>=0] + imputed_val = np.median(t) + #If all K previous timesteps were -1, then would give nan, so set manually to -1: + if np.isnan(imputed_val):# == np.nan: + imputed_val = -1 + _[_.columns[mi]] = imputed_val + +# g = np.where(_<0.)[1] +# g = g[g>first_real_ind] +# print(g) +# print('\n'*3) + _['Page'] = c + return _ + def format_like_Kaggle(df, myDataDir, imputation_method, sampling_period, start_date=None, end_date=None): @@ -352,9 +435,23 @@ def remove_downsample_columns(df, out_of_range_fill_value): dd = pd.DataFrame(dates).T dd['Page'] = u - #Make a good eay cae to overfit - dd*= 0. - dd += u + print(i,u) + if imputation_method=='lagKmedian': + if sampling_period=='daily': + N_seasons = 4 + seasonality = 7 + elif sampling_period=='weekly': + N_seasons = 4 + seasonality = 1 + dd = imputation_lagKmedian_single_series(dd,seasonality,N_seasons) + +# if i == 58: +# v=eeeeee + #Make a good easy case to overfit + DEBUG = False + if DEBUG: + dd*= 0. + dd += u #If doing imputation / other @@ -369,6 +466,8 @@ def remove_downsample_columns(df, out_of_range_fill_value): df.reset_index(drop=True,inplace=True) + + #If we did aggregation, then above reogranization will have many of the columns Nan / -1, #since e.g. went from daily to weekly, then 6 days of the week will look empty. So remove them. if sampling_period=='weekly': @@ -389,7 +488,7 @@ def remove_downsample_columns(df, out_of_range_fill_value): #Imputation, dealing with missing seasonality blocks / out of phase - if imputation_method: + if imputation_method=='median' or imputation_method=='mean': df = do_imputation(df,imputation_method) #Could do impoutation then downsampling, vs. downsampling then imputation ... unclear which is better here in general. 
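For reference, a minimal self-contained sketch of the lag-K median idea on a plain 1-D numpy series (an illustration under simplifying assumptions: NaN marks missing values, and the Page column, block-gap detection, and -1 bookkeeping of the real function are ignored):

import numpy as np

def lag_k_median_impute(series, seasonality=7, n_seasons=4):
    # Replace each NaN with the median of up to n_seasons earlier values
    # spaced one seasonality apart (e.g. the previous 4 same weekdays).
    out = np.array(series, dtype=float)
    for i in np.where(np.isnan(out))[0]:
        lags = [i - k * seasonality for k in range(1, n_seasons + 1) if i - k * seasonality >= 0]
        vals = out[lags]
        vals = vals[~np.isnan(vals)]
        if vals.size:
            out[i] = np.median(vals)
    return out

x = np.array([5, 6, 7, 5, 6, 7, 8,
              5, np.nan, 7, 5, 6, np.nan, 8,
              5, 6, 7, 5, 6, 7, 8], dtype=float)
print(lag_k_median_impute(x))  # the two NaNs become 6.0 and 7.0 here (medians of earlier same-weekday values)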
#for now assume we do ipmutation THEN aggregation: @@ -445,7 +544,7 @@ def make_key_csv(df): # ============================================================================= # TOTAL COMPLETED TRIPS: myDataDir = r"/Users/kocher/Desktop/forecasting/exData/totalCompletedTripsDaily" - IMPUTATION_METHOD = None #'median' #'STL' #None + IMPUTATION_METHOD = 'lagKmedian' #'median' #'STL' #'lagKmedian' #None START_DATE = '2015-01-01' #None END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful diff --git a/Readme.md b/Readme.md index 719bae9..b7fef7d 100755 --- a/Readme.md +++ b/Readme.md @@ -43,12 +43,11 @@ See also [detailed model description](how_it_works.md) GK modifications for own data: 1. PREPROCESS.py - Maximize reuse of existing architecture: just put my data in exact same format as Kaggle competition csv's -2. $source activate gktf +2. $source activate gktf. #previously set up a conda environment w/ Python 3.6, tensorflow 1.4.0, to match same versions as Kaggle solution 3. $cd ..../kaggle-web-traffic 4. $python3 PREPROCESS.py 5. $python3 make_features.py data/vars kaggle daily arturius --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full, full_w_context} depending on using default Arturius kaggle vs. own custom for this application; and specify sampling period -python3 make_features.py data/vars kaggle daily full --add_days=63 -python3 make_features.py data/vars ours daily full --add_days=63 +python3 make_features.py data/vars kaggle daily full --add_days=85 #Just in case making new features cd data @@ -60,12 +59,26 @@ rm *.pkl cd .. ll data/ +python3 make_features.py data/vars ours daily full --add_days=63 +python3 make_features.py data/vars kaggle daily full --add_days=63 + + + + + #no reason to expect 10000 to 11500 is good range to save out. View loss along the way python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 ---name TEST_attn_head --hparam_set=TEST_attn_head +python3 trainer.py full daily --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 --name TEST_stacked --hparam_set=TEST_stacked +--no_eval +--side_split + +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --side_split + + + 7. $python3 PREDICT.py @@ -76,8 +89,10 @@ python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_de To do: -0. save log files to view SMAPE etc metrics during training -1. finish PREPROCESS.py to do better imputation using basic forecasting method [just use STL or Theta to fill in small gaps; otherwise remove blocks] -2. for weekly. monthly inputs, will there be issue in Kaggle code??? +0. print out the SMAPE for the actual data [current is doing SMAPE of the unrounded log1p(data) which will likely be much smaller than for real] +1. Visualizations of predictions on our own data +1. why encoder_state NANs in it [is it train predict window completeness thresholds?] + +2. for weekly. monthly inputs, need to change few places in tensorflow code 3. Prediction intervals 4. 
Architecture improvements \ No newline at end of file From e555ee19e3acfb12c5dc5882c83b48c3dd61814d Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 16 Jul 2018 13:19:30 -0700 Subject: [PATCH 18/42] changed preprocessing to NANs instead of -1 which fixed the SMAPE 2 issue --- .gitignore | 6 ++++-- PREPROCESS.py | 45 +++++++++++++++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index fc3b504..8be5e29 100644 --- a/.gitignore +++ b/.gitignore @@ -14,5 +14,7 @@ data/submission.csv.gz data/* */.DS_STORE .DS_STORE -images/* -ex_figs/* +images/ +ex_figs/ + +*.png diff --git a/PREPROCESS.py b/PREPROCESS.py index 0388596..f61b34a 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -85,7 +85,7 @@ def get_earliest_latest_dates(df): -def __missing_vals_distribution(df): +def __missing_vals_distribution(df,out_of_range_fill_value): """ Look at two things: - What fraction of our time series are desne vs. have >= 1 missing value? @@ -96,7 +96,7 @@ def __missing_vals_distribution(df): start/end missing, nd intermedite gaps have been filled with -1 """ - def make_cdf(v): + def make_cdf(v,out_of_range_fill_value): c = Counter(v) x = list(c.keys()) x = np.array(x) -1 #-1 to go from diff in days from present data -> gap length @@ -113,7 +113,7 @@ def make_cdf(v): #get fraction dense vs sparse: dd = df.values[:,1:] - sparse = (dd==-1).sum(axis=1) + sparse = (dd==out_of_range_fill_value).sum(axis=1) Nsparse = float((sparse>0).sum()) print(Nsparse) Ntotal = float(dd.shape[0]) @@ -127,12 +127,12 @@ def make_cdf(v): #not officially starting yet, or it got closed out. all_gaps = [] for row in dd: - inds = np.where(row!=-1)[0] + inds = np.where(row!=out_of_range_fill_value)[0] x = np.diff(inds) t = list(x[x>1]) if len(t)>0: all_gaps.extend(t) - make_cdf(all_gaps) + make_cdf(all_gaps,out_of_range_fill_value) @@ -243,7 +243,7 @@ def imputation__simple(df,imputation_method): -def imputation_lagKmedian_single_series(df,seasonality,N_seasons): +def imputation_lagKmedian_single_series(df,seasonality,N_seasons,out_of_range_fill_value): """ Fill in short missing gaps by replacing missing value with: median over last K weeks for that day. @@ -253,8 +253,8 @@ def imputation_lagKmedian_single_series(df,seasonality,N_seasons): For now assuming that big chunk removal is done AFTER this step. 
""" - #If the whole series is empty (all -1): - if np.alltrue(df.drop(columns='Page').values==-1): + #If the whole series is empty (all -1/NAN): + if np.alltrue(df.drop(columns='Page').values==out_of_range_fill_value): return df max_block_length = seasonality - 1 @@ -295,8 +295,6 @@ def imputation_lagKmedian_single_series(df,seasonality,N_seasons): if np.alltrue([i in missing_inds for i in block_inds]): in_block = True break -# x = _.values[0][mi+off : mi+off+seasonality] -# if np.alltrue(x==-1): if in_block: continue #If it is not in a completely missing block [at least 1 value is recorded], then do lag K median: @@ -306,7 +304,7 @@ def imputation_lagKmedian_single_series(df,seasonality,N_seasons): imputed_val = np.median(t) #If all K previous timesteps were -1, then would give nan, so set manually to -1: if np.isnan(imputed_val):# == np.nan: - imputed_val = -1 + imputed_val = out_of_range_fill_value _[_.columns[mi]] = imputed_val # g = np.where(_<0.)[1] @@ -399,6 +397,17 @@ def remove_downsample_columns(df, out_of_range_fill_value): df.drop(columns=bad_cols,inplace=True) return df + def make_index_col_left(df): + """ + Make sure order as expected by putting page col left + """ + id_col_name = 'Page' + cols = df.columns.tolist() + cols.remove(id_col_name) + + df = df[ [id_col_name] + cols] + return df + #Rename columns to be as in Kaggle data: df.rename(columns={'id':'Page'},inplace=True) @@ -413,7 +422,7 @@ def remove_downsample_columns(df, out_of_range_fill_value): latest = min(latest,end_date) idx = pd.date_range(earliest,latest) #!!!!!! fro now doing daily. When doing weekly also keep with default freq='D' . If change to 'W' alignment gets messed up. Just do daily 'D', then later can correct easily. - OUT_OF_RANGE_FILL_VALUE = -1. #np.NaN #0 #puttign as nan casts to float and cannot convert to int + OUT_OF_RANGE_FILL_VALUE = np.NaN #0 #-1 #puttign as nan casts to float and cannot convert to int #Do aggregation from DAILY --> WEEKLY before doing any kind of imputation @@ -422,6 +431,12 @@ def remove_downsample_columns(df, out_of_range_fill_value): df = aggregate_to_weekly(df, AGGREGATION_TYPE) + #Some id's [15,16] have their missing values recorded as "-1" + #vs. 
later id's have their missing values simply missing from the original csv + #So for those id's that actually have -1, convert to NAN first: + df.replace(-1.,np.nan,inplace=True) + + #Reorganize data for each id (->"Page") unique_ids = pd.unique(df['Page']) df_list = [] @@ -443,7 +458,7 @@ def remove_downsample_columns(df, out_of_range_fill_value): elif sampling_period=='weekly': N_seasons = 4 seasonality = 1 - dd = imputation_lagKmedian_single_series(dd,seasonality,N_seasons) + dd = imputation_lagKmedian_single_series(dd,seasonality,N_seasons,OUT_OF_RANGE_FILL_VALUE) # if i == 58: # v=eeeeee @@ -482,7 +497,7 @@ def remove_downsample_columns(df, out_of_range_fill_value): # ============================================================================= #VERBOSE = False #if VERBOSE: - # __missing_vals_distribution(df) + # __missing_vals_distribution(df,OUT_OF_RANGE_FILL_VALUE) @@ -495,6 +510,8 @@ def remove_downsample_columns(df, out_of_range_fill_value): #df = aggregate(df,sampling_period) + #Reorder some things just in case + df = make_index_col_left(df) print(df) #SHould end up with a csv that is rows are series (each id), cols are dates From ea234ba48b6a8d7dc360b3c4642eb89c0e6a3711 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 16 Jul 2018 16:05:24 -0700 Subject: [PATCH 19/42] future predictions on our data working --- PREDICT.py | 148 ++++++++++++++++++++++++++++++++--------------------- model.py | 6 +-- trainer.py | 12 ++++- 3 files changed, 102 insertions(+), 64 deletions(-) diff --git a/PREDICT.py b/PREDICT.py index 28ca514..67ac93c 100755 --- a/PREDICT.py +++ b/PREDICT.py @@ -24,11 +24,20 @@ - -FEATURES_SET = 'arturius'# 'arturius' 'simple' 'full' +# ============================================================================= +# PARAMETRS +# ============================================================================= +FEATURES_SET = 'full'# 'arturius' 'simple' 'full' SAMPLING_PERIOD = 'daily' DATA_TYPE = 'ours' #'kaggle' #'ours' -Nmodels = 3. +Nmodels = 3 +PARAM_SETTING = 's32' #Which of the parameter settings to use [s32 is the default Kaggle one, with a few thigns modified as I want] +PARAM_SETTING_FULL_NAME = hparams.params_s32 #Which of the parameter settings to use corresponding to the PARAM_SETTING. The mapping is defined in hparams.py at the end in "sets = {'s32':params_s32,..." + + + + + # ============================================================================= @@ -62,33 +71,36 @@ def mean_smape(true, pred): # # ============================================================================= prev = df_all#.loc[:,:'2017-07-08'] -paths = [p for p in tf.train.get_checkpoint_state('data/cpt/s32').all_model_checkpoint_paths] +paths = [p for p in tf.train.get_checkpoint_state(f'data/cpt/{PARAM_SETTING}').all_model_checkpoint_paths] #tf.reset_default_graph() #preds = predict(paths, default_hparams(), back_offset=0, # n_models=3, target_model=0, seed=2, batch_size=2048, asgd=True) t_preds = [] -for tm in range(3): #!!!!!!!! 
Nmodels +for tm in range(Nmodels): tf.reset_default_graph() - t_preds.append(predict(FEATURES_SET, SAMPLING_PERIOD, paths, build_hparams(hparams.params_s32), back_offset=0, predict_window=63, - n_models=3, target_model=tm, seed=2, batch_size=2048, asgd=True)) + t_preds.append(predict(FEATURES_SET, SAMPLING_PERIOD, paths, build_hparams(PARAM_SETTING_FULL_NAME), back_offset=0, predict_window=63, + n_models=Nmodels, target_model=tm, seed=2, batch_size=2048, asgd=True)) # ============================================================================= # average the N models predictions # ============================================================================= -preds = sum(t_preds)/Nmodels +preds = sum(t_preds)/float(Nmodels) # ============================================================================= # look at missing # ============================================================================= missing_pages = prev.index.difference(preds.index) +print('missing_pages',missing_pages) # Use zeros for missing pages rmdf = pd.DataFrame(index=missing_pages, - data=np.tile(0, (len(preds.columns),len(missing_pages))).T, columns=preds.columns) -f_preds = preds.append(rmdf).sort_index() - + data=np.tile(0, (len(preds.columns),len(missing_pages))).T, columns=preds.columns) +if DATA_TYPE=='kaggle': + f_preds = preds.append(rmdf).sort_index() +elif DATA_TYPE=='ours': + f_preds = preds # Use zero for negative predictions f_preds[f_preds < 0.5] = 0 # Rouns predictions to nearest int @@ -96,14 +108,16 @@ def mean_smape(true, pred): + +print(f_preds) + # ============================================================================= # save out all predictions all days (for our stuff will be relevant, for his Kaggle maybe just needed one day) # ============================================================================= -firstK = 1000 #for size issues, for now while dev, just a few to look at -ggg = f_preds.iloc[:firstK] -ggg.to_csv('data/all_days_submission.csv.gz', compression='gzip', index=False, header=True) - - +#firstK = 1000 #for size issues, for now while dev, just a few to look at +#ggg = f_preds.iloc[:firstK] +#ggg.to_csv('data/all_days_submission.csv.gz', compression='gzip', index=False, header=True) +f_preds.to_csv('data/all_predictions_ours.csv.gz', compression='gzip', index=False, header=True) @@ -122,60 +136,76 @@ def mean_smape(true, pred): randomK = 1000 print('Saving figs of {} time series as checks'.format(randomK)) pagenames = list(f_preds.index) -pages = np.random.choice(pagenames, size=randomK, replace=False) +pages = np.random.choice(pagenames, size=min(randomK,len(pagenames)), replace=False) +N = pages.size for jj, page in enumerate(pages): + print(f"{jj} of {N}") plt.figure() - #prev.loc[page].fillna(0).plot(logy=True) - f_preds.loc[page].fillna(0).plot(logy=True) - #gt.loc[page].fillna(0).plot(logy=True) - f_preds.loc[page].plot(logy=True) + if DATA_TYPE=='kaggle': + prev.loc[page].fillna(0).plot()#logy=True) + f_preds.loc[page].fillna(0).plot(logy=True) + elif DATA_TYPE=='ours': + prev.loc[int(page)].plot() + f_preds.loc[page].plot() plt.title(page) + if not os.path.exists('ex_figs'): + os.mkdir('ex_figs') pathname = os.path.join('ex_figs', 'fig_{}.png'.format(jj)) plt.savefig(pathname) + plt.close() +#Cannot view on the AWS so move to local: +#zip -r ex_figs.zip ex_figs +#cp ex_figs.zip /home/...../sync -# ============================================================================= -# load, maniupalte test data -# 
============================================================================= -def read_keys(): - import os.path - key_file = 'data/keys2.pkl' - if os.path.exists(key_file): - return pd.read_pickle(key_file) - else: - print('Reading keys...') - raw_keys = pd.read_csv('data/key_2.csv.zip') - print('Processing keys...') - pagedate = raw_keys.Page.str.rsplit('_', expand=True, n=1).rename(columns={0:'page',1:'date_str'}) - keys = raw_keys.drop('Page', axis=1).assign(page=pagedate.page, date=pd.to_datetime(pagedate.date_str)) - del raw_keys, pagedate - print('Pivoting keys...') - pkeys = keys.pivot(index='page', columns='date', values='Id') - print('Storing keys...') - pkeys.to_pickle(key_file) - return pkeys -keys = read_keys() - -# ============================================================================= -# -# ============================================================================= -subm_preds = f_preds.loc[:, '2017-09-13':] -assert np.all(subm_preds.index == keys.index) -assert np.all(subm_preds.columns == keys.columns) -answers = pd.DataFrame({'Id':keys.values.flatten(), 'Visits':np.round(subm_preds).astype(np.int64).values.flatten()}) -answers.to_csv('data/submission.csv.gz', compression='gzip', index=False, header=True) - - - -print('f_preds') -print(f_preds) - -print('missing') -print(prev.loc[missing_pages, '2016-12-15':]) \ No newline at end of file + + + + +#For the Kaggle data, can also output compeition submission format: +if DATA_TYPE=='kaggle': + # ============================================================================= + # load, maniupalte test data + # ============================================================================= + def read_keys(): + import os.path + key_file = 'data/keys2.pkl' + if os.path.exists(key_file): + return pd.read_pickle(key_file) + else: + print('Reading keys...') + raw_keys = pd.read_csv('data/key_2.csv.zip') + print('Processing keys...') + pagedate = raw_keys.Page.str.rsplit('_', expand=True, n=1).rename(columns={0:'page',1:'date_str'}) + keys = raw_keys.drop('Page', axis=1).assign(page=pagedate.page, date=pd.to_datetime(pagedate.date_str)) + del raw_keys, pagedate + print('Pivoting keys...') + pkeys = keys.pivot(index='page', columns='date', values='Id') + print('Storing keys...') + pkeys.to_pickle(key_file) + return pkeys + keys = read_keys() + + # ============================================================================= + # + # ============================================================================= + subm_preds = f_preds.loc[:, '2017-09-13':] + assert np.all(subm_preds.index == keys.index) + assert np.all(subm_preds.columns == keys.columns) + answers = pd.DataFrame({'Id':keys.values.flatten(), 'Visits':np.round(subm_preds).astype(np.int64).values.flatten()}) + answers.to_csv('data/submission.csv.gz', compression='gzip', index=False, header=True) + + + + print('f_preds') + print(f_preds) + + print('missing') + print(prev.loc[missing_pages, '2016-12-15':]) \ No newline at end of file diff --git a/model.py b/model.py index faf9fae..bb7b831 100755 --- a/model.py +++ b/model.py @@ -421,7 +421,7 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a # Get final denormalized predictions vv = decode_predictions(decoder_targets, inp) - vv = tf.Print(vv, ['decode_predictions',vv,tf.shape(vv)]) +# vv = tf.Print(vv, ['decode_predictions',vv,tf.shape(vv)]) self.predictions = vv # print('self.predictions (still log1p(counts))') # print(self.predictions) @@ -498,7 +498,7 @@ def build_cell(idx): ggg = 
tf.cast(tf.equal(ff, ff*0.+267.), tf.float32) N_all_NAN_encoder_states = tf.reduce_sum(ggg) total = tf.reduce_prod(tf.shape(encoder_state)) - encoder_state = tf.Print(encoder_state,['encoder_state', tf.shape(encoder_state), encoder_state, 'N_nans', N_nans, 'total', total, 'N_all_NAN_encoder_states', N_all_NAN_encoder_states]) +# encoder_state = tf.Print(encoder_state,['encoder_state', tf.shape(encoder_state), encoder_state, 'N_nans', N_nans, 'total', total, 'N_all_NAN_encoder_states', N_all_NAN_encoder_states]) @@ -549,7 +549,7 @@ def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_ output, state = cell(next_input, prev_state) # Make prediction from RNN outputs projected_output = project_output(output) #!!!!!!!!!! quantiles - projected_output = tf.Print(projected_output, ['time',time,'projected_output',projected_output,tf.shape(projected_output),'output',output,tf.shape(output),'state',state,tf.shape(state) ,'prev_output',prev_output,tf.shape(prev_output) ,'features',features,tf.shape(features),features[1,:18]]) +# projected_output = tf.Print(projected_output, ['time',time,'projected_output',projected_output,tf.shape(projected_output),'output',output,tf.shape(output),'state',state,tf.shape(state) ,'prev_output',prev_output,tf.shape(prev_output) ,'features',features,tf.shape(features),features[1,:18]]) # Append step results to the buffer arrays if return_raw_outputs: diff --git a/trainer.py b/trainer.py index 470bb50..eea2c9f 100755 --- a/trainer.py +++ b/trainer.py @@ -718,8 +718,16 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, pred, x, pname = sess.run([model.predictions, model.inp.true_x, model.inp.page_ix]) else: pred, pname = sess.run([model.predictions, model.inp.page_ix]) - utf_names = [str(name, 'utf-8') for name in pname] + + #Our data already has page names (id's) as ints, so this decoding won't work, so just do str(id) + try: + utf_names = [str(name, 'utf-8') for name in pname] + except UnicodeDecodeError: + utf_names = [str(name) for name in pname] + pred_df = pd.DataFrame(index=utf_names, data=np.expm1(pred)) + print(pred_df) + pred_buffer.append(pred_df) if return_x: # noinspection PyUnboundLocalVariable @@ -739,7 +747,7 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, predictions = cp_predictions else: predictions += cp_predictions - predictions /= len(checkpoints) + predictions /= len(checkpoints) #Since it is averaging predictions over the chckpoints offset = pd.Timedelta(back_offset, 'D') #!!!!!!!!!!!! 
need to change these lines when sampling WEEKLY MONTHLY start_prediction = inp.data_end + pd.Timedelta('1D') - offset end_prediction = start_prediction + pd.Timedelta(predict_window - 1, 'D') From 3b53e6a4223e038f6f80b18ae085186a0d3ad078 Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 19 Jul 2018 01:06:10 -0700 Subject: [PATCH 20/42] multirun train-val bash script --- PREDICT.py | 16 ++--- PREPROCESS.py | 109 ++++++++++++++++++++++++++++------ RUN_MANY_TRAIN_VAL_WINDOWS.sh | 52 ++++++++++++++++ Readme.md | 20 +++++-- hparams.py | 2 +- model.py | 60 +++++++++---------- trainer.py | 15 ++--- 7 files changed, 206 insertions(+), 68 deletions(-) create mode 100644 RUN_MANY_TRAIN_VAL_WINDOWS.sh diff --git a/PREDICT.py b/PREDICT.py index 67ac93c..a3ea201 100755 --- a/PREDICT.py +++ b/PREDICT.py @@ -33,7 +33,7 @@ Nmodels = 3 PARAM_SETTING = 's32' #Which of the parameter settings to use [s32 is the default Kaggle one, with a few thigns modified as I want] PARAM_SETTING_FULL_NAME = hparams.params_s32 #Which of the parameter settings to use corresponding to the PARAM_SETTING. The mapping is defined in hparams.py at the end in "sets = {'s32':params_s32,..." - +OUTPUT_DIR = 'output' @@ -117,7 +117,7 @@ def mean_smape(true, pred): #firstK = 1000 #for size issues, for now while dev, just a few to look at #ggg = f_preds.iloc[:firstK] #ggg.to_csv('data/all_days_submission.csv.gz', compression='gzip', index=False, header=True) -f_preds.to_csv('data/all_predictions_ours.csv.gz', compression='gzip', index=False, header=True) +f_preds.to_csv(f'{OUTPUT_DIR}/all_predictions_ours.csv.gz', compression='gzip', index=False, header=True) @@ -148,17 +148,17 @@ def mean_smape(true, pred): prev.loc[int(page)].plot() f_preds.loc[page].plot() plt.title(page) - if not os.path.exists('ex_figs'): - os.mkdir('ex_figs') - pathname = os.path.join('ex_figs', 'fig_{}.png'.format(jj)) + if not os.path.exists(OUTPUT_DIR): + os.mkdir(OUTPUT_DIR) + pathname = os.path.join(OUTPUT_DIR, 'fig_{}.png'.format(jj)) plt.savefig(pathname) plt.close() #Cannot view on the AWS so move to local: -#zip -r ex_figs.zip ex_figs -#cp ex_figs.zip /home/...../sync +#zip -r output.zip output +#cp output.zip /home/...../sync @@ -200,7 +200,7 @@ def read_keys(): assert np.all(subm_preds.index == keys.index) assert np.all(subm_preds.columns == keys.columns) answers = pd.DataFrame({'Id':keys.values.flatten(), 'Visits':np.round(subm_preds).astype(np.int64).values.flatten()}) - answers.to_csv('data/submission.csv.gz', compression='gzip', index=False, header=True) + answers.to_csv(f'{OUTPUT_DIR}/submission.csv.gz', compression='gzip', index=False, header=True) diff --git a/PREPROCESS.py b/PREPROCESS.py index f61b34a..00c4695 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -23,6 +23,7 @@ from collections import Counter from copy import deepcopy +from scipy.signal import medfilt def load_my_data(myDataDir): @@ -205,7 +206,7 @@ def imputation__simple(df,imputation_method): #Put "Page" at left cols = new_df.columns.tolist() new_df = new_df[cols[-1:]+cols[:-1]] - new_df.reset_index(drop=True,inplace=True) + new_df.reset_index(drop=True,inplace=True) return new_df @@ -316,6 +317,89 @@ def imputation_lagKmedian_single_series(df,seasonality,N_seasons,out_of_range_fi + +def data_augmentation(df, jitter_pcts_list=[.05,.10,.15], do_low_pass_filter=True, additive_trend=False): + """ + Do some basic data augmentation with a few different options. + Then output Cartesian product of all these variations as the final set. 
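A compact sketch of the percent-jitter variant described here, applied to a single numpy series (illustrative only; the real function also generates new Page ids per perturbation and offers median-filter and trend options):

import numpy as np

def jitter_series(x, pct=0.10, rng=np.random):
    # Add uniform noise in [-pct*v, +pct*v] to each observed value; NaNs stay NaN.
    noise = rng.uniform(-pct, pct, size=x.shape) * x
    return np.where(np.isnan(x), np.nan, x + noise)

x = np.array([10.0, 12.0, np.nan, 9.0])
print(jitter_series(x))  # e.g. [10.7, 11.3, nan, 8.6]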
+ + Any missing point (NAN) will be left as NAN, but others will be modified in some way + """ + + def jitter__uniform_pcts(df, jitter_pcts_list, N_perturbations): + """ + On each observed value (non-NAN), add +/- jitter up to some + percent of the observed value. Either positive or negative. + If the count is small, then just leave it, otherwise perturb + (always leaving counts positive). + """ + page = df['Page'].values[0] + cols = df.columns + x = df.drop(columns=['Page']).values[0] + dflist = [] + for uniform_jitter in jitter_pcts_list: + ids = [str(page) + '__unijit{}_'.format(str(uniform_jitter)) + str(kk+1) for kk in range(N_perturbations)] + _ = np.zeros((N_perturbations,x.size)) + f = lambda i: np.random.uniform(-uniform_jitter*i,uniform_jitter*i) if not np.isnan(i) else np.nan + for kk in range(N_perturbations): + _[kk] = [i + f(i) for i in x]# ).reshape(1,x.size) + d = {cols[i]:_[:,i] for i in range(x.size)} + df = pd.DataFrame(data=d) + df['Page'] = ids + dflist += [df] + df = pd.concat(dflist,axis=0) + df.reset_index(drop=True,inplace=True) + return df + + + def add_trend(df, slopes_list): + """ + On each observed value (non-NAN), add +/- X_t, where X_t is from a + linear trend with given slope, applied across whole series. + + Could change the character of the time series a lot so maybe not so good? + """ + return df + + def low_pass_filter(df, filter_type, kernel_size): + """ + Low-pass filter the data with some kind of kernel, with some kernel size. + + Is going to smooth out the data a lot, not sure if this will change the + time series too much to be good?? + """ + page = df['Page'].values[0] + cols = df.columns + x = df.drop(columns=['Page']).values[0] + ids = [str(page) + '__{0}{1}'.format(filter_type.func_name,kernel_size)] + y = filter_type(x,kernel_size=kernel_size) + _ = np.where(np.invert(np.isnan(x)),y,np.nan) + d = {cols[i]:_[i] for i in range(x.size)} + df = pd.DataFrame(data=d,index=[0]) + df['Page'] = ids + return df + + + #For each method, do 5x random + N_perturbations = 5 + dflist = [df] + if jitter_pcts_list: + dflist += [jitter__uniform_pcts(df, jitter_pcts_list, N_perturbations)] + if do_low_pass_filter: + filter_type = medfilt + kernel_size = 7 + dflist += [low_pass_filter(df, filter_type, kernel_size)] + if additive_trend: + slopes_list = [-1.1, 1.1] + dflist += [add_trend(df, slopes_list)] +# if autoencoder: +# #Run through autoencoder, do VAE, get resulting series + + df = pd.concat(dflist,axis=0) + return df + + + def format_like_Kaggle(df, myDataDir, imputation_method, sampling_period, start_date=None, end_date=None): """ Take my data and format it exactly as needed to use for the Kaggle seq2seq @@ -450,7 +534,7 @@ def make_index_col_left(df): dd = pd.DataFrame(dates).T dd['Page'] = u - print(i,u) + print(i,u, 'of {}'.format(unique_ids[-1])) if imputation_method=='lagKmedian': if sampling_period=='daily': N_seasons = 4 @@ -460,18 +544,8 @@ def make_index_col_left(df): seasonality = 1 dd = imputation_lagKmedian_single_series(dd,seasonality,N_seasons,OUT_OF_RANGE_FILL_VALUE) -# if i == 58: -# v=eeeeee - #Make a good easy case to overfit - DEBUG = False - if DEBUG: - dd*= 0. - dd += u - - - #If doing imputation / other - #for each series individually - #... + #Data augmentation + dd = data_augmentation(dd) df_list.append(dd) @@ -481,8 +555,6 @@ def make_index_col_left(df): df.reset_index(drop=True,inplace=True) - - #If we did aggregation, then above reogranization will have many of the columns Nan / -1, #since e.g. 
went from daily to weekly, then 6 days of the week will look empty. So remove them. if sampling_period=='weekly': @@ -566,7 +638,7 @@ def make_key_csv(df): END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful SAMPLING_PERIOD = 'daily' #'daily', 'weekly', 'monthly' - + RANDOM_SEED = None # ============================================================================= # MAIN @@ -578,6 +650,9 @@ def make_key_csv(df): print('myDataDir',myDataDir) print('SAMPLING_PERIOD',SAMPLING_PERIOD) + #Seed random number generator in case of doing data augmentation: + np.random.seed(RANDOM_SEED) + #Load df = load_my_data(myDataDir) diff --git a/RUN_MANY_TRAIN_VAL_WINDOWS.sh b/RUN_MANY_TRAIN_VAL_WINDOWS.sh new file mode 100644 index 0000000..6a21d8d --- /dev/null +++ b/RUN_MANY_TRAIN_VAL_WINDOWS.sh @@ -0,0 +1,52 @@ +#chmod 777 RUN_MANY_TRAIN_VAL_WINDOWS.sh +#./RUN_MANY_TRAIN_VAL_WINDOWS.sh +#Run over many train_window - predict_window length pairs +#Compile results, analyze performance as (2D) heatmap + + +#TRAIN_WINDOWS="1 2 5 10 20 50 100 150 200 250 300" +#VALIDATION_WINDOWS="1 2 5 10 20 50 100" +#e.g. TRAIN_WINDOWS has NAN SMAPE -> 2 problem with as big as size 50 + +TRAIN_WINDOWS="100 150" +VALIDATION_WINDOWS="33 66" +#just to test... +MAX_EPOCH=2 + + +#One time clean up +cd data +rm -R vars/ +rm -R cpt/ +rm -R cpt_tmp/ +rm -R logs/ +rm *.pkl +cd .. +#ls -l data/ + + +for v in $VALIDATION_WINDOWS; do + #Clea up between feature sets + cd data + rm -R vars/ + rm -R cpt_tmp/ + rm *.pkl + cd .. + #Create the features for our data + echo 'running make_features.py with --add_days='$v + python3 make_features.py data/vars ours daily full --add_days=$v + for t in $TRAIN_WINDOWS; do + echo 'validation window = '$v 'validation window = '$t + echo 'running trainer.py' + NAME="val$v-train$t" + echo 'NAAME='$NAME + python3 trainer.py full daily --name $NAME --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10 --max_epoch=$MAX_EPOCH --patience=5 --verbose --predict_window=$v --train_window=$t + done +done + + + +#Now that all training is done, can run predictions +#python3 PREDICT.py !!!!!make window sizes as params + +#now make heatmaps of performance: \ No newline at end of file diff --git a/Readme.md b/Readme.md index b7fef7d..240a090 100755 --- a/Readme.md +++ b/Readme.md @@ -47,7 +47,7 @@ GK modifications for own data: 3. $cd ..../kaggle-web-traffic 4. $python3 PREPROCESS.py 5. $python3 make_features.py data/vars kaggle daily arturius --add_days=63 #need to specify the data directory (data/vars) and feature_set {kaggle, simple, full, full_w_context} depending on using default Arturius kaggle vs. 
own custom for this application; and specify sampling period -python3 make_features.py data/vars kaggle daily full --add_days=85 +python3 make_features.py data/vars kaggle daily full --add_days=63 #Just in case making new features cd data @@ -74,8 +74,14 @@ python3 trainer.py full daily --name TEST_attn_head --hparam_set=TEST_attn_head --no_eval --side_split +--max_epoch=1000 +--save_from_step=1 +--verbose + + +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 --train_window=100 -python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --side_split +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10 --max_epoch=1000 --patience=50 --verbose --side_split @@ -90,9 +96,13 @@ python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_de To do: 0. print out the SMAPE for the actual data [current is doing SMAPE of the unrounded log1p(data) which will likely be much smaller than for real] -1. Visualizations of predictions on our own data -1. why encoder_state NANs in it [is it train predict window completeness thresholds?] +0. SMAPEs on ground truth 2018 +1. why encoder_state NANs in it for small train window lengths [is it train/predict window completeness thresholds?] +1. performance heatmaps 2. for weekly. monthly inputs, need to change few places in tensorflow code 3. Prediction intervals -4. Architecture improvements \ No newline at end of file +4. Architecture improvements: his is not the usual encoder-decoder: add C context vector to every decoder step +4. bi, di, MH +5. custom attention +6. VAE aug \ No newline at end of file diff --git a/hparams.py b/hparams.py index 5eaeeef..0bfd3e6 100755 --- a/hparams.py +++ b/hparams.py @@ -5,7 +5,7 @@ params_s32 = dict( batch_size=123,#256, #train_window=380, - train_window=283, +# train_window=283,#now make this a bash input to do train-validation window size performance heatmaps #train_window=30,#try 65 w our data to see if allows more samples through filter train_skip_first=0, rnn_depth=267, diff --git a/model.py b/model.py index bb7b831..c2b228d 100755 --- a/model.py +++ b/model.py @@ -414,15 +414,16 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a print('inp.time_y',inp.time_y) decoder_targets, decoder_outputs = self.decoder(encoder_state, attn_features if hparams.use_attn else None, - inp.time_y, inp.norm_x[:, -1]) + inp.time_y, inp.norm_x[:, -1]) #in decoder function def: inp.time_y = "prediction_inputs"; inp.norm_x[:, -1] = "previous_y" (i.e. the final x normalizd)) # Decoder activation losses dec_stab_loss = rnn_stability_loss(decoder_outputs, hparams.decoder_stability_loss / inp.predict_window) dec_activation_loss = rnn_activation_loss(decoder_outputs, hparams.decoder_activation_loss / inp.predict_window) # Get final denormalized predictions - vv = decode_predictions(decoder_targets, inp) + self.predictions = decode_predictions(decoder_targets, inp) +# vv = decode_predictions(decoder_targets, inp) # vv = tf.Print(vv, ['decode_predictions',vv,tf.shape(vv)]) - self.predictions = vv +# self.predictions = vv # print('self.predictions (still log1p(counts))') # print(self.predictions) @@ -492,19 +493,19 @@ def build_cell(idx): #!!!!!! 
on our data, when doing side_split, encoder_state is fine [no NANs], #but when doing walk_forward, some rows (instances) are all NANs (and the others all defined), #then eventually every instance becomes NANs - N_nans = tf.reduce_sum(tf.cast(tf.is_nan(encoder_state), tf.float32)) - tt = tf.cast(tf.is_nan(encoder_state), tf.float32) - ff = tf.reduce_sum(tt,axis=1) - ggg = tf.cast(tf.equal(ff, ff*0.+267.), tf.float32) - N_all_NAN_encoder_states = tf.reduce_sum(ggg) - total = tf.reduce_prod(tf.shape(encoder_state)) +# N_nans = tf.reduce_sum(tf.cast(tf.is_nan(encoder_state), tf.float32)) +# tt = tf.cast(tf.is_nan(encoder_state), tf.float32) +# ff = tf.reduce_sum(tt,axis=1) +# ggg = tf.cast(tf.equal(ff, ff*0.+267.), tf.float32) +# N_all_NAN_encoder_states = tf.reduce_sum(ggg) +# total = tf.reduce_prod(tf.shape(encoder_state)) # encoder_state = tf.Print(encoder_state,['encoder_state', tf.shape(encoder_state), encoder_state, 'N_nans', N_nans, 'total', total, 'N_all_NAN_encoder_states', N_all_NAN_encoder_states]) nest.assert_same_structure(encoder_state, cell.state_size) - predict_days = self.inp.predict_window - assert prediction_inputs.shape[1] == predict_days + predict_timesteps = self.inp.predict_window + assert prediction_inputs.shape[1] == predict_timesteps # [batch_size, time, input_depth] -> [time, batch_size, input_depth] inputs_by_time = tf.transpose(prediction_inputs, [1, 0, 2]) @@ -513,19 +514,18 @@ def build_cell(idx): return_raw_outputs = self.hparams.decoder_stability_loss > 0.0 or self.hparams.decoder_activation_loss > 0.0 # Stop condition for decoding loop - def cond_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): - #!!!!! ???? Need to change when doing as weekly data??? - return time < predict_days + def cond_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): + return timestep < predict_timesteps # FC projecting layer to get single predicted value from RNN output def project_output(tensor): N_pctls=1 #!!!!!!!!!! quantiles return tf.layers.dense(tensor, N_pctls, name='decoder_output_proj', kernel_initializer=self.default_init()) - def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): + def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): """ Main decoder loop - :param time: Day number + :param timestep: timestep number :param prev_output: Output(prediction) from previous step :param prev_state: RNN state tensor from previous step :param array_targets: Predictions, each step will append new value to this array @@ -533,12 +533,12 @@ def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_ :return: """ # RNN inputs for current step - features = inputs_by_time[time] + features = inputs_by_time[timestep] # [batch, predict_window, readout_depth * n_heads] -> [batch, readout_depth * n_heads] if attn_features is not None: # [batch_size, 1] + [batch_size, input_depth] - attn = attn_features[:, time, :] + attn = attn_features[:, timestep, :] # Append previous predicted value + attention vector to input features next_input = tf.concat([prev_output, features, attn], axis=1) else: @@ -549,28 +549,28 @@ def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_ output, state = cell(next_input, prev_state) # Make prediction from RNN outputs projected_output = project_output(output) #!!!!!!!!!! 
quantiles -# projected_output = tf.Print(projected_output, ['time',time,'projected_output',projected_output,tf.shape(projected_output),'output',output,tf.shape(output),'state',state,tf.shape(state) ,'prev_output',prev_output,tf.shape(prev_output) ,'features',features,tf.shape(features),features[1,:18]]) +# projected_output = tf.Print(projected_output, ['timestep',timestep,'projected_output',projected_output,tf.shape(projected_output),'output',output,tf.shape(output),'state',state,tf.shape(state) ,'prev_output',prev_output,tf.shape(prev_output) ,'features',features,tf.shape(features),features[1,:18]]) # Append step results to the buffer arrays if return_raw_outputs: - array_outputs = array_outputs.write(time, output) - array_targets = array_targets.write(time, projected_output) - # Increment time and return - return time + 1, projected_output, state, array_targets, array_outputs + array_outputs = array_outputs.write(timestep, output) + array_targets = array_targets.write(timestep, projected_output) + # Increment timestep and return + return timestep + 1, projected_output, state, array_targets, array_outputs #!!!!!! quantiles: projected_output will be diff dims # Initial values for loop loop_init = [tf.constant(0, dtype=tf.int32), tf.expand_dims(previous_y, -1), encoder_state, - tf.TensorArray(dtype=tf.float32, size=predict_days), - tf.TensorArray(dtype=tf.float32, size=predict_days) if return_raw_outputs else tf.constant(0)] #!!!!!!! size= ... x N_pctls + tf.TensorArray(dtype=tf.float32, size=predict_timesteps), + tf.TensorArray(dtype=tf.float32, size=predict_timesteps) if return_raw_outputs else tf.constant(0)] #!!!!!!! size= ... x N_pctls # Run the loop - _time, _projected_output, _state, targets_ta, outputs_ta = tf.while_loop(cond_fn, loop_fn, loop_init) + _timestep, _projected_output, _state, targets_ta, outputs_ta = tf.while_loop(cond_fn, loop_fn, loop_init) print('decoder') -# print('_time',_time) -# _time = debug_tensor_print(_time) +# print('_timestep',_timestep) +# _timestep = debug_tensor_print(_timestep) # print('_projected_output',_projected_output) # _projected_output = debug_tensor_print(_projected_output) # print('_state',_state) @@ -590,10 +590,10 @@ def loop_fn(time, prev_output, prev_state, array_targets: tf.TensorArray, array_ # print('targets',targets) #!!!!!!!!!!! why targets becomes NANs ????? # why targets NANs? 
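The quantile notes in this patch series (the N_pctls comments here, and the earlier "pinball loss instead of SMAPE" remark) point toward a quantile loss; as a reference only, a minimal TF 1.x pinball-loss sketch — the function name, tau value, and placement are assumptions, not code from the patch:

import tensorflow as tf

def pinball_loss(y_true, y_pred, tau):
    # Quantile (pinball) loss: under-prediction is penalized by tau,
    # over-prediction by (1 - tau).
    err = y_true - y_pred
    return tf.reduce_mean(tf.maximum(tau * err, (tau - 1.0) * err))

y_true = tf.constant([10.0, 20.0, 30.0])
y_pred = tf.constant([12.0, 18.0, 30.0])
with tf.Session() as sess:
    print(sess.run(pinball_loss(y_true, y_pred, tau=0.9)))  # ~0.667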
- targets = debug_tensor_print(targets) #63 x 245, except for first 2 prints for each new iteration it is 63 x 64 +# targets = debug_tensor_print(targets) #63 x 245, except for first 2 prints for each new iteration it is 63 x 64 # raw_outputs = debug_tensor_print(raw_outputs) #is 63 x 64 x 267 -# print_list = ['_time', _time.name, tf.shape(_time), _time] +# print_list = ['_timestep', _timestep.name, tf.shape(_timestep), _timestep] # raw_outputs = tf.Print(raw_outputs, print_list) return targets, raw_outputs diff --git a/trainer.py b/trainer.py index eea2c9f..d43c846 100755 --- a/trainer.py +++ b/trainer.py @@ -400,14 +400,14 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model seed=None, logdir='data/logs', max_epoch=100, patience=2, train_sampling=1.0, eval_sampling=1.0, eval_memsize=5, gpu=0, gpu_allow_growth=False, save_best_model=False, forward_split=False, write_summaries=False, verbose=False, asgd_decay=None, tqdm=True, - side_split=True, max_steps=None, save_from_step=None, do_eval=True, predict_window=63): + side_split=True, max_steps=None, save_from_step=None, do_eval=True, predict_window=63, train_window=283): eval_k = int(round(26214 * eval_memsize / n_models)) eval_batch_size = int( eval_k / (hparams.rnn_depth * hparams.encoder_rnn_layers)) # 128 -> 1024, 256->512, 512->256 eval_pct = 0.1 batch_size = hparams.batch_size - train_window = hparams.train_window +# train_window = hparams.train_window tf.reset_default_graph() if seed: tf.set_random_seed(seed) @@ -673,15 +673,14 @@ def ema_vars(model): def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, verbose=False, predict_window=6, back_offset=0, n_models=1, - target_model=0, asgd=False, seed=1, batch_size=1024): + target_model=0, asgd=False, seed=1, batch_size=1024, train_window=283): with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") - pipe = InputPipe(features_set, sampling_period, inp, page_features(inp, features_set), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, #!!!!!!!!!!!!!!!! 
page_features - n_epoch=1, verbose=verbose, + pipe = InputPipe(features_set, sampling_period, inp, page_features(inp, features_set), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, train_completeness_threshold=0.01, predict_window=predict_window, - predict_completeness_threshold=0.0, train_window=hparams.train_window, + predict_completeness_threshold=0.0, train_window=train_window,#hparams.train_window, back_offset=back_offset) asgd_decay = 0.99 if asgd else None if n_models == 1: @@ -754,7 +753,8 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, predictions.columns = pd.date_range(start_prediction, end_prediction) if return_x: x = pd.concat(x_buffer) - start_data = inp.data_end - pd.Timedelta(hparams.train_window - 1, 'D') - back_offset + #start_data = inp.data_end - pd.Timedelta(hparams.train_window - 1, 'D') - back_offset + start_data = inp.data_end - pd.Timedelta(train_window - 1, 'D') - back_offset #!!!!!now for heatmaps end_data = inp.data_end - back_offset x.columns = pd.date_range(start_data, end_data) return predictions, x @@ -790,6 +790,7 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, parser.add_argument('--max_steps', type=int, help="Stop training after max steps") parser.add_argument('--save_from_step', type=int, help="Save model on each evaluation (10 evals per epoch), starting from this step") parser.add_argument('--predict_window', default=63, type=int, help="Number of days to predict") + parser.add_argument('--train_window', default=283, type=int, help="Train window chunk size")#Now that we want to do train size - val size performance heatmaps args = parser.parse_args() param_dict = dict(vars(args)) From 96132942d4d9ee686d8b6aee2a7e4b44d64c4c74 Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 19 Jul 2018 01:50:12 -0700 Subject: [PATCH 21/42] added year-2010 / (2020-2010) as feature --- input_pipe.py | 49 +++++++++++++++++++++++++++--------------------- make_features.py | 34 +++++++++++++++++---------------- 2 files changed, 46 insertions(+), 37 deletions(-) diff --git a/input_pipe.py b/input_pipe.py index 4e8b3c2..69d1d52 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -131,7 +131,7 @@ def cut(self, counts, start, end): :param counts: counts timeseries :param start: start index :param end: end index - :return: tuple (train_counts, test_counts, lagged_counts, [dow,woy,moy]) + :return: tuple (train_counts, test_counts, lagged_counts, [dow,woy,moy,year]) """ # Pad counts to ensure we have enough array length for prediction counts = tf.concat([counts, tf.fill([self.predict_window], np.NaN)], axis=0) @@ -155,6 +155,8 @@ def cut(self, counts, start, end): # cropped_dow = 0*cropped_moy # cropped_woy = 0*cropped_moy + #ANd use year as a feature to get long term trend + cropped_year = self.inp.year[start:end] # ============================================================================= @@ -192,11 +194,11 @@ def cut(self, counts, start, end): if self.features_set=='arturius' or self.features_set=='full':#for now, for full just do sam [include lagged] if self.sampling_period=='daily': - return x_counts, y_counts, lagged_count, cropped_dow, cropped_woy + return x_counts, y_counts, lagged_count, cropped_dow, cropped_woy, cropped_year if self.sampling_period=='weekly': - return x_counts, y_counts, lagged_count, cropped_woy + return x_counts, y_counts, lagged_count, cropped_woy, cropped_year if self.sampling_period=='monthly': - return x_counts, y_counts, lagged_count, cropped_moy + return x_counts, 
y_counts, lagged_count, cropped_moy, cropped_year # elif self.features_set=='full': # return aaaaaaaaaaa #can drop lagged @@ -273,22 +275,22 @@ def make_features(self, *args): print(args) if self.features_set == 'arturius': if self.sampling_period == 'daily': - x_counts, y_counts, lagged_counts, dow, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, dow, woy, year, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args elif self.sampling_period == 'weekly': - x_counts, y_counts, lagged_counts, woy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, woy, year, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args elif self.sampling_period == 'monthly': - x_counts, y_counts, lagged_counts, moy, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, moy, year, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args #For now just use the same ... # count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance) elif self.features_set == 'full': if self.sampling_period == 'daily': - x_counts, y_counts, lagged_counts, dow, woy, page_ix, count_median, year_autocorr, quarter_autocorr,\ + x_counts, y_counts, lagged_counts, dow, woy, year, page_ix, count_median, year_autocorr, quarter_autocorr,\ count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args elif self.sampling_period == 'weekly': - x_counts, y_counts, lagged_counts, woy, page_ix, count_median, year_autocorr, quarter_autocorr,\ + x_counts, y_counts, lagged_counts, woy, year, page_ix, count_median, year_autocorr, quarter_autocorr,\ count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args elif self.sampling_period == 'monthly': - x_counts, y_counts, lagged_counts, moy, page_ix, count_median, year_autocorr, quarter_autocorr,\ + x_counts, y_counts, lagged_counts, moy, year, page_ix, count_median, year_autocorr, quarter_autocorr,\ count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args # ============================================================================= @@ -302,6 +304,11 @@ def make_features(self, *args): elif self.sampling_period == 'monthly': x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) + #Already did a manual kind of scaling for year in make_features.py so don't need to normalize here... 
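The "manual kind of scaling" referred to in this comment is the fixed mapping added in make_features.py further down, (year - 2010) / (2020 - 2010), chosen so the year feature does not need per-window normalization. A standalone sketch of that mapping, using the same reference constants:

import pandas as pd

REFERENCE_FIRST_YEAR = 2010
REFERENCE_LAST_YEAR = 2020

def scale_year(features_times):
    # Map the calendar year onto roughly [0, 1]; a fixed reference range avoids
    # the zero-variance problem when a sampled window falls entirely inside one year.
    return (features_times.year - REFERENCE_FIRST_YEAR) / float(REFERENCE_LAST_YEAR - REFERENCE_FIRST_YEAR)

features_times = pd.date_range('2015-01-01', '2017-12-31', freq='D')
year_feature = scale_year(features_times)   # 2015 -> 0.5, 2017 -> 0.7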
+ x_year, y_year = tf.split(year, [self.train_window, self.predict_window], axis=0) + x_year = tf.expand_dims(x_year,axis=1) + y_year = tf.expand_dims(y_year,axis=1) + # Normalize counts mean = tf.reduce_mean(x_counts) std = tf.sqrt(tf.reduce_mean(tf.squared_difference(x_counts, mean))) @@ -347,11 +354,11 @@ def make_features(self, *args): # Train features, depending on measurement frequency x_features = tf.expand_dims(norm_x_counts, -1) # [n_timesteps] -> [n_timesteps, 1] if self.sampling_period == 'daily': - x_features = tf.concat([x_features, x_dow, x_woy], axis=1) + x_features = tf.concat([x_features, x_dow, x_woy, x_year], axis=1) elif self.sampling_period == 'weekly': - x_features = tf.concat([x_features, x_woy], axis=1) + x_features = tf.concat([x_features, x_woy, x_year], axis=1) elif self.sampling_period == 'monthly': - x_features = tf.concat([x_features, x_moy], axis=1) + x_features = tf.concat([x_features, x_moy, x_year], axis=1) #Regardess of period/frequency will have below features: x_features = tf.concat([x_features, x_lagged, # Stretch series_features to all training days @@ -360,11 +367,11 @@ def make_features(self, *args): # Test features if self.sampling_period == 'daily': - y_features = tf.concat([y_dow, y_woy], axis=1) + y_features = tf.concat([y_dow, y_woy, y_year], axis=1) elif self.sampling_period == 'weekly': - y_features = y_woy + 0 + y_features = tf.concat([y_woy, y_year], axis=1) elif self.sampling_period == 'monthly': - y_features = y_moy + 0 + y_features = tf.concat([y_moy, y_year], axis=1) #Regardess of period/frequency will have below features: y_features = tf.concat([y_features, y_lagged, # Stretch series_features to all testing days @@ -500,7 +507,7 @@ def page_features(inp: VarFeeder, features_set): So do not need to pass in here the time-varying ones like day of week, month of year, lagged, etc. - DO NOT return dow, woy, moy + DO NOT return dow, woy, moy, year """ if features_set=='arturius': @@ -509,8 +516,8 @@ def page_features(inp: VarFeeder, features_set): inp.count_pctl_100 ) - elif features_set=='simple': - raise Exception('not ready yet') +# elif features_set=='simple': +# raise Exception('not ready yet') elif features_set=='full': d = (inp.counts, @@ -525,7 +532,7 @@ def page_features(inp: VarFeeder, features_set): inp.count_pctl_100, inp.count_variance) - elif features_set=='full_w_context': - raise Exception('not ready yet') +# elif features_set=='full_w_context': +# raise Exception('not ready yet') return d \ No newline at end of file diff --git a/make_features.py b/make_features.py index 5eb8642..d1061db 100755 --- a/make_features.py +++ b/make_features.py @@ -346,6 +346,11 @@ def run(): WEEK_NUMBER_METHOD = 'floor7'#'pandas' #'floor7' WEEK_NUMBER_MAX = 53. #52. + REFERENCE_FIRST_YEAR = 2010 #Use the year number as a feature, calculated as (year-REF_1)/(REF_2 - REF_1) to put on smaller scale + #(must be careful about normalizing on the fly within window, where depending on window size, most observations will have same year number, and 0 variance) + #so do this manual scaling instead of standard mean-var scaling + REFERENCE_LAST_YEAR = 2020 + features_times = pd.date_range(data_start, features_end, freq='D') @@ -364,9 +369,6 @@ def run(): woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) - #To catch longer term trending data, can also include year number. 
[depending on size of train / prediction windows and random sampling boundaries could be same value over whole series] - year_number = features_times.year - if args.sampling_period=='weekly': #index of week number, when sampling at WEEKLY level (this is different than above) @@ -379,7 +381,6 @@ def run(): year_period = WEEK_NUMBER_MAX / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) - year_number = features_times.year if args.sampling_period=='monthly': #month index (only used if sampling monthly) @@ -387,8 +388,9 @@ def run(): period = 12. / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year moy_norm = features_times.month.values / period #not sure if by default this starts on Monday vs Sunday moy = np.stack([np.cos(moy_norm), np.sin(moy_norm)], axis=-1) - year_number = features_times.year + #To catch longer term trending data, can also include year number. [depending on size of train / prediction windows and random sampling boundaries could be same value over whole series] + year = (features_times.year - REFERENCE_FIRST_YEAR)/float(REFERENCE_LAST_YEAR-REFERENCE_FIRST_YEAR) # Assemble indices for quarterly lagged data @@ -420,12 +422,12 @@ def run(): count_pctl_100=percentiles[5],#max #!!!!!!!!!!!!!!!! just to see what happens: apend one of my features. ) - elif args.features_set == 'simple': - tensors = dict( - counts=df, - count_median=count_median,#this is just the median feature, can put in others too - #dow=dow, - ) +# elif args.features_set == 'simple': +# tensors = dict( +# counts=df, +# count_median=count_median,#this is just the median feature, can put in others too +# #dow=dow, +# ) elif (args.features_set == 'full') or (args.features_set == 'full_w_context'): tensors = dict( @@ -450,16 +452,13 @@ def run(): # series_length=series_length,#length of series [number of samples] to get idea of how much history a series has #number nonzero #Other time-frequency/scale features + #tsfresh features #... - - ) else: raise Exception(f'features_set must be specified\nOne of ["arturius","simple","full","full_w_context"]') - - if args.sampling_period=='daily': tensors['dow']=dow @@ -470,8 +469,11 @@ def run(): tensors['moy']=moy else: raise Exception('Must specify correct sampling period') + + #Also use year number as feature + tensors['year']=year - + """#If provide other info based on e.g. 
new location (any features that are not derived purely from the time series) if args.features_set == 'full_w_context': tensors['country'] = asdasdasd From a6ededdb106a0fa3df9cd25faa93dc645d4c3416 Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 19 Jul 2018 03:21:12 -0700 Subject: [PATCH 22/42] added encoder-decoder context to every decoder timestep --- hparams.py | 7 +++++++ model.py | 25 +++++++++++++++++++------ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/hparams.py b/hparams.py index 0bfd3e6..9b694b3 100755 --- a/hparams.py +++ b/hparams.py @@ -40,6 +40,13 @@ encoder_activation_loss=1e-06, # max 0.001 decoder_stability_loss=0.0, # max 100 decoder_activation_loss=5e-06, # max 0.001 + + #Kaggle model architecture is more like a basic many-to-many RNN, not really a + #usual encoder-decoder architecture since computational graph does not have + #connections from encoded representation to each decoder time step (only to 1st + #decoder timestep). Set below to True to use encoder-decoder; set False to use + #Kaggle architecture not really true encoder-decoder + RECURSIVE_W_ENCODER_CONTEXT=True, ) diff --git a/model.py b/model.py index c2b228d..b315e13 100755 --- a/model.py +++ b/model.py @@ -16,6 +16,8 @@ + + def debug_tensor_print(tensor): """ Debugging mode: @@ -237,7 +239,7 @@ def smape_loss(true, predicted, weights): return tf.losses.compute_weighted_loss(smape, weights, loss_collection=None) -def decode_predictions(decoder_readout, inp: InputPipe): +def decode_predictions(decoder_readout, inp: InputPipe):#!!!!!quantiles """ Converts normalized prediction values to log1p(pageviews), e.g. reverts normalization :param decoder_readout: Decoder output, shape [n_days, batch] @@ -245,7 +247,7 @@ def decode_predictions(decoder_readout, inp: InputPipe): :return: """ # [n_days, batch] -> [batch, n_days] - batch_readout = tf.transpose(decoder_readout) + batch_readout = tf.transpose(decoder_readout) #!!!!!quantiles batch_std = tf.expand_dims(inp.norm_std, -1) batch_mean = tf.expand_dims(inp.norm_mean, -1) @@ -254,7 +256,7 @@ def decode_predictions(decoder_readout, inp: InputPipe): return ret -def calc_loss(predictions, true_y, additional_mask=None): +def calc_loss(predictions, true_y, additional_mask=None):#!!!!!quantiles """ Calculates losses, ignoring NaN true values (assigning zero loss to them) :param predictions: Predicted values @@ -276,7 +278,7 @@ def calc_loss(predictions, true_y, additional_mask=None): weights), tf.size(true_y) -def make_train_op(loss, ema_decay=None, prefix=None): +def make_train_op(loss, ema_decay=None, prefix=None):#!!!!!quantiles #optimizer = COCOB() ##train.AdamOptimizer train.GradientDescentOptimizer optimizer = tf.train.AdamOptimizer() #!!!!!try simpler optimizer on our data. @@ -473,9 +475,13 @@ def build_cell(idx): has_dropout = hparams.decoder_input_dropout[idx] < 1 \ or hparams.decoder_state_dropout[idx] < 1 or hparams.decoder_output_dropout[idx] < 1 + #context size alone may be as big as decoder state size?? Then input-> hidden would be a down projection... + #so maybe do a projection down, on the encoder side first [e.g. encoder output??] then better here... 
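The RECURSIVE_W_ENCODER_CONTEXT flag described above only changes what gets concatenated onto each decoder step's input. The shape bookkeeping, sketched with numpy outside the tf.while_loop for clarity (illustrative shapes, not the real graph):

import numpy as np

batch, feat_depth, context_depth = 4, 7, 16
features_t = np.random.randn(batch, feat_depth)     # decoder features for one timestep
prev_output = np.random.randn(batch, 1)             # previous predicted value
summary_z = np.random.randn(batch, context_depth)   # fixed encoder summary (top-layer state)

# Kaggle-style input: previous prediction + per-step features only
next_input = np.concatenate([prev_output, features_t], axis=1)      # [batch, feat_depth + 1]
# Encoder-decoder style: additionally append the same context at every step
next_input_ctx = np.concatenate([next_input, summary_z], axis=1)    # [batch, feat_depth + 1 + context_depth]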
if self.is_train and has_dropout: attn_depth = attn_features.shape[-1].value if attn_features is not None else 0 - input_size = attn_depth + prediction_inputs.shape[-1].value + 1 if idx == 0 else self.hparams.rnn_depth + context_depth = encoder_state.shape[-1].value if self.hparams.RECURSIVE_W_ENCODER_CONTEXT is not None else 0 + input_size = attn_depth + context_depth + prediction_inputs.shape[-1].value + 1 if idx == 0 else self.hparams.rnn_depth + input_size = tf.Print(input_size, ['attn_depth',tf.shape(attn_depth),attn_depth, 'context_depth',tf.shape(context_depth),context_depth, 'input_size',tf.shape(input_size),input_size])#!!!!!!!!!! cell = rnn.DropoutWrapper(cell, dtype=tf.float32, input_size=input_size, variational_recurrent=hparams.decoder_variational_dropout[idx], input_keep_prob=hparams.decoder_input_dropout[idx], @@ -505,7 +511,7 @@ def build_cell(idx): nest.assert_same_structure(encoder_state, cell.state_size) predict_timesteps = self.inp.predict_window - assert prediction_inputs.shape[1] == predict_timesteps + assert prediction_inputs.shape[1] == predict_timesteps #!!!!!!!quantiles # [batch_size, time, input_depth] -> [time, batch_size, input_depth] inputs_by_time = tf.transpose(prediction_inputs, [1, 0, 2]) @@ -545,6 +551,13 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar next_input = tf.concat([prev_output, features], axis=1) # Append previous predicted value to input features + #If using more of a typical encoder-decoder, also have encoder context each time: + if self.hparams.RECURSIVE_W_ENCODER_CONTEXT: +# encoder_state = tf.Print(next_input,['encoder_state',tf.shape(encoder_state),encoder_state]) + next_input = tf.concat([next_input, encoder_state], axis=1) +# next_input = tf.Print(next_input,['next_input',tf.shape(next_input),next_input]) + + # Run RNN cell output, state = cell(next_input, prev_state) # Make prediction from RNN outputs From d5277fd7f06ad6bf59e87a8f57253763d7926f7e Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 23 Jul 2018 14:54:16 -0700 Subject: [PATCH 23/42] misc. 
and starting on random series sizes --- PREDICT.py | 2 +- PREPROCESS.py | 5 +- RUN_MANY_TRAIN_VAL_WINDOWS.sh | 6 +-- Readme.md | 18 +++++-- __init__.py | 0 hparams.py | 9 +++- input_pipe.py | 98 +++++++++++++++++++++-------------- make_features.py | 55 ++++++++++++++++++++ model.py | 90 +++++++++++++++++++++----------- trainer.py | 75 +++++++++++++++++++-------- 10 files changed, 257 insertions(+), 101 deletions(-) create mode 100644 __init__.py diff --git a/PREDICT.py b/PREDICT.py index a3ea201..befdad6 100755 --- a/PREDICT.py +++ b/PREDICT.py @@ -79,7 +79,7 @@ def mean_smape(true, pred): t_preds = [] for tm in range(Nmodels): tf.reset_default_graph() - t_preds.append(predict(FEATURES_SET, SAMPLING_PERIOD, paths, build_hparams(PARAM_SETTING_FULL_NAME), back_offset=0, predict_window=63, + t_preds.append(predict(FEATURES_SET, SAMPLING_PERIOD, paths, build_hparams(PARAM_SETTING_FULL_NAME), back_offset=0, history_window_size=283, horizon_window_size=63, n_models=Nmodels, target_model=tm, seed=2, batch_size=2048, asgd=True)) diff --git a/PREPROCESS.py b/PREPROCESS.py index 00c4695..a103c04 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -318,7 +318,7 @@ def imputation_lagKmedian_single_series(df,seasonality,N_seasons,out_of_range_fi -def data_augmentation(df, jitter_pcts_list=[.05,.10,.15], do_low_pass_filter=True, additive_trend=False): +def data_augmentation(df, jitter_pcts_list=[.05,.01], do_low_pass_filter=True, additive_trend=False): """ Do some basic data augmentation with a few different options. Then output Cartesian product of all these variations as the final set. @@ -332,6 +332,9 @@ def jitter__uniform_pcts(df, jitter_pcts_list, N_perturbations): percent of the observed value. Either positive or negative. If the count is small, then just leave it, otherwise perturb (always leaving counts positive). + + Just do at most 1 or 2 percent jitter to not corrupt to much, + ~ magnitude of measurement noise. """ page = df['Page'].values[0] cols = df.columns diff --git a/RUN_MANY_TRAIN_VAL_WINDOWS.sh b/RUN_MANY_TRAIN_VAL_WINDOWS.sh index 6a21d8d..fd21a5f 100644 --- a/RUN_MANY_TRAIN_VAL_WINDOWS.sh +++ b/RUN_MANY_TRAIN_VAL_WINDOWS.sh @@ -1,6 +1,6 @@ #chmod 777 RUN_MANY_TRAIN_VAL_WINDOWS.sh #./RUN_MANY_TRAIN_VAL_WINDOWS.sh -#Run over many train_window - predict_window length pairs +#Run over many history_window_size - horizon_window_size length pairs #Compile results, analyze performance as (2D) heatmap @@ -36,11 +36,11 @@ for v in $VALIDATION_WINDOWS; do echo 'running make_features.py with --add_days='$v python3 make_features.py data/vars ours daily full --add_days=$v for t in $TRAIN_WINDOWS; do - echo 'validation window = '$v 'validation window = '$t + echo 'history window = '$t 'horizon window = '$v echo 'running trainer.py' NAME="val$v-train$t" echo 'NAAME='$NAME - python3 trainer.py full daily --name $NAME --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10 --max_epoch=$MAX_EPOCH --patience=5 --verbose --predict_window=$v --train_window=$t + python3 trainer.py full daily --name $NAME --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10 --max_epoch=$MAX_EPOCH --patience=5 --verbose --horizon_window_size=$v --history_window_size=$t done done diff --git a/Readme.md b/Readme.md index 240a090..ca66990 100755 --- a/Readme.md +++ b/Readme.md @@ -59,17 +59,22 @@ rm *.pkl cd .. 
ll data/ -python3 make_features.py data/vars ours daily full --add_days=63 -python3 make_features.py data/vars kaggle daily full --add_days=63 +python3 make_features.py data/vars ours daily full --add_days=50 +#python3 make_features.py data/vars kaggle daily full --add_days=63 + +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=5 --horizon_window_size=50 --history_window_size=100 --max_epoch=10 + +---------------------------------------------------------------------------------------------------------------------------------------------------------- + #no reason to expect 10000 to 11500 is good range to save out. View loss along the way python3 trainer.py arturius daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 -python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 -python3 trainer.py full daily --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --horizon_window_size=50 +python3 trainer.py full daily --name TEST_attn_head --hparam_set=TEST_attn_head --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --horizon_window_size=50 --name TEST_stacked --hparam_set=TEST_stacked --no_eval @@ -79,7 +84,10 @@ python3 trainer.py full daily --name TEST_attn_head --hparam_set=TEST_attn_head --verbose -python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --predict_window=50 --train_window=100 +python3 trainer.py full daily --name wEncDec --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --horizon_window_size=63 --history_window_size=100 --max_epoch=10 + +python3 trainer.py full daily --name noEncDec --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=10 --horizon_window_size=63 --history_window_size=100 --max_epoch=10 + python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10 --max_epoch=1000 --patience=50 --verbose --side_split diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/hparams.py b/hparams.py index 9b694b3..4b28e7e 100755 --- a/hparams.py +++ b/hparams.py @@ -46,7 +46,14 @@ #connections from encoded representation to each decoder time step (only to 1st #decoder timestep). Set below to True to use encoder-decoder; set False to use #Kaggle architecture not really true encoder-decoder - RECURSIVE_W_ENCODER_CONTEXT=True, + RECURSIVE_W_ENCODER_CONTEXT=True, + + #Instead of fixed size windows, do training phase over range of window sizes + #drawn uniformly from [a,b]. 
Another form of randomization/regularization, + #but more importantly this way model can generalize to different lengths so + #we can more fairly assess performance over range of history/horizon windows: + history_window_size_minmax=[7,365], + horizon_window_size_minmax=[7,60], ) diff --git a/input_pipe.py b/input_pipe.py index 69d1d52..351bb3e 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -134,10 +134,11 @@ def cut(self, counts, start, end): :return: tuple (train_counts, test_counts, lagged_counts, [dow,woy,moy,year]) """ # Pad counts to ensure we have enough array length for prediction - counts = tf.concat([counts, tf.fill([self.predict_window], np.NaN)], axis=0) + counts = tf.concat([counts, tf.fill([self.horizon_window_size], np.NaN)], axis=0) cropped_count = counts[start:end] - - +# cropped_count = tf.Print(cropped_count,['INPUT PIPE > CUT > cropped_count',tf.shape(cropped_count), 'start', start, 'end', end]) +# cropped_count = tf.Print(cropped_count,['self.history_window_size', self.history_window_size, 'self.horizon_window_size', self.horizon_window_size]) + # ============================================================================= # Ordinal periodic variables # which features are here depends on what the sampling period is for the data @@ -185,7 +186,7 @@ def cut(self, counts, start, end): #Will always have the count series (the series we predict on): # Split for train and test - x_counts, y_counts = tf.split(cropped_count, [self.train_window, self.predict_window], axis=0) + x_counts, y_counts = tf.split(cropped_count, [self.history_window_size, self.horizon_window_size], axis=0) # Convert NaN to zero in for train data x_counts = tf.where(tf.is_nan(x_counts), tf.zeros_like(x_counts), x_counts) @@ -214,17 +215,17 @@ def cut_train(self, counts, *args): :param args: pass-through data, will be appended to result :return: result of cut() + args """ - n_timesteps = self.predict_window + self.train_window + n_timesteps = self.horizon_window_size + self.history_window_size # How much free space we have to choose starting day free_space = self.inp.data_days - n_timesteps - self.back_offset - self.start_offset if self.verbose: #!!!!!! doesn't really matter since this is just printout, but would need to change for WEEKLY / MONTHLY lower_train_start = self.inp.data_start + pd.Timedelta(self.start_offset, 'D') lower_test_end = lower_train_start + pd.Timedelta(n_timesteps, 'D') - lower_test_start = lower_test_end - pd.Timedelta(self.predict_window, 'D') + lower_test_start = lower_test_end - pd.Timedelta(self.horizon_window_size, 'D') upper_train_start = self.inp.data_start + pd.Timedelta(free_space - 1, 'D') upper_test_end = upper_train_start + pd.Timedelta(n_timesteps, 'D') - upper_test_start = upper_test_end - pd.Timedelta(self.predict_window, 'D') + upper_test_start = upper_test_end - pd.Timedelta(self.horizon_window_size, 'D') print(f"Free space for training: {free_space} days.") print(f" Lower train {lower_train_start}, prediction {lower_test_start}..{lower_test_end}") print(f" Upper train {upper_train_start}, prediction {upper_test_start}..{upper_test_end}") @@ -238,12 +239,12 @@ def cut_train(self, counts, *args): def cut_eval(self, counts, *args): """ Cuts segment of time series for evaluation. 
- Always cuts train_window + predict_window length segment beginning at start_offset point + Always cuts history_window_size + horizon_window_size length segment beginning at start_offset point :param counts: counts timeseries :param args: pass-through data, will be appended to result :return: result of cut() + args """ - end = self.start_offset + self.train_window + self.predict_window + end = self.start_offset + self.history_window_size + self.horizon_window_size return self.cut(counts, self.start_offset, end) + args def reject_filter(self, x_counts, y_counts, *args): @@ -297,15 +298,15 @@ def make_features(self, *args): # Do train - predict splits # ============================================================================= if self.sampling_period == 'daily': - x_dow, y_dow = tf.split(dow, [self.train_window, self.predict_window], axis=0) - x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) #need to see how to fit in woy into inputs to this func + x_dow, y_dow = tf.split(dow, [self.history_window_size, self.horizon_window_size], axis=0) + x_woy, y_woy = tf.split(woy, [self.history_window_size, self.horizon_window_size], axis=0) #need to see how to fit in woy into inputs to this func elif self.sampling_period == 'weekly': - x_woy, y_woy = tf.split(woy, [self.train_window, self.predict_window], axis=0) + x_woy, y_woy = tf.split(woy, [self.history_window_size, self.horizon_window_size], axis=0) elif self.sampling_period == 'monthly': - x_moy, y_moy = tf.split(moy, [self.train_window, self.predict_window], axis=0) + x_moy, y_moy = tf.split(moy, [self.history_window_size, self.horizon_window_size], axis=0) #Already did a manual kind of scaling for year in make_features.py so don't need to normalize here... - x_year, y_year = tf.split(year, [self.train_window, self.predict_window], axis=0) + x_year, y_year = tf.split(year, [self.history_window_size, self.horizon_window_size], axis=0) x_year = tf.expand_dims(x_year,axis=1) y_year = tf.expand_dims(y_year,axis=1) @@ -317,7 +318,7 @@ def make_features(self, *args): norm_lagged_counts = (lagged_counts - mean) / std # Split lagged counts to train and test - x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.train_window, self.predict_window], axis=0) + x_lagged, y_lagged = tf.split(norm_lagged_counts, [self.history_window_size, self.horizon_window_size], axis=0) # Combine all page features into single tensor @@ -363,7 +364,7 @@ def make_features(self, *args): x_features = tf.concat([x_features, x_lagged, # Stretch series_features to all training days # [1, features] -> [n_timesteps, features] - tf.tile(series_features, [self.train_window, 1])], axis=1) + tf.tile(series_features, [self.history_window_size, 1])], axis=1) # Test features if self.sampling_period == 'daily': @@ -376,7 +377,7 @@ def make_features(self, *args): y_features = tf.concat([y_features, y_lagged, # Stretch series_features to all testing days # [1, features] -> [n_timesteps, features] - tf.tile(series_features, [self.predict_window, 1]) + tf.tile(series_features, [self.horizon_window_size, 1]) ], axis=1) # print(x_features) @@ -386,13 +387,14 @@ def make_features(self, *args): print('x_features') print(x_features) + print(x_features.shape) return x_counts, x_features, norm_x_counts, x_lagged, y_counts, y_features, norm_y_counts, mean, std, flat_features, page_ix #Must match up with setting self.XYZ = it_tensors below in __init__. 
def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iterable[tf.Tensor], N_time_series: int, mode: ModelMode, n_epoch=None, - batch_size=127, runs_in_burst=1, verbose=True, predict_window=60, train_window=500, + batch_size=127, runs_in_burst=1, verbose=True, horizon_window_size=60, history_window_size=500, train_completeness_threshold=1, predict_completeness_threshold=1, back_offset=0, train_skip_first=0, rand_seed=None): """ @@ -407,8 +409,8 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter :param batch_size: :param runs_in_burst: How many batches can be consumed at short time interval (burst). Multiplicator for prefetch() :param verbose: Print additional information during graph construction - :param predict_window: Number of days to predict - :param train_window: Use train_window days for traning + :param horizon_window_size: Number of days to predict + :param history_window_size: Use history_window_size days for traning :param train_completeness_threshold: Percent of zero datapoints allowed in train timeseries. :param predict_completeness_threshold: Percent of zero datapoints allowed in test/predict timeseries. :param back_offset: Don't use back_offset days at the end of timeseries @@ -430,32 +432,35 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter mode, inp.data_days, inp.data_start, inp.data_end, inp.features_end)) if mode == ModelMode.TRAIN: - # reserve predict_window at the end for validation - assert inp.data_days - predict_window > predict_window + train_window, \ + # reserve horizon_window_size at the end for validation + assert inp.data_days - horizon_window_size > horizon_window_size + history_window_size, \ "Predict+train window length (+predict window for validation) is larger than total number of days in dataset" self.start_offset = train_skip_first elif mode == ModelMode.EVAL or mode == ModelMode.PREDICT: - self.start_offset = inp.data_days - train_window - back_offset + self.start_offset = inp.data_days - history_window_size - back_offset if verbose: train_start = inp.data_start + pd.Timedelta(self.start_offset, 'D') - eval_start = train_start + pd.Timedelta(train_window, 'D') - end = eval_start + pd.Timedelta(predict_window - 1, 'D') + eval_start = train_start + pd.Timedelta(history_window_size, 'D') + end = eval_start + pd.Timedelta(horizon_window_size - 1, 'D') print("Train start %s, predict start %s, end %s" % (train_start, eval_start, end)) assert self.start_offset >= 0 - self.train_window = train_window - self.predict_window = predict_window - self.attn_window = train_window - predict_window + 1 - self.max_train_empty = int(round(train_window * (1 - train_completeness_threshold))) - self.max_predict_empty = int(round(predict_window * (1 - predict_completeness_threshold))) + self.history_window_size = history_window_size #!!!!!!!!!!!random resize + self.horizon_window_size = horizon_window_size#!!!!!!!!!!!random resize + self.attn_window = history_window_size - horizon_window_size + 1#!!!!!!!!!!!random resize + self.max_train_empty = int(round(history_window_size * (1 - train_completeness_threshold)))#!!!!!!!!!!!random resize + self.max_predict_empty = int(round(horizon_window_size * (1 - predict_completeness_threshold)))#!!!!!!!!!!!random resize self.mode = mode self.verbose = verbose + + self.train_completeness_threshold = train_completeness_threshold + self.predict_completeness_threshold = predict_completeness_threshold print('max_train_empty',self.max_train_empty) 
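The two "max empty" limits printed here are derived directly from the completeness thresholds and the current window sizes, which is why they (and attn_window) must be recomputed whenever the history/horizon sizes are redrawn for variable-size training. A sketch of that bookkeeping, using the [7, 365] / [7, 60] ranges from hparams as example bounds:

import numpy as np

def window_limits(history, horizon, train_completeness, predict_completeness):
    max_train_empty = int(round(history * (1 - train_completeness)))
    max_predict_empty = int(round(horizon * (1 - predict_completeness)))
    attn_window = history - horizon + 1
    return max_train_empty, max_predict_empty, attn_window

# Example redraw, as done per training step for variable-size windows
# (np.random.randint's upper bound is exclusive, hence the +1):
history = np.random.randint(7, 365 + 1)
horizon = np.random.randint(7, 60 + 1)
print(window_limits(history, horizon, train_completeness=0.01, predict_completeness=0.0))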
print('max_predict_empty',self.max_predict_empty) - print('train_window',self.train_window) - print('predict_window',self.predict_window) + print('history_window_size',self.history_window_size) + print('horizon_window_size',self.horizon_window_size) print('attn_window',self.attn_window) @@ -465,14 +470,29 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter # Choose right cutter function for current ModelMode cutter = {ModelMode.TRAIN: self.cut_train, ModelMode.EVAL: self.cut_eval, ModelMode.PREDICT: self.cut_eval} # Create dataset, transform features and assemble batches + #features is a list of tensors (one tensor per feature: counts, page_ix, ..., count_variance) + print('features',features) +# features = tf.Print(features,['features',tf.shape(features),features]) root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) - batch = (root_ds - .map(cutter[mode]) - .filter(self.reject_filter) - .map(self.make_features, num_parallel_calls=num_threads) - .batch(batch_size) - .prefetch(runs_in_burst * 2) - ) +# print(root_ds.output_classes, root_ds.output_shapes, root_ds.output_types,) + print(root_ds.output_shapes) +# batch = (root_ds +# .map(cutter[mode]) +# .filter(self.reject_filter) +# .map(self.make_features, num_parallel_calls=num_threads) +# .batch(batch_size) +# .prefetch(runs_in_burst * 2) +# ) + batch = root_ds.map(cutter[mode]).filter(self.reject_filter).map(self.make_features, num_parallel_calls=num_threads) + print('batch MFM', batch) + + batch = batch.batch(batch_size) + print('batch B', batch) + + batch = batch.prefetch(runs_in_burst * 2) + print('batch P', batch) + batch = (batch) + print('---------------- Done batching ----------------') print(batch) self.iterator = batch.make_initializable_iterator() diff --git a/make_features.py b/make_features.py index d1061db..170d183 100755 --- a/make_features.py +++ b/make_features.py @@ -265,6 +265,61 @@ def normalize(values: np.ndarray): return (values - values.mean()) / np.std(values) + + + + + +def encode_fixed_date_holidays__daily(dates_series): + """ + Encode holidays and shoulder days, for holidays that occur yearly on fixed + dates. + For daily sampled data only. + + In USA: + Christmas, New Year, 4th of July, Halloween, Cinco de Mayo + Valentine's Day, Veteran's Day + + other international: + ... + """ + return dates_series + + +# ============================================================================= +# MOVING holidays [variable date] +# ============================================================================= +def encode_thanksgiving__daily(dates_series): + """ + Encode Thanksgiving holiday and shoulder days. + For daily sampled data only. + """ +# 4th Thurs of Novmber... +# if (month==11) and (dayofweek=='Thurs') and (22<=dayofmonth<=28) + return dates_series + +def encode_easter__daily(dates_series): + """ + Encode Easter holiday and shoulder days. + For daily sampled data only. + """ + return dates_series + +#Labor Day, Memorial Day, President's Day, MLK Day, Columbus Day, Tax Day + +def encode_custom_dates__daily(dates_series,dates_list): + """ + Encode custom days and optionally shoulder days. + For daily sampled data only. + + E.g. Superbowl Sunday + suberbowl_dates = ['2014-2-2','2015-2-1','2016-2-7','2017-2-5','2018-2-4','2019-2-3'] + shoulders = [...] 
+ """ + return dates_series + + + def run(): parser = argparse.ArgumentParser(description='Prepare data') parser.add_argument('data_dir') diff --git a/model.py b/model.py index b315e13..81cc5d8 100755 --- a/model.py +++ b/model.py @@ -7,8 +7,11 @@ from tensorflow.python.util import nest from cocob import COCOB +from Adam_HD_optimizer import AdamHDOptimizer +from SGDN_HD_optimizer import MomentumSGDHDOptimizer from input_pipe import InputPipe, ModelMode + GRAD_CLIP_THRESHOLD = 10 RNN = cudnn_rnn.CudnnGRU # RNN = tf.contrib.cudnn_rnn.CudnnLSTM @@ -173,7 +176,7 @@ def attn_readout_v3(readout, attn_window, attn_heads, page_features, seed): # [batch(readout_depth), width, channels] -> [batch, height=1, width, channels] inp = readout[:, tf.newaxis, :, :] - # attn_window = train_window - predict_window + 1 + # attn_window = history_window_size - horizon_window_size + 1 # [batch, attn_window * n_heads] filter_logits = tf.layers.dense(page_features, attn_window * attn_heads, name="attn_focus", kernel_initializer=default_init(seed) @@ -191,15 +194,15 @@ def attn_readout_v3(readout, attn_window, attn_heads, page_features, seed): # [width(attn_window), channels(batch), n_heads] -> [height(1), width(attn_window), channels(batch), multiplier(n_heads)] attn_filter = attns_max[tf.newaxis, :, :, :] - # [batch(readout_depth), height=1, width=n_days, channels=batch] -> [batch(readout_depth), height=1, width=predict_window, channels=batch*n_heads] + # [batch(readout_depth), height=1, width=n_days, channels=batch] -> [batch(readout_depth), height=1, width=horizon_window_size, channels=batch*n_heads] averaged = tf.nn.depthwise_conv2d_native(inp, attn_filter, [1, 1, 1, 1], 'VALID') - # [batch, height=1, width=predict_window, channels=readout_depth*n_neads] -> [batch(depth), predict_window, batch*n_heads] + # [batch, height=1, width=horizon_window_size, channels=readout_depth*n_neads] -> [batch(depth), horizon_window_size, batch*n_heads] attn_features = tf.squeeze(averaged, 1) - # [batch(depth), predict_window, batch*n_heads] -> [batch*n_heads, predict_window, depth] + # [batch(depth), horizon_window_size, batch*n_heads] -> [batch*n_heads, horizon_window_size, depth] attn_features = tf.transpose(attn_features, [2, 1, 0]) - # [batch * n_heads, predict_window, depth] -> n_heads * [batch, predict_window, depth] + # [batch * n_heads, horizon_window_size, depth] -> n_heads * [batch, horizon_window_size, depth] heads = [attn_features[head_no::attn_heads] for head_no in range(attn_heads)] - # n_heads * [batch, predict_window, depth] -> [batch, predict_window, depth*n_heads] + # n_heads * [batch, horizon_window_size, depth] -> [batch, horizon_window_size, depth*n_heads] result = tf.concat(heads, axis=-1) # attn_diag = tf.unstack(attns_max, axis=-1) return result, None @@ -215,11 +218,14 @@ def calc_smape_rounded(true, predicted, weights): """ n_valid = tf.reduce_sum(weights) true_o = tf.round(tf.expm1(true)) - pred_o = tf.maximum(tf.round(tf.expm1(predicted)), 0.0) + pred_o = tf.maximum(tf.round(tf.expm1(predicted)), 0.0) #!!!!!!! for us we could even clip at 1, since 0 means measurement was missing summ = tf.abs(true_o) + tf.abs(pred_o) zeros = summ < 0.01 raw_smape = tf.abs(pred_o - true_o) / summ * 2.0 smape = tf.where(zeros, tf.zeros_like(summ, dtype=tf.float32), raw_smape) + #!!!!!!!!!!! since summ is sum of absolute values of 2 rounded things, is only < .01 if is exactly = 0. 
For our data, this should NEVER happen, would mean unmeasured NAN, so actually this is exactly the SMAPE we want + +# smape = tf.Print(smape, ['pred_o',tf.shape(pred_o),pred_o, 'pred_o not round clip',tf.expm1(predicted), 'true_o',tf.shape(true_o),true_o, 'smape', smape, 'raw_smape', raw_smape]) return tf.reduce_sum(smape * weights) / n_valid @@ -279,10 +285,19 @@ def calc_loss(predictions, true_y, additional_mask=None):#!!!!!quantiles def make_train_op(loss, ema_decay=None, prefix=None):#!!!!!quantiles - #optimizer = COCOB() - ##train.AdamOptimizer train.GradientDescentOptimizer - optimizer = tf.train.AdamOptimizer() #!!!!!try simpler optimizer on our data. -# optimizer = tf.train.GradientDescentOptimizer(1e-9) #!!!!!try simpler optimizer on our data. +# OPTIMIZER=#'SGDN-HD',#'COCOB',#'ADAM',#'SGDN-HD',#'ADAM-HD' +# if OPTIMIZER=='COCOB': +# optimizer = COCOB() +# if OPTIMIZER=='ADAM': +# optimizer = tf.train.AdamOptimizer() +# if OPTIMIZER=='SGD': +# optimizer = tf.train.GradientDescentOptimizer(1e-9) +# if OPTIMIZER=='SGDN-HD': +# optimizer = MomentumSGDHDOptimizer() +# if OPTIMIZER=='ADAM-HD': +# optimizer = AdamHDOptimizer() +# optimizer=MomentumSGDHDOptimizer(alpha_0=1e-1)#bad SMAPEs for various orders of magnitude alpha_0 + optimizer = tf.train.AdamOptimizer() glob_step = tf.train.get_global_step() @@ -313,7 +328,7 @@ def make_train_op(loss, ema_decay=None, prefix=None):#!!!!!quantiles return training_op, glob_norm, ema -def convert_cudnn_state_v2(h_state, hparams, seed, c_state=None, dropout=1.0): +def convert_cudnn_state_v3(h_state, hparams, seed, c_state=None, dropout=1.0): """ Converts RNN state tensor from cuDNN representation to TF RNNCell compatible representation. :param h_state: tensor [num_layers, batch_size, depth] @@ -335,13 +350,23 @@ def wrap_dropout(structure): # encoder_layers > decoder_layers: get outputs of upper encoder layers # encoder_layers < decoder_layers: feed encoder outputs to lower decoder layers, feed zeros to top layers h_layers = tf.unstack(h_state) + + #Regardless of relative number of layers in encoder vs. decoder, simple approach is + #use topmost encoder layer hidden state as the (fixed) context + encoded_representation = wrap_dropout(h_layers[-1]) + #above uses a different random dropout for the "encoded representaiton" than the actual top level output. + #This is possibly a good regularization thing since we dont expect the final hidden state to be perfect summar/context vector, + #so a little randomness is probably good here. + #vs. 
below using topmost level exactly same dropout mask: _[-1] if hparams.encoder_rnn_layers >= hparams.decoder_rnn_layers: - return squeeze(wrap_dropout(h_layers[hparams.encoder_rnn_layers - hparams.decoder_rnn_layers:])) + _ = wrap_dropout(h_layers[hparams.encoder_rnn_layers - hparams.decoder_rnn_layers:]) + return squeeze(_), _[-1] #Use the topmost hidden state of the encoder as the encoded representaiton +# return squeeze(_), encoded_representation #Use the topmost hidden state of the encoder as the encoded representaiton else: lower_inputs = wrap_dropout(h_layers) upper_inputs = [tf.zeros_like(h_layers[0]) for _ in range(hparams.decoder_rnn_layers - hparams.encoder_rnn_layers)] - return squeeze(lower_inputs + upper_inputs) + return squeeze(lower_inputs + upper_inputs), lower_inputs[-1] #Use the topmost hidden state of the encoder as the encoded representaiton def rnn_stability_loss(rnn_output, beta): @@ -381,7 +406,7 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a :param seed: :param graph_prefix: Subgraph prefix for multi-model graph :param asgd_decay: Decay for SGD averaging - :param loss_mask: Additional mask for losses calculation (one value for each prediction day), shape=[predict_window] + :param loss_mask: Additional mask for losses calculation (one value for each prediction day), shape=[horizon_window_size] """ self.is_train = is_train self.inp = inp @@ -392,13 +417,15 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a encoder_output, h_state, c_state = make_encoder(inp.time_x, inp.encoder_features_depth, is_train, hparams, seed, transpose_output=False) # Encoder activation losses - enc_stab_loss = rnn_stability_loss(encoder_output, hparams.encoder_stability_loss / inp.train_window) - enc_activation_loss = rnn_activation_loss(encoder_output, hparams.encoder_activation_loss / inp.train_window) + enc_stab_loss = rnn_stability_loss(encoder_output, hparams.encoder_stability_loss / inp.history_window_size) + enc_activation_loss = rnn_activation_loss(encoder_output, hparams.encoder_activation_loss / inp.history_window_size) # Convert state from cuDNN representation to TF RNNCell-compatible representation - encoder_state = convert_cudnn_state_v2(h_state, hparams, c_state, + encoder_state, summary_z = convert_cudnn_state_v3(h_state, hparams, c_state, dropout=hparams.gate_dropout if is_train else 1.0) - +# encoder_state = tf.Print(encoder_state, ['encoder_state',tf.shape(encoder_state),encoder_state]) +# summary_z = tf.Print(summary_z, ['summary_z',tf.shape(summary_z),summary_z]) + # Attention calculations # Compress encoder outputs enc_readout = compressed_readout(encoder_output, hparams, @@ -416,10 +443,11 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a print('inp.time_y',inp.time_y) decoder_targets, decoder_outputs = self.decoder(encoder_state, attn_features if hparams.use_attn else None, + summary_z if hparams.RECURSIVE_W_ENCODER_CONTEXT else None, inp.time_y, inp.norm_x[:, -1]) #in decoder function def: inp.time_y = "prediction_inputs"; inp.norm_x[:, -1] = "previous_y" (i.e. 
the final x normalizd)) # Decoder activation losses - dec_stab_loss = rnn_stability_loss(decoder_outputs, hparams.decoder_stability_loss / inp.predict_window) - dec_activation_loss = rnn_activation_loss(decoder_outputs, hparams.decoder_activation_loss / inp.predict_window) + dec_stab_loss = rnn_stability_loss(decoder_outputs, hparams.decoder_stability_loss / inp.horizon_window_size) + dec_activation_loss = rnn_activation_loss(decoder_outputs, hparams.decoder_activation_loss / inp.horizon_window_size) # Get final denormalized predictions self.predictions = decode_predictions(decoder_targets, inp) @@ -459,12 +487,12 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a def default_init(self, seed_add=0): return default_init(self.seed + seed_add) - def decoder(self, encoder_state, attn_features, prediction_inputs, previous_y): + def decoder(self, encoder_state, attn_features, summary_z, prediction_inputs, previous_y): """ :param encoder_state: shape [batch_size, encoder_rnn_depth] :param prediction_inputs: features for prediction days, tensor[batch_size, time, input_depth] :param previous_y: Last day pageviews, shape [batch_size] - :param attn_features: Additional features from attention layer, shape [batch, predict_window, readout_depth*n_heads] + :param attn_features: Additional features from attention layer, shape [batch, horizon_window_size, readout_depth*n_heads] :return: decoder rnn output """ hparams = self.hparams @@ -479,7 +507,8 @@ def build_cell(idx): #so maybe do a projection down, on the encoder side first [e.g. encoder output??] then better here... if self.is_train and has_dropout: attn_depth = attn_features.shape[-1].value if attn_features is not None else 0 - context_depth = encoder_state.shape[-1].value if self.hparams.RECURSIVE_W_ENCODER_CONTEXT is not None else 0 + context_depth = summary_z.shape[-1].value if self.hparams.RECURSIVE_W_ENCODER_CONTEXT is not None else 0 #Should just be the encoder RNN depth + print('attn_depth',attn_depth, 'context_depth',context_depth) input_size = attn_depth + context_depth + prediction_inputs.shape[-1].value + 1 if idx == 0 else self.hparams.rnn_depth input_size = tf.Print(input_size, ['attn_depth',tf.shape(attn_depth),attn_depth, 'context_depth',tf.shape(context_depth),context_depth, 'input_size',tf.shape(input_size),input_size])#!!!!!!!!!! 
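Written out without the inline conditional (which groups over the whole sum, since `x if cond else y` binds more loosely than `+`), the per-layer input size passed to DropoutWrapper is just the following; a sketch with illustrative parameter names:

def decoder_cell_input_size(idx, attn_depth, context_depth, feature_depth, rnn_depth):
    # Layer 0 sees [previous prediction | per-step features | attention readout | encoder context];
    # deeper stacked layers only see the lower layer's output of width rnn_depth.
    if idx == 0:
        return attn_depth + context_depth + feature_depth + 1  # +1 for the previous predicted value
    return rnn_depth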
cell = rnn.DropoutWrapper(cell, dtype=tf.float32, input_size=input_size, @@ -510,7 +539,7 @@ def build_cell(idx): nest.assert_same_structure(encoder_state, cell.state_size) - predict_timesteps = self.inp.predict_window + predict_timesteps = self.inp.horizon_window_size assert prediction_inputs.shape[1] == predict_timesteps #!!!!!!!quantiles # [batch_size, time, input_depth] -> [time, batch_size, input_depth] @@ -541,7 +570,7 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar # RNN inputs for current step features = inputs_by_time[timestep] - # [batch, predict_window, readout_depth * n_heads] -> [batch, readout_depth * n_heads] + # [batch, horizon_window_size, readout_depth * n_heads] -> [batch, readout_depth * n_heads] if attn_features is not None: # [batch_size, 1] + [batch_size, input_depth] attn = attn_features[:, timestep, :] @@ -553,11 +582,14 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar #If using more of a typical encoder-decoder, also have encoder context each time: if self.hparams.RECURSIVE_W_ENCODER_CONTEXT: -# encoder_state = tf.Print(next_input,['encoder_state',tf.shape(encoder_state),encoder_state]) - next_input = tf.concat([next_input, encoder_state], axis=1) + next_input = tf.concat([next_input, summary_z], axis=1) #!!!!!!!!summary_z[-1] +# if self.hparams.encoder_rnn_layers == 1: +# next_input = tf.concat([next_input, summary_z], axis=1) #!!!!!!!!summary_z[-1] +# elif self.hparams.encoder_rnn_layers > 1: +# next_input = tf.concat([next_input, summary_z[-1]], axis=1) #!!!!!!!!summary_z[-1] # next_input = tf.Print(next_input,['next_input',tf.shape(next_input),next_input]) - + # Run RNN cell output, state = cell(next_input, prev_state) # Make prediction from RNN outputs diff --git a/trainer.py b/trainer.py index d43c846..b2b2749 100755 --- a/trainer.py +++ b/trainer.py @@ -400,14 +400,14 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model seed=None, logdir='data/logs', max_epoch=100, patience=2, train_sampling=1.0, eval_sampling=1.0, eval_memsize=5, gpu=0, gpu_allow_growth=False, save_best_model=False, forward_split=False, write_summaries=False, verbose=False, asgd_decay=None, tqdm=True, - side_split=True, max_steps=None, save_from_step=None, do_eval=True, predict_window=63, train_window=283): + side_split=True, max_steps=None, save_from_step=None, do_eval=True):#, horizon_window_size=63, history_window_size=283): eval_k = int(round(26214 * eval_memsize / n_models)) eval_batch_size = int( eval_k / (hparams.rnn_depth * hparams.encoder_rnn_layers)) # 128 -> 1024, 256->512, 512->256 eval_pct = 0.1 batch_size = hparams.batch_size -# train_window = hparams.train_window +# history_window_size = hparams.history_window_size tf.reset_default_graph() if seed: tf.set_random_seed(seed) @@ -446,36 +446,58 @@ def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_model print('eval_every_step', eval_every_step) + def random_draw_history_and_horizon_window_sizes(trainer): + """ + Want to not only have random start end, but also variable size chunks for + history and horizon sizes in TRAINING phase. + (in prediction phase, use fixed sizes, and then for different sizes see how performance is.) 
+ """ + history = np.random.randint(low=hparams.history_window_size_minmax[0],high=hparams.history_window_size_minmax[1]+1) + horizon = np.random.randint(low=hparams.horizon_window_size_minmax[0],high=hparams.horizon_window_size_minmax[1]+1) + for TT in trainer.trainers: + TT.train_model.inp.history_window_size = history + TT.train_model.inp.horizon_window_size = horizon + TT.train_model.inp.attn_window = history - horizon + 1 + TT.train_model.inp.max_train_empty = int(round(history * (1 - TT.train_model.inp.train_completeness_threshold))) + TT.train_model.inp.max_predict_empty = int(round(horizon * (1 - TT.train_model.inp.predict_completeness_threshold))) + return trainer + + + def create_model(features_set, sampling_period, scope, index, prefix, seed): + #Just dummy filler, not important what value + history_dummy = 111 + horizon_dummy = 42 + with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): split = splitter.splits[index] pipe = InputPipe(features_set, sampling_period, inp, features=split.train_set, N_time_series=split.train_size, mode=ModelMode.TRAIN, batch_size=batch_size, n_epoch=None, verbose=verbose, train_completeness_threshold=train_completeness_threshold, - predict_completeness_threshold=train_completeness_threshold, train_window=train_window, - predict_window=predict_window, + predict_completeness_threshold=train_completeness_threshold, history_window_size=history_dummy, + horizon_window_size=horizon_dummy, rand_seed=seed, train_skip_first=hparams.train_skip_first, - back_offset=predict_window if forward_split else 0) + back_offset=horizon_dummy if forward_split else 0) inp_scope.reuse_variables() TCT = 0.01 if side_split: side_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, - verbose=verbose, predict_window=predict_window, + verbose=verbose, horizon_window_size=horizon_dummy, train_completeness_threshold=TCT, predict_completeness_threshold=0, - train_window=train_window, rand_seed=seed, runs_in_burst=eval_batches, - back_offset=predict_window * (2 if forward_split else 1)) + history_window_size=history_dummy, rand_seed=seed, runs_in_burst=eval_batches, + back_offset=horizon_dummy * (2 if forward_split else 1)) else: side_eval_pipe = None if forward_split: - forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size,#!!!!!!!!!!!!!!!! page_features + forward_eval_pipe = InputPipe(features_set, sampling_period, inp, features=split.test_set, N_time_series=split.test_size, mode=ModelMode.EVAL, batch_size=eval_batch_size, n_epoch=None, - verbose=verbose, predict_window=predict_window, + verbose=verbose, horizon_window_size=horizon_dummy, train_completeness_threshold=TCT, predict_completeness_threshold=0, - train_window=train_window, rand_seed=seed, runs_in_burst=eval_batches, - back_offset=predict_window) + history_window_size=history_dummy, rand_seed=seed, runs_in_burst=eval_batches, + back_offset=horizon_dummy) else: forward_eval_pipe = None avg_sgd = asgd_decay is not None @@ -600,8 +622,17 @@ def ema_vars(model): tqr = range(steps_per_epoch) for _ in tqr: + #!!!!!!!!!! 
Variable random length train predict windows + #Random draw the train, predict window lengths + print(_) + trainer = random_draw_history_and_horizon_window_sizes(trainer) +# print('+++++++++++++++', [(TT.train_model.inp.history_window_size,TT.train_model.inp.horizon_window_size) for TT in trainer.trainers]) +# print('--------', [(TT.train_model.inp.max_train_empty,TT.train_model.inp.max_predict_empty) for TT in trainer.trainers]) + try: step = trainer.train_step(sess, epoch) +# print('+-+-+-+-+-+-+-', [(TT.train_model.inp.history_window_size,TT.train_model.inp.horizon_window_size) for TT in trainer.trainers]) +# print('0000000000000', [(TT.train_model.inp.max_train_empty,TT.train_model.inp.max_predict_empty) for TT in trainer.trainers]) except tf.errors.OutOfRangeError: break # if beholder: @@ -672,15 +703,15 @@ def ema_vars(model): return np.mean(best_epoch_smape, dtype=np.float64) -def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, verbose=False, predict_window=6, back_offset=0, n_models=1, - target_model=0, asgd=False, seed=1, batch_size=1024, train_window=283): +def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, verbose=False, horizon_window_size=6, back_offset=0, n_models=1, + target_model=0, asgd=False, seed=1, batch_size=1024, history_window_size=283): #For predict: allow horizon_window_size to be fixed with tf.variable_scope('input') as inp_scope: with tf.device("/cpu:0"): inp = VarFeeder.read_vars("data/vars") pipe = InputPipe(features_set, sampling_period, inp, page_features(inp, features_set), inp.N_time_series, mode=ModelMode.PREDICT, batch_size=batch_size, train_completeness_threshold=0.01, - predict_window=predict_window, - predict_completeness_threshold=0.0, train_window=train_window,#hparams.train_window, + horizon_window_size=horizon_window_size, + predict_completeness_threshold=0.0, history_window_size=history_window_size,#hparams.history_window_size, back_offset=back_offset) asgd_decay = 0.99 if asgd else None if n_models == 1: @@ -749,12 +780,12 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, predictions /= len(checkpoints) #Since it is averaging predictions over the chckpoints offset = pd.Timedelta(back_offset, 'D') #!!!!!!!!!!!! 
need to change these lines when sampling WEEKLY MONTHLY start_prediction = inp.data_end + pd.Timedelta('1D') - offset - end_prediction = start_prediction + pd.Timedelta(predict_window - 1, 'D') + end_prediction = start_prediction + pd.Timedelta(horizon_window_size - 1, 'D') predictions.columns = pd.date_range(start_prediction, end_prediction) if return_x: x = pd.concat(x_buffer) - #start_data = inp.data_end - pd.Timedelta(hparams.train_window - 1, 'D') - back_offset - start_data = inp.data_end - pd.Timedelta(train_window - 1, 'D') - back_offset #!!!!!now for heatmaps + #start_data = inp.data_end - pd.Timedelta(hparams.history_window_size - 1, 'D') - back_offset + start_data = inp.data_end - pd.Timedelta(history_window_size - 1, 'D') - back_offset #!!!!!now for heatmaps end_data = inp.data_end - back_offset x.columns = pd.date_range(start_data, end_data) return predictions, x @@ -789,8 +820,8 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, parser.add_argument('--no_tqdm', default=True, dest='tqdm', action='store_false', help="Don't use tqdm for status display during training") parser.add_argument('--max_steps', type=int, help="Stop training after max steps") parser.add_argument('--save_from_step', type=int, help="Save model on each evaluation (10 evals per epoch), starting from this step") - parser.add_argument('--predict_window', default=63, type=int, help="Number of days to predict") - parser.add_argument('--train_window', default=283, type=int, help="Train window chunk size")#Now that we want to do train size - val size performance heatmaps +# parser.add_argument('--horizon_window_size', default=63, type=int, help="Number of days to predict") +# parser.add_argument('--history_window_size', default=283, type=int, help="Train window chunk size")#Now that we want to do train size - val size performance heatmaps args = parser.parse_args() param_dict = dict(vars(args)) @@ -801,7 +832,7 @@ def predict(features_set, sampling_period, checkpoints, hparams, return_x=False, # hparams = build_hparams() # result = train("definc_attn", hparams, n_models=1, train_sampling=1.0, eval_sampling=1.0, patience=5, multi_gpu=True, # save_best_model=False, gpu=0, eval_memsize=15, seed=5, verbose=True, forward_split=False, - # write_summaries=True, side_split=True, do_eval=False, predict_window=63, asgd_decay=None, max_steps=11500, + # write_summaries=True, side_split=True, do_eval=False, horizon_window_size=63, asgd_decay=None, max_steps=11500, # save_from_step=10500) # print("Training result:", result) From 0a67384deca6968cbddb6b7cfe7d0a8850b3fb57 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 23 Jul 2018 19:13:41 -0700 Subject: [PATCH 24/42] starting on holidays --- input_pipe.py | 72 ++++++++++---- make_features.py | 97 ++++++++++++++++-- model.py | 10 +- trainer.py | 254 +++++++++++++++++++++++------------------------ 4 files changed, 273 insertions(+), 160 deletions(-) diff --git a/input_pipe.py b/input_pipe.py index 351bb3e..a1a7d0e 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -464,6 +464,17 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter print('attn_window',self.attn_window) + def random_draw_new_window_sizes(): + history = np.random.randint(low=7,high=120+1) + horizon = np.random.randint(low=7,high=60+1) + self.history_window_size = history + self.horizon_window_size = horizon + self.attn_window = history - horizon + 1 + self.max_train_empty = int(round(history * (1 - self.train_completeness_threshold))) + self.max_predict_empty 
= int(round(horizon * (1 - self.predict_completeness_threshold))) + + + # Reserve more processing threads for eval/predict because of larger batches num_threads = 3 if mode == ModelMode.TRAIN else 6 @@ -472,26 +483,47 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter # Create dataset, transform features and assemble batches #features is a list of tensors (one tensor per feature: counts, page_ix, ..., count_variance) print('features',features) -# features = tf.Print(features,['features',tf.shape(features),features]) - root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) -# print(root_ds.output_classes, root_ds.output_shapes, root_ds.output_types,) - print(root_ds.output_shapes) -# batch = (root_ds -# .map(cutter[mode]) -# .filter(self.reject_filter) -# .map(self.make_features, num_parallel_calls=num_threads) -# .batch(batch_size) -# .prefetch(runs_in_burst * 2) -# ) - batch = root_ds.map(cutter[mode]).filter(self.reject_filter).map(self.make_features, num_parallel_calls=num_threads) - print('batch MFM', batch) + + +# for _ in range(max(n_epoch,20)): +## random_draw_new_window_sizes() +# print('max_train_empty',self.max_train_empty) +# print('max_predict_empty',self.max_predict_empty) +# print('history_window_size',self.history_window_size) +# print('horizon_window_size',self.horizon_window_size) +# print('attn_window',self.attn_window) +# +# root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) +# # print(root_ds.output_classes, root_ds.output_shapes, root_ds.output_types,) +# print('root_ds.output_shapes',root_ds.output_shapes) +# print('root_ds.output_types',root_ds.output_types) +# # batch = (root_ds +# # .map(cutter[mode]) +# # .filter(self.reject_filter) +# # .map(self.make_features, num_parallel_calls=num_threads) +# # .batch(batch_size) +# # .prefetch(runs_in_burst * 2) +# # ) +# +# #TEST:change horisoron jiostory +# batch = root_ds.map(cutter[mode]).filter(self.reject_filter).map(self.make_features, num_parallel_calls=num_threads) +# print('batch MFM', batch) +# +# batch = batch.batch(batch_size) +# print('batch B', batch) +# +# batch = batch.prefetch(runs_in_burst * 2) +# print('batch P', batch) +# batch = (batch) - batch = batch.batch(batch_size) - print('batch B', batch) - - batch = batch.prefetch(runs_in_burst * 2) - print('batch P', batch) - batch = (batch) + root_ds = tf.data.Dataset.from_tensor_slices(tuple(features)).repeat(n_epoch) + batch = (root_ds + .map(cutter[mode]) + .filter(self.reject_filter) + .map(self.make_features, num_parallel_calls=num_threads) + .batch(batch_size) + .prefetch(runs_in_burst * 2) + ) print('---------------- Done batching ----------------') print(batch) @@ -504,6 +536,7 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter #But if not doing lagged then can return None for that ??? self.true_x, self.time_x, self.norm_x, self.lagged_x, self.true_y, self.time_y, self.norm_y, self.norm_mean, \ self.norm_std, self.series_features, self.page_ix = it_tensors #!!!!!!!!!!!!! 
names hardcoded ned to change to my fgeatures + print(self.true_x) """if self.features_set=='simple': pass # if self.features_set=='full': @@ -513,6 +546,7 @@ def __init__(self, features_set, sampling_period, inp: VarFeeder, features: Iter self.encoder_features_depth = self.time_x.shape[2].value print('self.encoder_features_depth',self.encoder_features_depth) + print('self.time_x.shape',self.time_x.shape) def load_vars(self, session): self.inp.restore(session) diff --git a/make_features.py b/make_features.py index 170d183..f8db776 100755 --- a/make_features.py +++ b/make_features.py @@ -270,7 +270,10 @@ def normalize(values: np.ndarray): -def encode_fixed_date_holidays__daily(dates_series): + + + +def get_fixed_date_holidays__daily(dates_series, month_day): """ Encode holidays and shoulder days, for holidays that occur yearly on fixed dates. @@ -289,23 +292,25 @@ def encode_fixed_date_holidays__daily(dates_series): # ============================================================================= # MOVING holidays [variable date] # ============================================================================= -def encode_thanksgiving__daily(dates_series): +def get_thanksgivings__daily(dates_series): """ - Encode Thanksgiving holiday and shoulder days. - For daily sampled data only. + Get Thanksgiving holiday dates within the few years time range """ # 4th Thurs of Novmber... # if (month==11) and (dayofweek=='Thurs') and (22<=dayofmonth<=28) - return dates_series + thanksgiving_dates = [] + #... + return thanksgiving_dates -def encode_easter__daily(dates_series): +def get_Easters__daily(dates_series): """ - Encode Easter holiday and shoulder days. - For daily sampled data only. + Get Easter holiday dates within the few years time range """ - return dates_series + easter_dates = [] + #... + return easter_dates + -#Labor Day, Memorial Day, President's Day, MLK Day, Columbus Day, Tax Day def encode_custom_dates__daily(dates_series,dates_list): """ @@ -319,6 +324,78 @@ def encode_custom_dates__daily(dates_series,dates_list): return dates_series +def encode_all_holidays__daily(dates_series): + """ + Encode all fixed and moving holidays, and corresponding holiday shoulders. + Intended for daily sampled data only. + """ + + def spiral_encoding(dates_series, holiday_date, shoulder): + """ + Encode holiday and shoulders as a spiral: + Rotation over 2pi, with radius goes from 0 to 1 [on holiday] back to 0 + """ + Ndays = len(dates_series) + r = np.zeros(Ndays) + r[holiday_date] = 1. + r[holiday_date-shoulder:holiday_date] = np.linspace(0., 1., shoulder) #!!!!!!! + r[holiday_date+1:holiday_date+shoulder+1] = np.linspace(1., 0., shoulder)#!!!!!!! + theta = np.zeros(Ndays) + theta[holiday_date-shoulder:holiday_date+shoulder+1] = (np.pi/(2.*shoulder + 1))*np.linspace(0., 1., 2*shoulder+1) #!!!!!!! 
+ holiday_encoding = np.vstack((r*np.cos(theta), r*np.sin(theta))) + return holiday_encoding + + Ndays = len(dates_series) + + #Fixed Holidays [add other international ones as needed]: + xmas_dates = get_fixed_date_holidays__daily(dates_series, '12-25') + new_years_dates = get_fixed_date_holidays__daily(dates_series, '01-01') + july4_dates = get_fixed_date_holidays__daily(dates_series, '07-04') + halloween_dates = get_fixed_date_holidays__daily(dates_series, '10-31') + cincodemayo_dates = get_fixed_date_holidays__daily(dates_series, '05-05') + valentines_dates = get_fixed_date_holidays__daily(dates_series, '02-14') + veterans_dates = get_fixed_date_holidays__daily(dates_series, '11-11') + #taxday_dates = get_fixed_date_holidays__daily(dates_series, '04-15') + + + #Rule Based Moving Holidays + thanksgiving_dates = get_thanksgivings__daily(dates_series) + easter_dates = get_Easters__daily(dates_series) + #... Labor Day, Memorial Day, President's Day, MLK Day, Columbus Day, Tax Day + #Custom / Single Event moving Holidays + suberbowl_dates = ['2014-2-2','2015-2-1','2016-2-7','2017-2-5','2018-2-4','2019-2-3'] + + #Dict of holiday dates: shoulder halfwidth [-S, -S+1, ..., holiday, holiday+1, ..., holiday+S] + #for now just use 3 as the shoulder width for all "major" holidays, 0 or 1 for "minor" holidays + #Use ODD numbers for shoulder sizes + holidays = {xmas_dates:3, + new_years_dates:3, + july4_dates:1, + halloween_dates:1, + cincodemayo_dates:1, + valentines_dates:1, + veterans_dates:1, + + thanksgiving_dates:3, + easter_dates:1, + + suberbowl_dates:1, + } + + #Assume additive holiday effects: (which should almost never matter anyway + #for small shoulders unless there is overlap beteen some holidays. E.g. with shoulder=3, + #Christmas and New Year's do NOT overlap.) + _ = np.zeros((2,Ndays)) + encoded_holidays = pd.DataFrame(_,index=date_series) + #Iterate through each holiday, accumulating the effect: + for hd, shoulder in holidays.items(): + #Since date series is potentially over few years, could have e.g. 
several Christmas furing that time range + for holiday_date in hd: + holiday_encoding = spiral_encoding(dates_series, holiday_date, shoulder) + xxxxx += holiday_encoding + return encoded_holidays + + def run(): parser = argparse.ArgumentParser(description='Prepare data') diff --git a/model.py b/model.py index 81cc5d8..6856988 100755 --- a/model.py +++ b/model.py @@ -430,11 +430,13 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a # Compress encoder outputs enc_readout = compressed_readout(encoder_output, hparams, dropout=hparams.encoder_readout_dropout if is_train else 1.0, seed=seed) + # Calculate fingerprint from input features - fingerprint_inp = tf.concat([inp.lagged_x, tf.expand_dims(inp.norm_x, -1)], axis=-1) - fingerprint = make_fingerprint(fingerprint_inp, is_train, hparams.fingerprint_fc_dropout, seed) - # Calculate attention vector - attn_features, attn_weights = attn_readout_v3(enc_readout, inp.attn_window, hparams.attention_heads, + if hparams.use_attn: + fingerprint_inp = tf.concat([inp.lagged_x, tf.expand_dims(inp.norm_x, -1)], axis=-1) + fingerprint = make_fingerprint(fingerprint_inp, is_train, hparams.fingerprint_fc_dropout, seed) + # Calculate attention vector + attn_features, attn_weights = attn_readout_v3(enc_readout, inp.attn_window, hparams.attention_heads, fingerprint, seed=seed) # Run decoder diff --git a/trainer.py b/trainer.py index b2b2749..8ed1e28 100755 --- a/trainer.py +++ b/trainer.py @@ -267,133 +267,133 @@ def has_active(self): return len(self.active()) -class ModelTrainer: - def __init__(self, train_model, eval_model, model_no=0, summary_writer=None, keep_best=5, patience=None): - self.train_model = train_model - self.eval_model = eval_model - self.stopped = False - self.smooth_train_mae = Ema() - self.smooth_train_smape = Ema() - self.smooth_eval_mae = Ema(0.5) - self.smooth_eval_smape = Ema(0.5) - self.smooth_grad = Ema(0.9) - self.summary_writer = summary_writer - self.model_no = model_no - self.best_top_n_loss = [] - self.keep_best = keep_best - self.best_step = 0 - self.patience = patience - self.train_pipe = train_model.inp - self.eval_pipe = eval_model.inp - self.epoch_mae = [] - self.epoch_smape = [] - self.last_epoch = -1 - - @property - def train_ops(self): - model = self.train_model - return [model.train_op, model.update_ema, model.summaries, model.mae, model.smape, model.glob_norm] - - def process_train_results(self, run_results, offset, global_step, write_summary): - offset += 2 - summaries, mae, smape, glob_norm = run_results[offset:offset + 4] - results = self.smooth_train_mae(mae), self.smooth_train_smape(smape), self.smooth_grad(glob_norm) - if self.summary_writer and write_summary: - self.summary_writer.add_summary(summaries, global_step=global_step) - return np.array(results) - - @property - def eval_ops(self): - model = self.eval_model - return [model.mae, model.smape] - - @property - def eval_len(self): - return len(self.eval_ops) - - @property - def train_len(self): - return len(self.train_ops) - - @property - def best_top_loss(self): - return -np.array(self.best_top_n_loss).mean() - - @property - def best_epoch_mae(self): - return min(self.epoch_mae) if self.epoch_mae else np.NaN - - @property - def mean_epoch_mae(self): - return np.mean(self.epoch_mae) if self.epoch_mae else np.NaN - - @property - def mean_epoch_smape(self): - return np.mean(self.epoch_smape) if self.epoch_smape else np.NaN - - @property - def best_epoch_smape(self): - return min(self.epoch_smape) if self.epoch_smape else np.NaN - - 
def remember_for_epoch(self, epoch, mae, smape): - if epoch > self.last_epoch: - self.last_epoch = epoch - self.epoch_mae = [] - self.epoch_smape = [] - self.epoch_mae.append(mae) - self.epoch_smape.append(smape) - - @property - def best_epoch_metrics(self): - return np.array([self.best_epoch_mae, self.best_epoch_smape]) - - @property - def mean_epoch_metrics(self): - return np.array([self.mean_epoch_mae, self.mean_epoch_smape]) - - def process_eval_results(self, run_results, offset, global_step, epoch): - totals = np.zeros(self.eval_len, np.float) - for result in run_results: - items = np.array(result[offset:offset + self.eval_len]) - totals += items - results = totals / len(run_results) - mae, smape = results - if self.summary_writer and global_step > 200: - summary = tf.Summary(value=[ - tf.Summary.Value(tag=f"test/MAE_{self.model_no}", simple_value=mae), - tf.Summary.Value(tag=f"test/SMAPE_{self.model_no}", simple_value=smape), - ]) - self.summary_writer.add_summary(summary, global_step=global_step) - smooth_mae = self.smooth_eval_mae(mae) - smooth_smape = self.smooth_eval_smape(smape) - self.remember_for_epoch(epoch, mae, smape) - - current_loss = -smooth_smape - - prev_best_n = np.mean(self.best_top_n_loss) if self.best_top_n_loss else -np.inf - if self.best_top_n_loss: - log.debug("Current loss=%.3f, old best=%.3f, wait steps=%d", -current_loss, - -max(self.best_top_n_loss), global_step - self.best_step) - - if len(self.best_top_n_loss) >= self.keep_best: - heapq.heappushpop(self.best_top_n_loss, current_loss) - else: - heapq.heappush(self.best_top_n_loss, current_loss) - log.debug("Best loss=%.3f, top_5 avg loss=%.3f, top_5=%s", - -max(self.best_top_n_loss), -np.mean(self.best_top_n_loss), - ",".join(["%.3f" % -mae for mae in self.best_top_n_loss])) - new_best_n = np.mean(self.best_top_n_loss) - - new_best = new_best_n > prev_best_n - if new_best: - self.best_step = global_step - log.debug("New best step %d, current loss=%.3f", global_step, -current_loss) - else: - step_count = global_step - self.best_step - if step_count > self.patience: - self.stopped = True - - return mae, smape, new_best, smooth_mae, smooth_smape +#class ModelTrainer: +# def __init__(self, train_model, eval_model, model_no=0, summary_writer=None, keep_best=5, patience=None): +# self.train_model = train_model +# self.eval_model = eval_model +# self.stopped = False +# self.smooth_train_mae = Ema() +# self.smooth_train_smape = Ema() +# self.smooth_eval_mae = Ema(0.5) +# self.smooth_eval_smape = Ema(0.5) +# self.smooth_grad = Ema(0.9) +# self.summary_writer = summary_writer +# self.model_no = model_no +# self.best_top_n_loss = [] +# self.keep_best = keep_best +# self.best_step = 0 +# self.patience = patience +# self.train_pipe = train_model.inp +# self.eval_pipe = eval_model.inp +# self.epoch_mae = [] +# self.epoch_smape = [] +# self.last_epoch = -1 +# +# @property +# def train_ops(self): +# model = self.train_model +# return [model.train_op, model.update_ema, model.summaries, model.mae, model.smape, model.glob_norm] +# +# def process_train_results(self, run_results, offset, global_step, write_summary): +# offset += 2 +# summaries, mae, smape, glob_norm = run_results[offset:offset + 4] +# results = self.smooth_train_mae(mae), self.smooth_train_smape(smape), self.smooth_grad(glob_norm) +# if self.summary_writer and write_summary: +# self.summary_writer.add_summary(summaries, global_step=global_step) +# return np.array(results) +# +# @property +# def eval_ops(self): +# model = self.eval_model +# return [model.mae, 
model.smape] +# +# @property +# def eval_len(self): +# return len(self.eval_ops) +# +# @property +# def train_len(self): +# return len(self.train_ops) +# +# @property +# def best_top_loss(self): +# return -np.array(self.best_top_n_loss).mean() +# +# @property +# def best_epoch_mae(self): +# return min(self.epoch_mae) if self.epoch_mae else np.NaN +# +# @property +# def mean_epoch_mae(self): +# return np.mean(self.epoch_mae) if self.epoch_mae else np.NaN +# +# @property +# def mean_epoch_smape(self): +# return np.mean(self.epoch_smape) if self.epoch_smape else np.NaN +# +# @property +# def best_epoch_smape(self): +# return min(self.epoch_smape) if self.epoch_smape else np.NaN +# +# def remember_for_epoch(self, epoch, mae, smape): +# if epoch > self.last_epoch: +# self.last_epoch = epoch +# self.epoch_mae = [] +# self.epoch_smape = [] +# self.epoch_mae.append(mae) +# self.epoch_smape.append(smape) +# +# @property +# def best_epoch_metrics(self): +# return np.array([self.best_epoch_mae, self.best_epoch_smape]) +# +# @property +# def mean_epoch_metrics(self): +# return np.array([self.mean_epoch_mae, self.mean_epoch_smape]) +# +# def process_eval_results(self, run_results, offset, global_step, epoch): +# totals = np.zeros(self.eval_len, np.float) +# for result in run_results: +# items = np.array(result[offset:offset + self.eval_len]) +# totals += items +# results = totals / len(run_results) +# mae, smape = results +# if self.summary_writer and global_step > 200: +# summary = tf.Summary(value=[ +# tf.Summary.Value(tag=f"test/MAE_{self.model_no}", simple_value=mae), +# tf.Summary.Value(tag=f"test/SMAPE_{self.model_no}", simple_value=smape), +# ]) +# self.summary_writer.add_summary(summary, global_step=global_step) +# smooth_mae = self.smooth_eval_mae(mae) +# smooth_smape = self.smooth_eval_smape(smape) +# self.remember_for_epoch(epoch, mae, smape) +# +# current_loss = -smooth_smape +# +# prev_best_n = np.mean(self.best_top_n_loss) if self.best_top_n_loss else -np.inf +# if self.best_top_n_loss: +# log.debug("Current loss=%.3f, old best=%.3f, wait steps=%d", -current_loss, +# -max(self.best_top_n_loss), global_step - self.best_step) +# +# if len(self.best_top_n_loss) >= self.keep_best: +# heapq.heappushpop(self.best_top_n_loss, current_loss) +# else: +# heapq.heappush(self.best_top_n_loss, current_loss) +# log.debug("Best loss=%.3f, top_5 avg loss=%.3f, top_5=%s", +# -max(self.best_top_n_loss), -np.mean(self.best_top_n_loss), +# ",".join(["%.3f" % -mae for mae in self.best_top_n_loss])) +# new_best_n = np.mean(self.best_top_n_loss) +# +# new_best = new_best_n > prev_best_n +# if new_best: +# self.best_step = global_step +# log.debug("New best step %d, current loss=%.3f", global_step, -current_loss) +# else: +# step_count = global_step - self.best_step +# if step_count > self.patience: +# self.stopped = True +# +# return mae, smape, new_best, smooth_mae, smooth_smape def train(features_set, sampling_period, name, hparams, multi_gpu=False, n_models=1, train_completeness_threshold=0.01, From 984a84815f300444e17a6c685935c1cb3b8a27fd Mon Sep 17 00:00:00 2001 From: gk Date: Tue, 24 Jul 2018 16:46:18 -0700 Subject: [PATCH 25/42] finished basic holiday encoding except thxgiving, easter --- Readme.md | 12 ++-- holiday_features.py | 163 ++++++++++++++++++++++++++++++++++++++++++++ input_pipe.py | 20 +++--- make_features.py | 148 ++++++---------------------------------- 4 files changed, 200 insertions(+), 143 deletions(-) create mode 100644 holiday_features.py diff --git a/Readme.md b/Readme.md index 
ca66990..412d7d0 100755 --- a/Readme.md +++ b/Readme.md @@ -62,9 +62,9 @@ ll data/ python3 make_features.py data/vars ours daily full --add_days=50 #python3 make_features.py data/vars kaggle daily full --add_days=63 -python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=10000 --patience=5 --horizon_window_size=50 --history_window_size=100 --max_epoch=10 - +python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_decay=0.99 --max_steps=11500 --save_from_step=100 --patience=5 --max_epoch=10 +--horizon_window_size=50 --history_window_size=100 @@ -103,14 +103,14 @@ python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_de To do: -0. print out the SMAPE for the actual data [current is doing SMAPE of the unrounded log1p(data) which will likely be much smaller than for real] 0. SMAPEs on ground truth 2018 1. why encoder_state NANs in it for small train window lengths [is it train/predict window completeness thresholds?] 1. performance heatmaps 2. for weekly. monthly inputs, need to change few places in tensorflow code 3. Prediction intervals -4. Architecture improvements: his is not the usual encoder-decoder: add C context vector to every decoder step -4. bi, di, MH -5. custom attention +4. Architecture improvements: bi enc, dil +4. K step recursive as hybrid of 1step recursive and K step direct +4. MLP direct multihorizon +5. custom attention [e.g. position specific] 6. VAE aug \ No newline at end of file diff --git a/holiday_features.py b/holiday_features.py new file mode 100644 index 0000000..7d9723d --- /dev/null +++ b/holiday_features.py @@ -0,0 +1,163 @@ +#Define few functions to create holiday features from the time series +#For now, these are only intended to ork with DAILY sampled data + +import pandas as pd +import numpy as np + + + + +def encode_all_holidays__daily(dates_range): + """ + Encode all fixed and moving holidays, and corresponding holiday shoulders. + Intended for daily sampled data only. + """ + + + def get_fixed_date_holidays__daily(dates_range, month_day): + """ + Get YYYY-mm-DD holidays, + for holidays that occur yearly on fixed dates. + + For daily sampled data only. + + In USA: + Christmas, New Year, 4th of July, Halloween, Cinco de Mayo + Valentine's Day, Veteran's Day + + other international: + ... + """ +# return ['{}-{:02d}-{:02d}'.format(i.year,i.month,i.day) for i in dates_range if ((i.month==int(month_day[:2])) and (i.day==int(month_day[4:])))] +# print([(i.month, i.day) for i in dates_range]) +# print([i for i in dates_range if ((i.month==int(month_day[:2])) and (i.day==int(month_day[4:])))]) + return [i.strftime('%Y-%m-%d') for i in dates_range if ((i.month==int(month_day[:2])) and (i.day==int(month_day[3:])))] + + # ============================================================================= + # MOVING holidays [variable date] + # ============================================================================= + def get_thanksgivings__daily(dates_range): + """ + Get Thanksgiving holiday dates within the few years time range + """ + # 4th Thurs of Novmber... + # if (month==11) and (dayofweek=='Thurs') and (22<=dayofmonth<=28) + thanksgiving_dates = [] + #... + return thanksgiving_dates + + def get_Easters__daily(dates_range): + """ + Get Easter holiday dates within the few years time range + """ + easter_dates = [] + #... 
+ return easter_dates + +# def encode_custom_dates__daily(dates_range,dates_list): +# """ +# Encode custom days and optionally shoulder days. +# For daily sampled data only. +# +# E.g. Superbowl Sunday +# suberbowl_dates = ['2014-02-02','2015-02-01','2016-02-07','2017-02-05','2018-02-04','2019-02-03'] +# shoulders = [...] +# """ +# return dates_range + + def spiral_encoding(dates_range, holiday_date, shoulder): + """ + Encode holiday and shoulders as a spiral: + Rotation over 2pi, with radius goes from 0 to 1 [on holiday] back to 0 + """ + N_real_days = len(dates_range) + real_min = min(dates_range) + real_max = max(dates_range) + dates_range_padded = pd.date_range(real_min-shoulder-2, real_max+shoulder+2, freq='D') +# print(dates_range) +# print(dates_range_padded) + + df = pd.DataFrame() + df['date'] = dates_range_padded.values + Ndays = len(df) + +# print(holiday_date) + _ = df.loc[df['date']==holiday_date] + if len(_)>0: + ind = _.index.values[0] + #If this holiday is completely out of bounds of the time series input, + #ignore it [assumed additive holiday effects, so just add 0's] + else: + return np.zeros((N_real_days,2)) + + #For radius: triangle kernel centered on holiday + r = np.zeros(Ndays) + r[ind-shoulder-1:ind+1] = np.linspace(0.,1.,shoulder+2) + r[ind:ind+shoulder+2] = np.linspace(1.,0.,shoulder+2) + + #For anlge: go from phase [0,pi], with holiday at pi/2 + theta = np.zeros(Ndays) + theta[ind-shoulder-1:ind+shoulder+2] = np.linspace(0., np.pi, 2*shoulder+3) + #Convert to Cartesian: + df['r'] = r + df['theta'] = theta + df['x'] = df['r']*np.cos(df['theta']) + df['y'] = df['r']*np.sin(df['theta']) + v = df[((df['date']>=real_min) & (df['date']<=real_max))] + v = v[['x','y']].values +# print(v, v.sum(axis=0), v.sum(axis=1)) + return v + + + + Ndays = len(dates_range) + + #Fixed Holidays [add other international ones as needed]: + xmas_dates = get_fixed_date_holidays__daily(dates_range, '12-25') + new_years_dates = get_fixed_date_holidays__daily(dates_range, '01-01') + july4_dates = get_fixed_date_holidays__daily(dates_range, '07-04') + halloween_dates = get_fixed_date_holidays__daily(dates_range, '10-31') + cincodemayo_dates = get_fixed_date_holidays__daily(dates_range, '05-05') + valentines_dates = get_fixed_date_holidays__daily(dates_range, '02-14') + veterans_dates = get_fixed_date_holidays__daily(dates_range, '11-11') + #taxday_dates = get_fixed_date_holidays__daily(dates_range, '04-15') + + + #Rule Based Moving Holidays + thanksgiving_dates = get_thanksgivings__daily(dates_range) + easter_dates = get_Easters__daily(dates_range) + #... 
Labor Day, Memorial Day, President's Day, MLK Day, Columbus Day, Tax Day + #Custom / Single Event moving Holidays + suberbowl_dates = ['2014-02-02','2015-02-01','2016-02-07','2017-02-05','2018-02-04','2019-02-03'] + + #Dict of holiday dates: shoulder halfwidth [-S, -S+1, ..., holiday, holiday+1, ..., holiday+S] + #for now just use 3 as the shoulder width for all "major" holidays, 0 or 1 for "minor" holidays + #Use ODD numbers for shoulder sizes + holidays = {'xmas_dates':(xmas_dates,3), + 'new_years_dates':(new_years_dates,3), + 'july4_dates':(july4_dates,1), + 'halloween_dates':(halloween_dates,1), + 'cincodemayo_dates':(cincodemayo_dates,1), + 'valentines_dates':(valentines_dates,1), + 'veterans_dates':(veterans_dates,1), + 'thanksgiving_dates':(thanksgiving_dates,3), + 'easter_dates':(easter_dates,1), + 'suberbowl_dates':(suberbowl_dates,1), + } +# print(holidays) + + + #Assume additive holiday effects: (which should almost never matter anyway + #for small shoulders unless there is overlap beteen some holidays. E.g. with shoulder=3, + #Christmas and New Year's do NOT overlap.) +# encoded_holidays = pd.DataFrame() +# encoded_holidays['date'] = dates_range.values + _ = np.zeros((Ndays,2)) + #Iterate through each holiday, accumulating the effect: + for mmm in holidays.values(): + shoulder = mmm[1] + #Since date series is potentially over few years, could have e.g. several Christmas furing that time range + for hd in mmm[0]: + _ += spiral_encoding(dates_range, hd, shoulder) +# print(_) + return _ \ No newline at end of file diff --git a/input_pipe.py b/input_pipe.py index a1a7d0e..e8d2787 100755 --- a/input_pipe.py +++ b/input_pipe.py @@ -131,7 +131,7 @@ def cut(self, counts, start, end): :param counts: counts timeseries :param start: start index :param end: end index - :return: tuple (train_counts, test_counts, lagged_counts, [dow,woy,moy,year]) + :return: tuple (train_counts, test_counts, lagged_counts, [subset of: dow,woy,moy,doy,year,holidays]) """ # Pad counts to ensure we have enough array length for prediction counts = tf.concat([counts, tf.fill([self.horizon_window_size], np.NaN)], axis=0) @@ -146,6 +146,8 @@ def cut(self, counts, start, end): if self.sampling_period=='daily': cropped_dow = self.inp.dow[start:end] cropped_woy = self.inp.woy[start:end] + cropped_doy = self.inp.doy[start:end] + cropped_holidays = self.inp.holidays[start:end] # cropped_moy = 0*cropped_dow #Month information is alreayd contained in week information. 
COuld incude anyway to be explicit, but for now do not use as a feature elif self.sampling_period=='weekly': cropped_woy = self.inp.woy[start:end] @@ -195,7 +197,7 @@ def cut(self, counts, start, end): if self.features_set=='arturius' or self.features_set=='full':#for now, for full just do sam [include lagged] if self.sampling_period=='daily': - return x_counts, y_counts, lagged_count, cropped_dow, cropped_woy, cropped_year + return x_counts, y_counts, lagged_count, cropped_dow, cropped_woy, cropped_doy, cropped_year, cropped_holidays if self.sampling_period=='weekly': return x_counts, y_counts, lagged_count, cropped_woy, cropped_year if self.sampling_period=='monthly': @@ -276,7 +278,7 @@ def make_features(self, *args): print(args) if self.features_set == 'arturius': if self.sampling_period == 'daily': - x_counts, y_counts, lagged_counts, dow, woy, year, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args + x_counts, y_counts, lagged_counts, dow, woy, doy, year, holidays, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args elif self.sampling_period == 'weekly': x_counts, y_counts, lagged_counts, woy, year, pf_agent, pf_country, pf_site, page_ix, count_median, year_autocorr, quarter_autocorr, count_pctl_100 = args elif self.sampling_period == 'monthly': @@ -285,7 +287,7 @@ def make_features(self, *args): # count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance) elif self.features_set == 'full': if self.sampling_period == 'daily': - x_counts, y_counts, lagged_counts, dow, woy, year, page_ix, count_median, year_autocorr, quarter_autocorr,\ + x_counts, y_counts, lagged_counts, dow, woy, doy, year, holidays, page_ix, count_median, year_autocorr, quarter_autocorr,\ count_pctl_0, count_pctl_5, count_pctl_25, count_pctl_75, count_pctl_95, count_pctl_100, count_variance = args elif self.sampling_period == 'weekly': x_counts, y_counts, lagged_counts, woy, year, page_ix, count_median, year_autocorr, quarter_autocorr,\ @@ -299,7 +301,9 @@ def make_features(self, *args): # ============================================================================= if self.sampling_period == 'daily': x_dow, y_dow = tf.split(dow, [self.history_window_size, self.horizon_window_size], axis=0) - x_woy, y_woy = tf.split(woy, [self.history_window_size, self.horizon_window_size], axis=0) #need to see how to fit in woy into inputs to this func + x_woy, y_woy = tf.split(woy, [self.history_window_size, self.horizon_window_size], axis=0) + x_doy, y_doy = tf.split(doy, [self.history_window_size, self.horizon_window_size], axis=0) + x_holidays, y_holidays = tf.split(holidays, [self.history_window_size, self.horizon_window_size], axis=0) elif self.sampling_period == 'weekly': x_woy, y_woy = tf.split(woy, [self.history_window_size, self.horizon_window_size], axis=0) elif self.sampling_period == 'monthly': @@ -355,7 +359,7 @@ def make_features(self, *args): # Train features, depending on measurement frequency x_features = tf.expand_dims(norm_x_counts, -1) # [n_timesteps] -> [n_timesteps, 1] if self.sampling_period == 'daily': - x_features = tf.concat([x_features, x_dow, x_woy, x_year], axis=1) + x_features = tf.concat([x_features, x_dow, x_woy, tf.expand_dims(x_doy,-1), x_year, x_holidays], axis=1) elif self.sampling_period == 'weekly': x_features = tf.concat([x_features, x_woy, x_year], axis=1) elif self.sampling_period == 'monthly': @@ -368,7 +372,7 @@ def 
make_features(self, *args): # Test features if self.sampling_period == 'daily': - y_features = tf.concat([y_dow, y_woy, y_year], axis=1) + y_features = tf.concat([y_dow, y_woy, tf.expand_dims(y_doy,-1), y_year, y_holidays], axis=1) elif self.sampling_period == 'weekly': y_features = tf.concat([y_woy, y_year], axis=1) elif self.sampling_period == 'monthly': @@ -561,7 +565,7 @@ def page_features(inp: VarFeeder, features_set): So do not need to pass in here the time-varying ones like day of week, month of year, lagged, etc. - DO NOT return dow, woy, moy, year + DO NOT return dow, woy, moy, year, doy, holidays """ if features_set=='arturius': diff --git a/make_features.py b/make_features.py index f8db776..5940cb0 100755 --- a/make_features.py +++ b/make_features.py @@ -9,6 +9,8 @@ import numba from typing import Tuple, Dict, Collection, List +from holiday_features import encode_all_holidays__daily + def read_cached(name) -> pd.DataFrame: """ @@ -270,133 +272,6 @@ def normalize(values: np.ndarray): - - - -def get_fixed_date_holidays__daily(dates_series, month_day): - """ - Encode holidays and shoulder days, for holidays that occur yearly on fixed - dates. - For daily sampled data only. - - In USA: - Christmas, New Year, 4th of July, Halloween, Cinco de Mayo - Valentine's Day, Veteran's Day - - other international: - ... - """ - return dates_series - - -# ============================================================================= -# MOVING holidays [variable date] -# ============================================================================= -def get_thanksgivings__daily(dates_series): - """ - Get Thanksgiving holiday dates within the few years time range - """ -# 4th Thurs of Novmber... -# if (month==11) and (dayofweek=='Thurs') and (22<=dayofmonth<=28) - thanksgiving_dates = [] - #... - return thanksgiving_dates - -def get_Easters__daily(dates_series): - """ - Get Easter holiday dates within the few years time range - """ - easter_dates = [] - #... - return easter_dates - - - -def encode_custom_dates__daily(dates_series,dates_list): - """ - Encode custom days and optionally shoulder days. - For daily sampled data only. - - E.g. Superbowl Sunday - suberbowl_dates = ['2014-2-2','2015-2-1','2016-2-7','2017-2-5','2018-2-4','2019-2-3'] - shoulders = [...] - """ - return dates_series - - -def encode_all_holidays__daily(dates_series): - """ - Encode all fixed and moving holidays, and corresponding holiday shoulders. - Intended for daily sampled data only. - """ - - def spiral_encoding(dates_series, holiday_date, shoulder): - """ - Encode holiday and shoulders as a spiral: - Rotation over 2pi, with radius goes from 0 to 1 [on holiday] back to 0 - """ - Ndays = len(dates_series) - r = np.zeros(Ndays) - r[holiday_date] = 1. - r[holiday_date-shoulder:holiday_date] = np.linspace(0., 1., shoulder) #!!!!!!! - r[holiday_date+1:holiday_date+shoulder+1] = np.linspace(1., 0., shoulder)#!!!!!!! - theta = np.zeros(Ndays) - theta[holiday_date-shoulder:holiday_date+shoulder+1] = (np.pi/(2.*shoulder + 1))*np.linspace(0., 1., 2*shoulder+1) #!!!!!!! 
- holiday_encoding = np.vstack((r*np.cos(theta), r*np.sin(theta))) - return holiday_encoding - - Ndays = len(dates_series) - - #Fixed Holidays [add other international ones as needed]: - xmas_dates = get_fixed_date_holidays__daily(dates_series, '12-25') - new_years_dates = get_fixed_date_holidays__daily(dates_series, '01-01') - july4_dates = get_fixed_date_holidays__daily(dates_series, '07-04') - halloween_dates = get_fixed_date_holidays__daily(dates_series, '10-31') - cincodemayo_dates = get_fixed_date_holidays__daily(dates_series, '05-05') - valentines_dates = get_fixed_date_holidays__daily(dates_series, '02-14') - veterans_dates = get_fixed_date_holidays__daily(dates_series, '11-11') - #taxday_dates = get_fixed_date_holidays__daily(dates_series, '04-15') - - - #Rule Based Moving Holidays - thanksgiving_dates = get_thanksgivings__daily(dates_series) - easter_dates = get_Easters__daily(dates_series) - #... Labor Day, Memorial Day, President's Day, MLK Day, Columbus Day, Tax Day - #Custom / Single Event moving Holidays - suberbowl_dates = ['2014-2-2','2015-2-1','2016-2-7','2017-2-5','2018-2-4','2019-2-3'] - - #Dict of holiday dates: shoulder halfwidth [-S, -S+1, ..., holiday, holiday+1, ..., holiday+S] - #for now just use 3 as the shoulder width for all "major" holidays, 0 or 1 for "minor" holidays - #Use ODD numbers for shoulder sizes - holidays = {xmas_dates:3, - new_years_dates:3, - july4_dates:1, - halloween_dates:1, - cincodemayo_dates:1, - valentines_dates:1, - veterans_dates:1, - - thanksgiving_dates:3, - easter_dates:1, - - suberbowl_dates:1, - } - - #Assume additive holiday effects: (which should almost never matter anyway - #for small shoulders unless there is overlap beteen some holidays. E.g. with shoulder=3, - #Christmas and New Year's do NOT overlap.) - _ = np.zeros((2,Ndays)) - encoded_holidays = pd.DataFrame(_,index=date_series) - #Iterate through each holiday, accumulating the effect: - for hd, shoulder in holidays.items(): - #Since date series is potentially over few years, could have e.g. several Christmas furing that time range - for holiday_date in hd: - holiday_encoding = spiral_encoding(dates_series, holiday_date, shoulder) - xxxxx += holiday_encoding - return encoded_holidays - - - def run(): parser = argparse.ArgumentParser(description='Prepare data') parser.add_argument('data_dir') @@ -500,6 +375,13 @@ def run(): year_period = WEEK_NUMBER_MAX / (2 * np.pi) #!!!! need to be carefuly non-uniform weeks [52 has 10 days ???] ---> actually in pandas numbering goes to 53, depending on start day of week for that year woy_norm = week / year_period #not sure if by default this starts on Monday vs Sunday woy = np.stack([np.cos(woy_norm), np.sin(woy_norm)], axis=-1) + #Also day of year number. Do not do same circle encoding, just let it be usual ordinal. + #Also, careful w leapyear. After February, year's w it would be out of phase vs. years w/o leap year + #Instead, could leave a gap for leapyear. If that particular year has it, fill it in with that ordinal, + #otherwise the model just does not have that index. 
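As a concrete, hypothetical way to realize the leap-year gap described in the comment above (not part of this changeset), each date can be indexed against a leap-year template so that ordinal 60 (Feb 29) exists only in leap years and every later day keeps the same ordinal in every year:

import pandas as pd

# Sketch only: leap-year-aligned day-of-year, leaving a gap at 60 in non-leap years.
dates = pd.date_range('2017-02-26', '2017-03-02', freq='D')  # a non-leap year
doy_gapped = pd.to_datetime(['2016-' + d.strftime('%m-%d') for d in dates]).dayofyear
print(list(doy_gapped))  # [57, 58, 59, 61, 62]: 60 (Feb 29) is simply skipped in 2017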
+ doy = features_times.dayofyear.values + #If not doing the circle encoding, then normalize: + doy = normalize(doy) if args.sampling_period=='weekly': @@ -525,6 +407,14 @@ def run(): year = (features_times.year - REFERENCE_FIRST_YEAR)/float(REFERENCE_LAST_YEAR-REFERENCE_FIRST_YEAR) + #Holidays: try my "spiral encoding": + #Right now only doing for daily sampled data: + if args.sampling_period=='daily': + holidays = encode_all_holidays__daily(features_times) + + + + # Assemble indices for quarterly lagged data lagged_ix = np.stack(lag_indexes(data_start, features_end), axis=-1) @@ -549,8 +439,6 @@ def run(): year_autocorr=year_autocorr, quarter_autocorr=quarter_autocorr, #dow=dow,#N x 2 array since encoded week periodicity as complex number - - #woy=woy,#!!!!!!!! count_pctl_100=percentiles[5],#max #!!!!!!!!!!!!!!!! just to see what happens: apend one of my features. ) @@ -595,6 +483,8 @@ def run(): if args.sampling_period=='daily': tensors['dow']=dow tensors['woy']=woy #and want want week number too, aggregating last ~10 days into week 52 + tensors['doy']=doy + tensors['holidays']=holidays elif args.sampling_period=='weekly': tensors['woy']=woy elif args.sampling_period=='monthly': From 1a23d99056104a01307cb9b313cf4eec6956af5a Mon Sep 17 00:00:00 2001 From: gk Date: Wed, 25 Jul 2018 00:34:41 -0700 Subject: [PATCH 26/42] finished K-step lookback - moderate SMAPE improvement --- Readme.md | 17 ++++++++++++++++- hparams.py | 6 ++++++ model.py | 53 ++++++++++++++++++++++++++++++++--------------------- trainer.py | 2 +- 4 files changed, 55 insertions(+), 23 deletions(-) diff --git a/Readme.md b/Readme.md index 412d7d0..56ea667 100755 --- a/Readme.md +++ b/Readme.md @@ -41,7 +41,17 @@ See also [detailed model description](how_it_works.md) ----------------------------------- -GK modifications for own data: +GK modifications for own forecasting application: + +1) Several architecture improvements: + - give encoded representation vector as context to every decoder timestep + - K step lookback: ideally the RNN would learn a hidden state representation that ~completely describes state of the system. In realiy, that is too much to expect. In addition to previous timestep prediction y_i-1, also feed in y_i-2,...,y_i-K for K-step lookback. + - performance analysis of validation set SMAPE as function of history/horizon window sizes [randomized uniformly in training over all min-max range of history/horizon window sizes] + - more in development +2) More features, relevant to my data. More focus on seasonalities, and "spiral encoding" for holidays. Automated data augmentation. +3) Dealing with holes/sparsity as in my data. + + 1. PREPROCESS.py - Maximize reuse of existing architecture: just put my data in exact same format as Kaggle competition csv's 2. $source activate gktf. #previously set up a conda environment w/ Python 3.6, tensorflow 1.4.0, to match same versions as Kaggle solution 3. $cd ..../kaggle-web-traffic @@ -102,6 +112,11 @@ python3 trainer.py full daily --name s32 --hparam_set=s32 --n_models=3 --asgd_de +#For doing performance analysis of SMAPE as function of history/horizon window sizes: +./RUN_MANY_TRAIN_VAL_WINDOWS + + +---------------------------------------------------------------------------------------------------------------------------------------------------------- To do: 0. SMAPEs on ground truth 2018 1. why encoder_state NANs in it for small train window lengths [is it train/predict window completeness thresholds?] 
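For reference, the per-step window randomization described in the Readme above can be sketched in isolation as follows. This is illustrative only: the bounds mirror the history_window_size_minmax / horizon_window_size_minmax defaults added to hparams.py below, the derived fields mirror random_draw_history_and_horizon_window_sizes in trainer.py, and the completeness thresholds are example values.

import numpy as np

# Illustrative sketch, not part of the patch itself.
HISTORY_MINMAX = (7, 365)   # history_window_size_minmax default
HORIZON_MINMAX = (7, 60)    # horizon_window_size_minmax default

def draw_window_sizes(train_completeness_threshold=0.01, predict_completeness_threshold=0.0):
    history = np.random.randint(HISTORY_MINMAX[0], HISTORY_MINMAX[1] + 1)
    horizon = np.random.randint(HORIZON_MINMAX[0], HORIZON_MINMAX[1] + 1)
    return {
        'history_window_size': history,
        'horizon_window_size': horizon,
        'attn_window': history - horizon + 1,
        'max_train_empty': int(round(history * (1 - train_completeness_threshold))),
        'max_predict_empty': int(round(horizon * (1 - predict_completeness_threshold))),
    }

print(draw_window_sizes())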
diff --git a/hparams.py b/hparams.py index 4b28e7e..a899502 100755 --- a/hparams.py +++ b/hparams.py @@ -54,6 +54,12 @@ #we can more fairly assess performance over range of history/horizon windows: history_window_size_minmax=[7,365], horizon_window_size_minmax=[7,60], + + #Lookback K steps: [without specifying, default previous Kaggle setting is K=1]: + #for predicting y_i, insteda of just feeding in previous K=1 prediction (y_i-1), + #feed in all previous K predictions: y_ + LOOKBACK_K = 3, #!!!!Can NOT set this to be bigger than min history size (history_window_size_minmax[0]) + #since then depending on random draw would possibly need to look back further than history size. ) diff --git a/model.py b/model.py index 6856988..1aaab86 100755 --- a/model.py +++ b/model.py @@ -413,6 +413,8 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a self.hparams = hparams self.seed = seed self.inp = inp + self.lookback_K_actual = min(hparams.LOOKBACK_K, hparams.history_window_size_minmax[0]) + print('self.lookback_K_actual',self.lookback_K_actual) encoder_output, h_state, c_state = make_encoder(inp.time_x, inp.encoder_features_depth, is_train, hparams, seed, transpose_output=False) @@ -441,12 +443,10 @@ def __init__(self, inp: InputPipe, hparams, is_train, seed, graph_prefix=None, a # Run decoder #... = decoder(encoder_state, attn_features, prediction_inputs, previous_y) - print('inp.norm_x[:, -1]',inp.norm_x[:, -1]) - print('inp.time_y',inp.time_y) decoder_targets, decoder_outputs = self.decoder(encoder_state, attn_features if hparams.use_attn else None, summary_z if hparams.RECURSIVE_W_ENCODER_CONTEXT else None, - inp.time_y, inp.norm_x[:, -1]) #in decoder function def: inp.time_y = "prediction_inputs"; inp.norm_x[:, -1] = "previous_y" (i.e. the final x normalizd)) + inp.time_y, inp.norm_x[:, -self.lookback_K_actual:]) #in decoder function def: inp.time_y = "prediction_inputs"; inp.norm_x[:, -1] = "previous_y" (i.e. 
the final x normalizd)) # Decoder activation losses dec_stab_loss = rnn_stability_loss(decoder_outputs, hparams.decoder_stability_loss / inp.horizon_window_size) dec_activation_loss = rnn_activation_loss(decoder_outputs, hparams.decoder_activation_loss / inp.horizon_window_size) @@ -493,7 +493,7 @@ def decoder(self, encoder_state, attn_features, summary_z, prediction_inputs, pr """ :param encoder_state: shape [batch_size, encoder_rnn_depth] :param prediction_inputs: features for prediction days, tensor[batch_size, time, input_depth] - :param previous_y: Last day pageviews, shape [batch_size] + :param previous_y: Last day pageviews, shape [batch_size, self.lookback_K_actual] :param attn_features: Additional features from attention layer, shape [batch, horizon_window_size, readout_depth*n_heads] :return: decoder rnn output """ @@ -511,7 +511,7 @@ def build_cell(idx): attn_depth = attn_features.shape[-1].value if attn_features is not None else 0 context_depth = summary_z.shape[-1].value if self.hparams.RECURSIVE_W_ENCODER_CONTEXT is not None else 0 #Should just be the encoder RNN depth print('attn_depth',attn_depth, 'context_depth',context_depth) - input_size = attn_depth + context_depth + prediction_inputs.shape[-1].value + 1 if idx == 0 else self.hparams.rnn_depth + input_size = attn_depth + context_depth + prediction_inputs.shape[-1].value + self.lookback_K_actual if idx == 0 else self.hparams.rnn_depth input_size = tf.Print(input_size, ['attn_depth',tf.shape(attn_depth),attn_depth, 'context_depth',tf.shape(context_depth),context_depth, 'input_size',tf.shape(input_size),input_size])#!!!!!!!!!! cell = rnn.DropoutWrapper(cell, dtype=tf.float32, input_size=input_size, variational_recurrent=hparams.decoder_variational_dropout[idx], @@ -552,7 +552,9 @@ def build_cell(idx): # Stop condition for decoding loop def cond_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, array_outputs: tf.TensorArray): - return timestep < predict_timesteps + return timestep < predict_timesteps #If doing k2-step lookahead prediction for k2>1, possibly want to + #adjust condition to do appropriate n steps > predict_timesteps... and then combine predictions for those steps to get single prediction, + #e.g. by exponential weighting backward in time from this step. 
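To make the K-step lookback bookkeeping in the loop below concrete, this is the rolling-buffer update on its own (plain NumPy, illustrative only; the batch size and K are arbitrary example values):

import numpy as np

# Sketch only: keep the last K predictions per series, drop the oldest column
# and append the newest projected output at every decoder timestep.
batch_size, K = 4, 3
prev_output = np.zeros((batch_size, K), dtype=np.float32)            # last K predictions
projected_output = np.full((batch_size, 1), 0.7, dtype=np.float32)   # current step's prediction
updated_outputs = np.concatenate([prev_output[:, 1:], projected_output], axis=1)
assert updated_outputs.shape == (batch_size, K)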
# FC projecting layer to get single predicted value from RNN output def project_output(tensor): @@ -563,7 +565,7 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar """ Main decoder loop :param timestep: timestep number - :param prev_output: Output(prediction) from previous step + :param prev_output: Output(prediction) from previous step --> from previous K steps: self.lookback_K_actual :param prev_state: RNN state tensor from previous step :param array_targets: Predictions, each step will append new value to this array :param array_outputs: Raw RNN outputs (for regularization losses) @@ -571,6 +573,8 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar """ # RNN inputs for current step features = inputs_by_time[timestep] +# print('features',features) +# print('previous_y',previous_y) # [batch, horizon_window_size, readout_depth * n_heads] -> [batch, readout_depth * n_heads] if attn_features is not None: @@ -578,19 +582,13 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar attn = attn_features[:, timestep, :] # Append previous predicted value + attention vector to input features next_input = tf.concat([prev_output, features, attn], axis=1) + else: - next_input = tf.concat([prev_output, features], axis=1) # Append previous predicted value to input features - + next_input = tf.concat([prev_output, features], axis=1) #If using more of a typical encoder-decoder, also have encoder context each time: if self.hparams.RECURSIVE_W_ENCODER_CONTEXT: next_input = tf.concat([next_input, summary_z], axis=1) #!!!!!!!!summary_z[-1] -# if self.hparams.encoder_rnn_layers == 1: -# next_input = tf.concat([next_input, summary_z], axis=1) #!!!!!!!!summary_z[-1] -# elif self.hparams.encoder_rnn_layers > 1: -# next_input = tf.concat([next_input, summary_z[-1]], axis=1) #!!!!!!!!summary_z[-1] -# next_input = tf.Print(next_input,['next_input',tf.shape(next_input),next_input]) - # Run RNN cell output, state = cell(next_input, prev_state) @@ -602,15 +600,28 @@ def loop_fn(timestep, prev_output, prev_state, array_targets: tf.TensorArray, ar if return_raw_outputs: array_outputs = array_outputs.write(timestep, output) array_targets = array_targets.write(timestep, projected_output) + + #Update prev_output + #(delete oldest left, append rightmost) + if self.lookback_K_actual > 1: + prev_output = prev_output[:,1:] #All examples in batch, exclude oldest output [leftmost oldest, rightmost most recent] +# print('prev_output',prev_output) +# print('projected_output',projected_output) + updated_outputs = tf.concat([prev_output,projected_output],axis=1) +# print('updated_outputs',updated_outputs) + elif self.lookback_K_actual==1: + updated_outputs = prev_output + # Increment timestep and return - return timestep + 1, projected_output, state, array_targets, array_outputs #!!!!!! quantiles: projected_output will be diff dims + return timestep + 1, updated_outputs, state, array_targets, array_outputs #!!!!!! quantiles: projected_output will be diff dims # Initial values for loop - loop_init = [tf.constant(0, dtype=tf.int32), - tf.expand_dims(previous_y, -1), - encoder_state, - tf.TensorArray(dtype=tf.float32, size=predict_timesteps), - tf.TensorArray(dtype=tf.float32, size=predict_timesteps) if return_raw_outputs else tf.constant(0)] #!!!!!!! size= ... 
x N_pctls + loop_init = [tf.constant(0, dtype=tf.int32), #timestep +# previous_y if self.lookback_K_actual > 1 else tf.expand_dims(previous_y, -1), #prev_output + previous_y, #prev_output + encoder_state, #prev_state + tf.TensorArray(dtype=tf.float32, size=predict_timesteps), #array_targets + tf.TensorArray(dtype=tf.float32, size=predict_timesteps) if return_raw_outputs else tf.constant(0)] #array_outputs #!!!!!!! size= ... x N_pctls # Run the loop _timestep, _projected_output, _state, targets_ta, outputs_ta = tf.while_loop(cond_fn, loop_fn, loop_init) diff --git a/trainer.py b/trainer.py index 8ed1e28..108ebc1 100755 --- a/trainer.py +++ b/trainer.py @@ -624,7 +624,7 @@ def ema_vars(model): for _ in tqr: #!!!!!!!!!! Variable random length train predict windows #Random draw the train, predict window lengths - print(_) +# print(_) trainer = random_draw_history_and_horizon_window_sizes(trainer) # print('+++++++++++++++', [(TT.train_model.inp.history_window_size,TT.train_model.inp.horizon_window_size) for TT in trainer.trainers]) # print('--------', [(TT.train_model.inp.max_train_empty,TT.train_model.inp.max_predict_empty) for TT in trainer.trainers]) From 046ba10c64e07b64efa9bfb5460e32320af899da Mon Sep 17 00:00:00 2001 From: gk Date: Thu, 26 Jul 2018 10:53:56 -0700 Subject: [PATCH 27/42] starting on SMAPE heatmaps --- PERFORMANCE_HEATMAPS.py | 32 +++++++++++++++++++++ PREPROCESS.py | 8 ++++-- RUN_MANY_TRAIN_VAL_WINDOWS.sh | 53 ++++++++++++++++++++++++++++------- Readme.md | 4 +-- holiday_features.py | 38 +++++++++++++------------ model.py | 2 +- trainer.py | 32 +++++++++++++++++++-- 7 files changed, 133 insertions(+), 36 deletions(-) create mode 100644 PERFORMANCE_HEATMAPS.py diff --git a/PERFORMANCE_HEATMAPS.py b/PERFORMANCE_HEATMAPS.py new file mode 100644 index 0000000..9da96a9 --- /dev/null +++ b/PERFORMANCE_HEATMAPS.py @@ -0,0 +1,32 @@ +import numpy as np +import matplotlib.pyplot as plt +import argparse + + + + + +def make_heatmaps(logdir='data/logs', K_last=3): + #Load all saved numpy arrays of performance metrics per PREDICTION run: + all_runs = [] + eval_smapes_lastKmean = [] + array_names = [i for i in ssssss if i.endswith('epochs_performance.npy')] + run_names = [i.split('_')[0] for i in array_names] + for i, an in enumerate(array_names): + x = np.load(an) + #Get last K epoch metrics: + j = x[-K_last:] + eval_smapes_lastKmean.append(np.mean(j[:,5])) + all_runs.append(x) + + + + +if __name__=='__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--logdir', default='data/logs', help="Directory where numpy arrays of performance are") + parser.add_argument('--K_last', default=3, dest='K_last', help='Save out per EPOCH metrics (NOT per step, only per EPOCH') + args = parser.parse_args() + param_dict = dict(vars(args)) + + make_heatmaps(**param_dict) \ No newline at end of file diff --git a/PREPROCESS.py b/PREPROCESS.py index a103c04..c779cd4 100755 --- a/PREPROCESS.py +++ b/PREPROCESS.py @@ -403,7 +403,7 @@ def low_pass_filter(df, filter_type, kernel_size): -def format_like_Kaggle(df, myDataDir, imputation_method, sampling_period, start_date=None, end_date=None): +def format_like_Kaggle(df, myDataDir, imputation_method, sampling_period, do_augmentation, start_date=None, end_date=None): """ Take my data and format it exactly as needed to use for the Kaggle seq2seq model [requires making train_1.csv, train_2.csv, key_1.csv, key_2.csv] @@ -548,7 +548,8 @@ def make_index_col_left(df): dd = 
imputation_lagKmedian_single_series(dd,seasonality,N_seasons,OUT_OF_RANGE_FILL_VALUE) #Data augmentation - dd = data_augmentation(dd) + if do_augmentation: + dd = data_augmentation(dd) df_list.append(dd) @@ -641,6 +642,7 @@ def make_key_csv(df): END_DATE = '2017-12-31' #None REMOVE_ID_LIST = []#[3,4]#id's for locations that are no longer useful SAMPLING_PERIOD = 'daily' #'daily', 'weekly', 'monthly' + DO_AUGMENTATION = False #True RANDOM_SEED = None # ============================================================================= @@ -663,5 +665,5 @@ def make_key_csv(df): df = remove_cities(df,REMOVE_ID_LIST) #Put into same format as used by Kaggle, save out csv's - df = format_like_Kaggle(df, myDataDir, IMPUTATION_METHOD, SAMPLING_PERIOD, start_date=START_DATE, end_date=END_DATE) + df = format_like_Kaggle(df, myDataDir, IMPUTATION_METHOD, SAMPLING_PERIOD, DO_AUGMENTATION, start_date=START_DATE, end_date=END_DATE) diff --git a/RUN_MANY_TRAIN_VAL_WINDOWS.sh b/RUN_MANY_TRAIN_VAL_WINDOWS.sh index fd21a5f..e301e3e 100644 --- a/RUN_MANY_TRAIN_VAL_WINDOWS.sh +++ b/RUN_MANY_TRAIN_VAL_WINDOWS.sh @@ -3,13 +3,27 @@ #Run over many history_window_size - horizon_window_size length pairs #Compile results, analyze performance as (2D) heatmap +#At this point, models have been trained already. Trained by randomizing over +#range of history and horizon sizes [~train,validation phases]. +#Now hopefully the models are reasonably good across a range of values of +#history/horizon lengths. +#Now, assess performance (walk-forward SMAPE on test set) as a function of +#(fixed) history and horizon sizes. +#I.e. during training phase, the history and horizon are random variables that +#change randomly for every step of every batch. Vs. during inference, use +#fixed settings of history and horizon sizes and get an SMAPE value, then +#change the fixed history/horizon parameters and get another SMAPE value, etc., +#over a range of histories/horizons. This way we can see if the model does well +#on short series also. Of course we expect that as history->infinity and +#horizon->1, error will decrease. -#TRAIN_WINDOWS="1 2 5 10 20 50 100 150 200 250 300" -#VALIDATION_WINDOWS="1 2 5 10 20 50 100" -#e.g. TRAIN_WINDOWS has NAN SMAPE -> 2 problem with as big as size 50 -TRAIN_WINDOWS="100 150" -VALIDATION_WINDOWS="33 66" +#HISTORY_SIZES="1 2 5 10 20 50 100 150 200 250 300" +#HORIZON_SIZES="1 2 5 10 20 50 100" +#e.g. HISTORY_SIZES has NAN SMAPE -> 2 problem with as big as size 50 + +HISTORY_SIZES="100 150" +HORIZON_SIZES="33 66" #just to test... MAX_EPOCH=2 @@ -25,7 +39,11 @@ cd .. #ls -l data/ -for v in $VALIDATION_WINDOWS; do + + +#Now that all training is done, can run predictions +#python3 PREDICT.py !!!!!make window sizes as params +for v in $HORIZON_SIZES; do #Clea up between feature sets cd data rm -R vars/ @@ -35,7 +53,7 @@ for v in $VALIDATION_WINDOWS; do #Create the features for our data echo 'running make_features.py with --add_days='$v python3 make_features.py data/vars ours daily full --add_days=$v - for t in $TRAIN_WINDOWS; do + for t in $HISTORY_SIZES; do echo 'history window = '$t 'horizon window = '$v echo 'running trainer.py' NAME="val$v-train$t" @@ -46,7 +64,22 @@ done -#Now that all training is done, can run predictions -#python3 PREDICT.py !!!!!make window sizes as params +#from trainer.py, when have save_epochs_performance==True: +#format of saved "{logdir}/{name}_epochs_performance.np" numpy array is: +#2D array, dims = [epochs, 9] +#where epochs is number of epochs that successfully completed (