diff --git a/.gitignore b/.gitignore index b816aaa06..92e195be3 100644 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,8 @@ dmypy.json # Pyre type checker .pyre/ + +backend/twitter_keys.py + +# Credentials +backend/creds.json diff --git a/backend/backend.py b/backend/backend.py new file mode 100644 index 000000000..040be9bad --- /dev/null +++ b/backend/backend.py @@ -0,0 +1,100 @@ +from stock_metadata import * +import tweepy +import time +from twitter_keys import * +from stock_metadata import company_and_ticks +from pprint import pprint +import re +import sys +# init server + +class TweetStreamListener(tweepy.StreamListener): + + def __init__(self): + self.backoff_timeout = 1 + super(TweetStreamListener,self).__init__() + self.query_string = list() + self.query_string.extend(list(company_and_ticks.keys())) + #self.query_string.extend(list(company_and_ticks.values())) + #self.query_string.remove("V") + + def on_status(self, status): + + #reset timeout + self.backoff_timeout = 1 + + #send message on namespace + tweet = self.construct_tweet(status) + if (tweet): + print(tweet) + + def on_error(self, status_code): + + # exp back-off if rate limit error + if status_code == 420: + time.sleep(self.backoff_timeout) + self.backoff_timeout *= 2 + return True + else: + print("Error {0} occurred".format(status_code)) + return False + + def construct_tweet(self, status): + try: + tweet_text = "" + if hasattr(status, 'retweeted_status') and hasattr(status.retweeted_status, 'extended_tweet'): + tweet_text = status.retweeted_status.extended_tweet['full_text'] + elif hasattr(status, 'full_text'): + tweet_text = status.full_text + elif hasattr(status, 'extended_tweet'): + tweet_text = status.extended_tweet['full_text'] + elif hasattr(status, 'quoted_status'): + if hasattr(status.quoted_status, 'extended_tweet'): + tweet_text = status.quoted_status.extended_tweet['full_text'] + else: + tweet_text = status.quoted_status.text + else: + tweet_text = status.text + tweet_data = dict() + 
for q_string in self.query_string: + if tweet_text.lower().find(q_string.lower()) != -1: + tweet_data = { + "text": TweetStreamListener.sanitize_text(tweet_text), + "tic": company_and_ticks[q_string], + "date": status.created_at + } + break + return tweet_data + except Exception as e: + print("Exception occur while parsing status object:", e) + + @staticmethod + def sanitize_text(tweet): + tweet = tweet.replace('\n', '').replace('"', '').replace('\'', '') + return re.sub(r"http\S+", "", tweet) + +class TwitterStreamer: + + def __init__(self): + self.twitter_api = None + self.__get_twitter_connection() + self.listener = TweetStreamListener() + self.tweet_stream = tweepy.Stream(auth=self.twitter_api.auth, listener=self.listener, tweet_mode='extended') + + def __get_twitter_connection(self): + try: + auth = tweepy.OAuthHandler(tw_access_key, tw_secret_key) + auth.set_access_token(tw_access_token, tw_access_token_secret) + self.twitter_api = tweepy.API(auth, wait_on_rate_limit=True) + except Exception as e: + print("Exception occurred : {0}".format(e)) + + def start_tweet_streaming(self): + # start stream to listen to company tweets + self.tweet_stream.filter(track=self.listener.query_string, languages=['en']) + +if __name__=="__main__": + + #init twitter connection + twitter_streamer = TwitterStreamer() + twitter_streamer.start_tweet_streaming() diff --git a/backend/companies.json b/backend/companies.json new file mode 100644 index 000000000..12457d9b6 --- /dev/null +++ b/backend/companies.json @@ -0,0 +1,32 @@ +{ + "MMM": "3M", + "AXP": "American Express", + "AMGN": "Amgen", + "AAPL": "Apple", + "BA": "Boeing", + "CAT": "Caterpillar", + "CVX": "Chevron", + "CSCO": "Cisco", + "KO": "Coca-Cola", + "DOW": "Dow", + "GS": "Goldman Sachs", + "HD": "Home Depot", + "HON": "Honeywell", + "IBM": "IBM", + "INTC": "Intel", + "JNJ": "Johnson & Johnson", + "JPM": "JPMorgan", + "MCD": "McDonald's", + "MRK": "Merck", + "MSFT": "Microsoft", + "NKE": "Nike", + "PG": "Procter & 
Gamble", + "CRM": "Salesforce", + "TRV": "The Travelers Companies", + "UNH": "UnitedHealth", + "VZ": "Verizon", + "V": "Visa", + "WBA": "Walgreens Boots Alliance", + "WMT": "Walmart", + "DIS": "Walt Disney" +} diff --git a/backend/stock_metadata.py b/backend/stock_metadata.py new file mode 100644 index 000000000..fddff5256 --- /dev/null +++ b/backend/stock_metadata.py @@ -0,0 +1,33 @@ +company_and_ticks = { + "3M Company": "MMM", + "American Express": "AXP", + "Amgen": "AMGN", + "Apple": "AAPL", + "Boeing": "BA", + "Caterpillar": "CAT", + "Chevron":"CVX", + "Cisco": "CSCO", + "Coca-Cola": "KO", + "Dow": "DOW", + "Goldman Sachs": "GS", + "Home Depot": "HD", + "Honeywell": "HON", + "IBM": "IBM", + "Intel": "INTC", + "Johnson & Johnson": "JNJ", + "JPMorgan": "JPM", + "McDonald": "MCD", + "Merck": "MRK", + "Microsoft": "MSFT", + "Nike": "NKE", + "Proctor & Gamble": "PG", + "Salesforce": "CRM", + "The Travelers Companies": "TRV", + "UnitedHealth": "UNH", + "Verizon": "VZ", + "Visa": "V", + "Walgreens Boots Alliance": "WBA", + "Walmart": "WMT", + "Disney": "DIS" +} + diff --git a/backend/stream_reddit.py b/backend/stream_reddit.py new file mode 100644 index 000000000..0815e3110 --- /dev/null +++ b/backend/stream_reddit.py @@ -0,0 +1,112 @@ +import argparse +import json +import math +import praw +import threading +import time + +from kafka import KafkaProducer + +redditClient = None + +class CommentsFetcher (threading.Thread): + die = False + sr_obj = None + companies = {} + def __init__(self, subreddit, companies, exit_on_fail=False, producer=None, topic=None): + threading.Thread.__init__(self) + self.name = 'fetch_comments_{0}'.format(subreddit) + self.companies = companies + self.exit_on_fail = exit_on_fail + self.producer = producer + self.topic = topic + lock = threading.RLock() + with lock: + self.sr_obj = redditClient.subreddit(subreddit) + + def run(self): + while not self.die: + try: + self.fetchComments() + except Exception as e: + if self.exit_on_fail: + raise 
+ else: + print("Thread {1}, Error {0} occurred while streaming comments, continuing".format(e, self.name)) + + def join(self): + self.die = True + super().join() + + def fetchComments(self): + for comment in self.sr_obj.stream.comments(skip_existing=True, pause_after=5): + comment_text = comment.body.casefold() + for ticker in self.companies: + casefolded_company = self.companies[ticker].casefold() + if ('{0} '.format(ticker) in comment.body or + ' {0}'.format(ticker) in comment.body or + '{0} '.format(casefolded_company) in comment_text or + ' {0}'.format(casefolded_company) in comment_text): + comment_obj = { "ticker": ticker, "text": comment.body, "timestamp": math.ceil(time.time_ns()/1000000) } + self.output(comment_obj) + break + + def output(self, comment): + if self.producer is None: + print(comment) + else: + if self.topic is None: + raise ValueError("topic not supplied") + key = "{0}_{1}".format(comment["ticker"],comment["timestamp"]) + try: + key_bytes = bytes(key, encoding='utf-8') + value = json.dumps(comment_obj) + value_bytes = bytes(value, encoding='utf-8') + self.producer.send(self.topic, key=key_bytes, value=value_bytes) + except Exception as e: + print("Error {0} occurred while publishing message with key {1}".format(e, key)) + +if __name__=='__main__': + parser = argparse.ArgumentParser(description='Stream reddit comments to stdout or kafka topic') + parser.add_argument('-t', '--topic', metavar='', help='Kafka topic name') + parser.add_argument('-H', '--host', metavar='', default='localhost:9092', help='Hostname:port of bootstrap server') + args = parser.parse_args() + creds = json.loads(open("creds.json","r").read()) + redditClient = praw.Reddit(client_id=creds['client_id'], + client_secret=creds['client_secret'], + password=creds['password'], + user_agent=creds['user_agent'], + username=creds['username']) + + + subreddits = [sr.strip() for sr in open("subreddits","r").read().split(',')] + companies = 
json.loads(open("companies.json","r").read()) + + producer = None + if args.topic is not None: + producer = KafkaProducer(bootstrap_servers=[args.host], api_version=(0, 10)) + + # start fetch thread for every subreddit + fetch_threads = [] + for sr in subreddits: + th = CommentsFetcher(sr, companies, producer, args.topic) + th.start() + fetch_threads.append(th) + + try: + while True: + time.sleep(2) + except KeyboardInterrupt: + for th in fetch_threads: + th.join() + + +""" + +This module is responsible for + +Streaming comments + +Stream comments from reddit and write to specified source (stdout or kafka) + +""" diff --git a/backend/subreddits b/backend/subreddits new file mode 100644 index 000000000..7b011cc11 --- /dev/null +++ b/backend/subreddits @@ -0,0 +1 @@ +wallstreetbets,SecurityAnalysis,Finance,Options,Investing,Stocks,StockMarket diff --git a/config/config.py b/config/config.py index 28b85e030..4a3189286 100644 --- a/config/config.py +++ b/config/config.py @@ -18,7 +18,8 @@ # data #TRAINING_DATA_FILE = "data/ETF_SPY_2009_2020.csv" TRAINING_DATA_FILE = "data/dow_30_2009_2020.csv" - +# List of stock tickers +stock_tickers=['MMM','AXP','AMGN','AAPL','BA','CAT','CVX','CSCO','KO','DIS','DOW','GS','HD','HON','IBM','INTC','JNJ','JPM','MCD','MRK','MSFT','NKE','PG','CRM','TRV','UNH','VZ','V','WBA','WMT'] now = datetime.datetime.now() TRAINED_MODEL_DIR = f"trained_models/{now}" os.makedirs(TRAINED_MODEL_DIR) diff --git a/done_data.csv b/data/done_data.csv similarity index 100% rename from done_data.csv rename to data/done_data.csv diff --git a/env/EnvMultipleStock_trade.py b/env/EnvMultipleStock_trade.py deleted file mode 100644 index 38abff51c..000000000 --- a/env/EnvMultipleStock_trade.py +++ /dev/null @@ -1,255 +0,0 @@ -import numpy as np -import pandas as pd -from gym.utils import seeding -import gym -from gym import spaces -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -import pickle - -# shares normalization factor -# 100 shares 
per trade -HMAX_NORMALIZE = 100 -# initial amount of money we have in our account -INITIAL_ACCOUNT_BALANCE=1000000 -# total number of stocks in our portfolio -STOCK_DIM = 30 -# transaction fee: 1/1000 reasonable percentage -TRANSACTION_FEE_PERCENT = 0.001 - -# turbulence index: 90-150 reasonable threshold -#TURBULENCE_THRESHOLD = 140 -REWARD_SCALING = 1e-4 - -class StockEnvTrade(gym.Env): - """A stock trading environment for OpenAI gym""" - metadata = {'render.modes': ['human']} - - def __init__(self, df,day = 0,turbulence_threshold=140 - ,initial=True, previous_state=[], model_name='', iteration=''): - #super(StockEnv, self).__init__() - #money = 10 , scope = 1 - self.day = day - self.df = df - self.initial = initial - self.previous_state = previous_state - # action_space normalization and shape is STOCK_DIM - self.action_space = spaces.Box(low = -1, high = 1,shape = (STOCK_DIM,)) - # Shape = 181: [Current Balance]+[prices 1-30]+[owned shares 1-30] - # +[macd 1-30]+ [rsi 1-30] + [cci 1-30] + [adx 1-30] - self.observation_space = spaces.Box(low=0, high=np.inf, shape = (181,)) - # load data from a pandas dataframe - self.data = self.df.loc[self.day,:] - self.terminal = False - self.turbulence_threshold = turbulence_threshold - # initalize state - self.state = [INITIAL_ACCOUNT_BALANCE] + \ - self.data.adjcp.values.tolist() + \ - [0]*STOCK_DIM + \ - self.data.macd.values.tolist() + \ - self.data.rsi.values.tolist() + \ - self.data.cci.values.tolist() + \ - self.data.adx.values.tolist() - # initialize reward - self.reward = 0 - self.turbulence = 0 - self.cost = 0 - self.trades = 0 - # memorize all the total balance change - self.asset_memory = [INITIAL_ACCOUNT_BALANCE] - self.rewards_memory = [] - #self.reset() - self._seed() - self.model_name=model_name - self.iteration=iteration - - - def _sell_stock(self, index, action): - # perform sell action based on the sign of the action - if self.turbulence 0: - #update balance - self.state[0] += \ - 
self.state[index+1]*min(abs(action),self.state[index+STOCK_DIM+1]) * \ - (1- TRANSACTION_FEE_PERCENT) - - self.state[index+STOCK_DIM+1] -= min(abs(action), self.state[index+STOCK_DIM+1]) - self.cost +=self.state[index+1]*min(abs(action),self.state[index+STOCK_DIM+1]) * \ - TRANSACTION_FEE_PERCENT - self.trades+=1 - else: - pass - else: - # if turbulence goes over threshold, just clear out all positions - if self.state[index+STOCK_DIM+1] > 0: - #update balance - self.state[0] += self.state[index+1]*self.state[index+STOCK_DIM+1]* \ - (1- TRANSACTION_FEE_PERCENT) - self.state[index+STOCK_DIM+1] =0 - self.cost += self.state[index+1]*self.state[index+STOCK_DIM+1]* \ - TRANSACTION_FEE_PERCENT - self.trades+=1 - else: - pass - - def _buy_stock(self, index, action): - # perform buy action based on the sign of the action - if self.turbulence< self.turbulence_threshold: - available_amount = self.state[0] // self.state[index+1] - # print('available_amount:{}'.format(available_amount)) - - #update balance - self.state[0] -= self.state[index+1]*min(available_amount, action)* \ - (1+ TRANSACTION_FEE_PERCENT) - - self.state[index+STOCK_DIM+1] += min(available_amount, action) - - self.cost+=self.state[index+1]*min(available_amount, action)* \ - TRANSACTION_FEE_PERCENT - self.trades+=1 - else: - # if turbulence goes over threshold, just stop buying - pass - - def step(self, actions): - # print(self.day) - self.terminal = self.day >= len(self.df.index.unique())-1 - # print(actions) - - if self.terminal: - plt.plot(self.asset_memory,'r') - plt.savefig('results/account_value_trade_{}_{}.png'.format(self.model_name, self.iteration)) - plt.close() - df_total_value = pd.DataFrame(self.asset_memory) - df_total_value.to_csv('results/account_value_trade_{}_{}.csv'.format(self.model_name, self.iteration)) - end_total_asset = self.state[0]+ \ - sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)])) - 
print("previous_total_asset:{}".format(self.asset_memory[0])) - - print("end_total_asset:{}".format(end_total_asset)) - print("total_reward:{}".format(self.state[0]+sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]))- self.asset_memory[0] )) - print("total_cost: ", self.cost) - print("total trades: ", self.trades) - - df_total_value.columns = ['account_value'] - df_total_value['daily_return']=df_total_value.pct_change(1) - sharpe = (4**0.5)*df_total_value['daily_return'].mean()/ \ - df_total_value['daily_return'].std() - print("Sharpe: ",sharpe) - - df_rewards = pd.DataFrame(self.rewards_memory) - df_rewards.to_csv('results/account_rewards_trade_{}_{}.csv'.format(self.model_name, self.iteration)) - - # print('total asset: {}'.format(self.state[0]+ sum(np.array(self.state[1:29])*np.array(self.state[29:])))) - #with open('obs.pkl', 'wb') as f: - # pickle.dump(self.state, f) - - return self.state, self.reward, self.terminal,{} - - else: - # print(np.array(self.state[1:29])) - - actions = actions * HMAX_NORMALIZE - #actions = (actions.astype(int)) - if self.turbulence>=self.turbulence_threshold: - actions=np.array([-HMAX_NORMALIZE]*STOCK_DIM) - - begin_total_asset = self.state[0]+ \ - sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)])) - #print("begin_total_asset:{}".format(begin_total_asset)) - - argsort_actions = np.argsort(actions) - - sell_index = argsort_actions[:np.where(actions < 0)[0].shape[0]] - buy_index = argsort_actions[::-1][:np.where(actions > 0)[0].shape[0]] - - for index in sell_index: - # print('take sell action'.format(actions[index])) - self._sell_stock(index, actions[index]) - - for index in buy_index: - # print('take buy action: {}'.format(actions[index])) - self._buy_stock(index, actions[index]) - - self.day += 1 - self.data = self.df.loc[self.day,:] - self.turbulence = self.data['turbulence'].values[0] - #print(self.turbulence) - #load next state - # 
print("stock_shares:{}".format(self.state[29:])) - self.state = [self.state[0]] + \ - self.data.adjcp.values.tolist() + \ - list(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]) + \ - self.data.macd.values.tolist() + \ - self.data.rsi.values.tolist() + \ - self.data.cci.values.tolist() + \ - self.data.adx.values.tolist() - - end_total_asset = self.state[0]+ \ - sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)])) - self.asset_memory.append(end_total_asset) - #print("end_total_asset:{}".format(end_total_asset)) - - self.reward = end_total_asset - begin_total_asset - # print("step_reward:{}".format(self.reward)) - self.rewards_memory.append(self.reward) - - self.reward = self.reward*REWARD_SCALING - - - return self.state, self.reward, self.terminal, {} - - def reset(self): - if self.initial: - self.asset_memory = [INITIAL_ACCOUNT_BALANCE] - self.day = 0 - self.data = self.df.loc[self.day,:] - self.turbulence = 0 - self.cost = 0 - self.trades = 0 - self.terminal = False - #self.iteration=self.iteration - self.rewards_memory = [] - #initiate state - self.state = [INITIAL_ACCOUNT_BALANCE] + \ - self.data.adjcp.values.tolist() + \ - [0]*STOCK_DIM + \ - self.data.macd.values.tolist() + \ - self.data.rsi.values.tolist() + \ - self.data.cci.values.tolist() + \ - self.data.adx.values.tolist() - else: - previous_total_asset = self.previous_state[0]+ \ - sum(np.array(self.previous_state[1:(STOCK_DIM+1)])*np.array(self.previous_state[(STOCK_DIM+1):(STOCK_DIM*2+1)])) - self.asset_memory = [previous_total_asset] - #self.asset_memory = [self.previous_state[0]] - self.day = 0 - self.data = self.df.loc[self.day,:] - self.turbulence = 0 - self.cost = 0 - self.trades = 0 - self.terminal = False - #self.iteration=iteration - self.rewards_memory = [] - #initiate state - #self.previous_state[(STOCK_DIM+1):(STOCK_DIM*2+1)] - #[0]*STOCK_DIM + \ - - self.state = [ self.previous_state[0]] + \ - self.data.adjcp.values.tolist() + \ - 
self.previous_state[(STOCK_DIM+1):(STOCK_DIM*2+1)]+ \ - self.data.macd.values.tolist() + \ - self.data.rsi.values.tolist() + \ - self.data.cci.values.tolist() + \ - self.data.adx.values.tolist() - - return self.state - - def render(self, mode='human',close=False): - return self.state - - - def _seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] \ No newline at end of file diff --git a/env/EnvMultipleStock_train.py b/env/EnvMultipleStock_train.py deleted file mode 100644 index c4096e0d5..000000000 --- a/env/EnvMultipleStock_train.py +++ /dev/null @@ -1,197 +0,0 @@ -import numpy as np -import pandas as pd -from gym.utils import seeding -import gym -from gym import spaces -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -import pickle - -# shares normalization factor -# 100 shares per trade -HMAX_NORMALIZE = 100 -# initial amount of money we have in our account -INITIAL_ACCOUNT_BALANCE=1000000 -# total number of stocks in our portfolio -STOCK_DIM = 30 -# transaction fee: 1/1000 reasonable percentage -TRANSACTION_FEE_PERCENT = 0.001 -REWARD_SCALING = 1e-4 - -class StockEnvTrain(gym.Env): - """A stock trading environment for OpenAI gym""" - metadata = {'render.modes': ['human']} - - def __init__(self, df,day = 0): - #super(StockEnv, self).__init__() - #money = 10 , scope = 1 - self.day = day - self.df = df - - # action_space normalization and shape is STOCK_DIM - self.action_space = spaces.Box(low = -1, high = 1,shape = (STOCK_DIM,)) - # Shape = 181: [Current Balance]+[prices 1-30]+[owned shares 1-30] - # +[macd 1-30]+ [rsi 1-30] + [cci 1-30] + [adx 1-30] - self.observation_space = spaces.Box(low=0, high=np.inf, shape = (181,)) - # load data from a pandas dataframe - self.data = self.df.loc[self.day,:] - self.terminal = False - # initalize state - self.state = [INITIAL_ACCOUNT_BALANCE] + \ - self.data.adjcp.values.tolist() + \ - [0]*STOCK_DIM + \ - self.data.macd.values.tolist() + \ - 
self.data.rsi.values.tolist() + \ - self.data.cci.values.tolist() + \ - self.data.adx.values.tolist() - # initialize reward - self.reward = 0 - self.cost = 0 - # memorize all the total balance change - self.asset_memory = [INITIAL_ACCOUNT_BALANCE] - self.rewards_memory = [] - self.trades = 0 - #self.reset() - self._seed() - - - def _sell_stock(self, index, action): - # perform sell action based on the sign of the action - if self.state[index+STOCK_DIM+1] > 0: - #update balance - self.state[0] += \ - self.state[index+1]*min(abs(action),self.state[index+STOCK_DIM+1]) * \ - (1- TRANSACTION_FEE_PERCENT) - - self.state[index+STOCK_DIM+1] -= min(abs(action), self.state[index+STOCK_DIM+1]) - self.cost +=self.state[index+1]*min(abs(action),self.state[index+STOCK_DIM+1]) * \ - TRANSACTION_FEE_PERCENT - self.trades+=1 - else: - pass - - - def _buy_stock(self, index, action): - # perform buy action based on the sign of the action - available_amount = self.state[0] // self.state[index+1] - # print('available_amount:{}'.format(available_amount)) - - #update balance - self.state[0] -= self.state[index+1]*min(available_amount, action)* \ - (1+ TRANSACTION_FEE_PERCENT) - - self.state[index+STOCK_DIM+1] += min(available_amount, action) - - self.cost+=self.state[index+1]*min(available_amount, action)* \ - TRANSACTION_FEE_PERCENT - self.trades+=1 - - def step(self, actions): - # print(self.day) - self.terminal = self.day >= len(self.df.index.unique())-1 - # print(actions) - - if self.terminal: - plt.plot(self.asset_memory,'r') - plt.savefig('results/account_value_train.png') - plt.close() - end_total_asset = self.state[0]+ \ - sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)])) - - #print("end_total_asset:{}".format(end_total_asset)) - df_total_value = pd.DataFrame(self.asset_memory) - df_total_value.to_csv('results/account_value_train.csv') - 
#print("total_reward:{}".format(self.state[0]+sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):61]))- INITIAL_ACCOUNT_BALANCE )) - #print("total_cost: ", self.cost) - #print("total_trades: ", self.trades) - df_total_value.columns = ['account_value'] - df_total_value['daily_return']=df_total_value.pct_change(1) - sharpe = (252**0.5)*df_total_value['daily_return'].mean()/ \ - df_total_value['daily_return'].std() - #print("Sharpe: ",sharpe) - #print("=================================") - df_rewards = pd.DataFrame(self.rewards_memory) - #df_rewards.to_csv('results/account_rewards_train.csv') - - # print('total asset: {}'.format(self.state[0]+ sum(np.array(self.state[1:29])*np.array(self.state[29:])))) - #with open('obs.pkl', 'wb') as f: - # pickle.dump(self.state, f) - - return self.state, self.reward, self.terminal,{} - - else: - # print(np.array(self.state[1:29])) - - actions = actions * HMAX_NORMALIZE - #actions = (actions.astype(int)) - - begin_total_asset = self.state[0]+ \ - sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)])) - #print("begin_total_asset:{}".format(begin_total_asset)) - - argsort_actions = np.argsort(actions) - - sell_index = argsort_actions[:np.where(actions < 0)[0].shape[0]] - buy_index = argsort_actions[::-1][:np.where(actions > 0)[0].shape[0]] - - for index in sell_index: - # print('take sell action'.format(actions[index])) - self._sell_stock(index, actions[index]) - - for index in buy_index: - # print('take buy action: {}'.format(actions[index])) - self._buy_stock(index, actions[index]) - - self.day += 1 - self.data = self.df.loc[self.day,:] - #load next state - # print("stock_shares:{}".format(self.state[29:])) - self.state = [self.state[0]] + \ - self.data.adjcp.values.tolist() + \ - list(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]) + \ - self.data.macd.values.tolist() + \ - self.data.rsi.values.tolist() + \ - self.data.cci.values.tolist() + \ - 
self.data.adx.values.tolist() - - end_total_asset = self.state[0]+ \ - sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)])) - self.asset_memory.append(end_total_asset) - #print("end_total_asset:{}".format(end_total_asset)) - - self.reward = end_total_asset - begin_total_asset - # print("step_reward:{}".format(self.reward)) - self.rewards_memory.append(self.reward) - - self.reward = self.reward*REWARD_SCALING - - - - return self.state, self.reward, self.terminal, {} - - def reset(self): - self.asset_memory = [INITIAL_ACCOUNT_BALANCE] - self.day = 0 - self.data = self.df.loc[self.day,:] - self.cost = 0 - self.trades = 0 - self.terminal = False - self.rewards_memory = [] - #initiate state - self.state = [INITIAL_ACCOUNT_BALANCE] + \ - self.data.adjcp.values.tolist() + \ - [0]*STOCK_DIM + \ - self.data.macd.values.tolist() + \ - self.data.rsi.values.tolist() + \ - self.data.cci.values.tolist() + \ - self.data.adx.values.tolist() - # iteration += 1 - return self.state - - def render(self, mode='human'): - return self.state - - def _seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] \ No newline at end of file diff --git a/env/EnvMultipleStock_validation.py b/env/EnvMultipleStock_validation.py deleted file mode 100644 index 390d46310..000000000 --- a/env/EnvMultipleStock_validation.py +++ /dev/null @@ -1,225 +0,0 @@ -import numpy as np -import pandas as pd -from gym.utils import seeding -import gym -from gym import spaces -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -import pickle - -# shares normalization factor -# 100 shares per trade -HMAX_NORMALIZE = 100 -# initial amount of money we have in our account -INITIAL_ACCOUNT_BALANCE=1000000 -# total number of stocks in our portfolio -STOCK_DIM = 30 -# transaction fee: 1/1000 reasonable percentage -TRANSACTION_FEE_PERCENT = 0.001 - -# turbulence index: 90-150 reasonable threshold -#TURBULENCE_THRESHOLD = 140 
-REWARD_SCALING = 1e-4 - -class StockEnvValidation(gym.Env): - """A stock trading environment for OpenAI gym""" - metadata = {'render.modes': ['human']} - - def __init__(self, df, day = 0, turbulence_threshold=140, iteration=''): - #super(StockEnv, self).__init__() - #money = 10 , scope = 1 - self.day = day - self.df = df - # action_space normalization and shape is STOCK_DIM - self.action_space = spaces.Box(low = -1, high = 1,shape = (STOCK_DIM,)) - # Shape = 181: [Current Balance]+[prices 1-30]+[owned shares 1-30] - # +[macd 1-30]+ [rsi 1-30] + [cci 1-30] + [adx 1-30] - self.observation_space = spaces.Box(low=0, high=np.inf, shape = (181,)) - # load data from a pandas dataframe - self.data = self.df.loc[self.day,:] - self.terminal = False - self.turbulence_threshold = turbulence_threshold - # initalize state - self.state = [INITIAL_ACCOUNT_BALANCE] + \ - self.data.adjcp.values.tolist() + \ - [0]*STOCK_DIM + \ - self.data.macd.values.tolist() + \ - self.data.rsi.values.tolist() + \ - self.data.cci.values.tolist() + \ - self.data.adx.values.tolist() - # initialize reward - self.reward = 0 - self.turbulence = 0 - self.cost = 0 - self.trades = 0 - # memorize all the total balance change - self.asset_memory = [INITIAL_ACCOUNT_BALANCE] - self.rewards_memory = [] - #self.reset() - self._seed() - - self.iteration=iteration - - - def _sell_stock(self, index, action): - # perform sell action based on the sign of the action - if self.turbulence 0: - #update balance - self.state[0] += \ - self.state[index+1]*min(abs(action),self.state[index+STOCK_DIM+1]) * \ - (1- TRANSACTION_FEE_PERCENT) - - self.state[index+STOCK_DIM+1] -= min(abs(action), self.state[index+STOCK_DIM+1]) - self.cost +=self.state[index+1]*min(abs(action),self.state[index+STOCK_DIM+1]) * \ - TRANSACTION_FEE_PERCENT - self.trades+=1 - else: - pass - else: - # if turbulence goes over threshold, just clear out all positions - if self.state[index+STOCK_DIM+1] > 0: - #update balance - self.state[0] += 
self.state[index+1]*self.state[index+STOCK_DIM+1]* \ - (1- TRANSACTION_FEE_PERCENT) - self.state[index+STOCK_DIM+1] =0 - self.cost += self.state[index+1]*self.state[index+STOCK_DIM+1]* \ - TRANSACTION_FEE_PERCENT - self.trades+=1 - else: - pass - - def _buy_stock(self, index, action): - # perform buy action based on the sign of the action - if self.turbulence< self.turbulence_threshold: - available_amount = self.state[0] // self.state[index+1] - # print('available_amount:{}'.format(available_amount)) - - #update balance - self.state[0] -= self.state[index+1]*min(available_amount, action)* \ - (1+ TRANSACTION_FEE_PERCENT) - - self.state[index+STOCK_DIM+1] += min(available_amount, action) - - self.cost+=self.state[index+1]*min(available_amount, action)* \ - TRANSACTION_FEE_PERCENT - self.trades+=1 - else: - # if turbulence goes over threshold, just stop buying - pass - - def step(self, actions): - # print(self.day) - self.terminal = self.day >= len(self.df.index.unique())-1 - # print(actions) - - if self.terminal: - plt.plot(self.asset_memory,'r') - plt.savefig('results/account_value_validation_{}.png'.format(self.iteration)) - plt.close() - df_total_value = pd.DataFrame(self.asset_memory) - df_total_value.to_csv('results/account_value_validation_{}.csv'.format(self.iteration)) - end_total_asset = self.state[0]+ \ - sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)])) - #print("previous_total_asset:{}".format(self.asset_memory[0])) - - #print("end_total_asset:{}".format(end_total_asset)) - #print("total_reward:{}".format(self.state[0]+sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):61]))- self.asset_memory[0] )) - #print("total_cost: ", self.cost) - #print("total trades: ", self.trades) - - df_total_value.columns = ['account_value'] - df_total_value['daily_return']=df_total_value.pct_change(1) - sharpe = (4**0.5)*df_total_value['daily_return'].mean()/ \ - df_total_value['daily_return'].std() - 
#print("Sharpe: ",sharpe) - - #df_rewards = pd.DataFrame(self.rewards_memory) - #df_rewards.to_csv('results/account_rewards_trade_{}.csv'.format(self.iteration)) - - # print('total asset: {}'.format(self.state[0]+ sum(np.array(self.state[1:29])*np.array(self.state[29:])))) - #with open('obs.pkl', 'wb') as f: - # pickle.dump(self.state, f) - - return self.state, self.reward, self.terminal,{} - - else: - # print(np.array(self.state[1:29])) - - actions = actions * HMAX_NORMALIZE - #actions = (actions.astype(int)) - if self.turbulence>=self.turbulence_threshold: - actions=np.array([-HMAX_NORMALIZE]*STOCK_DIM) - begin_total_asset = self.state[0]+ \ - sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)])) - #print("begin_total_asset:{}".format(begin_total_asset)) - - argsort_actions = np.argsort(actions) - - sell_index = argsort_actions[:np.where(actions < 0)[0].shape[0]] - buy_index = argsort_actions[::-1][:np.where(actions > 0)[0].shape[0]] - - for index in sell_index: - # print('take sell action'.format(actions[index])) - self._sell_stock(index, actions[index]) - - for index in buy_index: - # print('take buy action: {}'.format(actions[index])) - self._buy_stock(index, actions[index]) - - self.day += 1 - self.data = self.df.loc[self.day,:] - self.turbulence = self.data['turbulence'].values[0] - #print(self.turbulence) - #load next state - # print("stock_shares:{}".format(self.state[29:])) - self.state = [self.state[0]] + \ - self.data.adjcp.values.tolist() + \ - list(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]) + \ - self.data.macd.values.tolist() + \ - self.data.rsi.values.tolist() + \ - self.data.cci.values.tolist() + \ - self.data.adx.values.tolist() - - end_total_asset = self.state[0]+ \ - sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)])) - self.asset_memory.append(end_total_asset) - #print("end_total_asset:{}".format(end_total_asset)) - - self.reward = end_total_asset - 
# --- env/env_onlinestocktrading.py ---
import numpy as np
import pandas as pd
from gym.utils import seeding
import gym
from gym import spaces
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pickle
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common import logger
from env_stocks import StockEnv


class OnlineStockTradingEnv(StockEnv):
    """StockEnv whose market data is pushed in from outside, one slice at a time.

    The constructor takes the same arguments as ``StockEnv``. In addition the
    environment keeps ``data_history``, the concatenation of the initial data
    and every slice later passed to ``_update_data``.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self,
                 initial_data,
                 stock_dim,
                 hmax,
                 initial_amount,
                 buy_cost_pct,
                 sell_cost_pct,
                 reward_scaling,
                 state_space,
                 action_space,
                 tech_indicator_list,
                 turbulence_threshold=None,
                 make_plots=False,
                 print_verbosity=10,
                 day=0,
                 initial=True,
                 previous_state=None,
                 model_name='',
                 mode='',
                 iteration=''):
        # Forward the caller's values. The previous version passed literal
        # defaults here, so e.g. print_verbosity / turbulence_threshold given
        # by the caller never reached the base class.
        super().__init__(initial_data, stock_dim, hmax, initial_amount,
                         buy_cost_pct,
                         sell_cost_pct,
                         reward_scaling,
                         state_space,
                         action_space,
                         tech_indicator_list,
                         turbulence_threshold=turbulence_threshold,
                         make_plots=make_plots,
                         print_verbosity=print_verbosity,
                         day=day,
                         initial=initial,
                         previous_state=previous_state,
                         model_name=model_name,
                         mode=mode,
                         iteration=iteration)

        self.data_history = initial_data

    def _update_data(self, new_df):
        """Make ``new_df`` the current market slice and append it to the history.

        The next call to ``step`` refreshes prices and indicators from this
        slice after executing its trades.
        """
        self.data = new_df
        # pd.concat replaces DataFrame.append, which newer pandas releases removed.
        self.data_history = pd.concat([self.data_history, new_df])


# --- env/env_stocks.py ---
import numpy as np
import pandas as pd
from gym.utils import seeding
import gym
from gym import spaces
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pickle
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common import logger


class StockEnv(gym.Env):
    """A stock trading environment for OpenAI gym.

    The state is a flat list laid out as::

        [cash]
        + [close price of each stock]
        + [shares held of each stock]
        + [values of each technical indicator, grouped per indicator]

    ``self.data`` holds the current market slice. It is read through
    ``close``, ``date``, ``tic``, the names in ``tech_indicator_list`` and,
    when a turbulence threshold is set, ``turbulence``.

    This class never replaces ``self.data`` and never sets ``self.terminal``
    to True by itself; both are left to a subclass or the caller (see
    ``OnlineStockTradingEnv._update_data`` for the data part).
    """
    metadata = {'render.modes': ['human']}

    def __init__(self,
                 initial_data,
                 stock_dim,
                 hmax,
                 initial_amount,
                 buy_cost_pct,
                 sell_cost_pct,
                 reward_scaling,
                 state_space,
                 action_space,
                 tech_indicator_list,
                 turbulence_threshold=None,
                 make_plots=False,
                 print_verbosity=10,
                 day=0,
                 initial=True,
                 previous_state=None,
                 model_name='',
                 mode='',
                 iteration=''):
        """Store the configuration and build the initial state.

        Args:
            initial_data: market slice used at construction and after ``reset``.
            stock_dim: number of stocks being traded.
            hmax: scale applied to actions, i.e. the largest number of shares
                of one stock traded in a single step.
            initial_amount: starting cash.
            buy_cost_pct: proportional fee charged on purchases.
            sell_cost_pct: proportional fee charged on sales.
            reward_scaling: factor applied to the per-step asset change.
            state_space: length of the observation vector.
            action_space: length of the action vector.
            tech_indicator_list: indicator names read from the data.
            turbulence_threshold: if not None, turbulence at or above this
                value blocks buying and liquidates holdings.
            make_plots: save an account-value plot when an episode ends.
            print_verbosity: print a summary every this many episodes.
            day: initial value of the step counter.
            initial: True to start from ``initial_amount`` with no holdings;
                False to start from ``previous_state``.
            previous_state: state list to resume from when ``initial`` is
                False. ``None`` is treated as an empty list.
            model_name, mode, iteration: used to name the result files;
                files are only written when ``model_name`` and ``mode`` are
                both non-empty.
        """
        self.day = day
        self.initial_data = initial_data
        self.data = initial_data
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.tech_indicator_list = tech_indicator_list
        self.action_space = spaces.Box(low=-1, high=1, shape=(action_space,))
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(self.state_space,))

        self.terminal = False
        self.make_plots = make_plots
        self.print_verbosity = print_verbosity
        self.turbulence_threshold = turbulence_threshold
        self.initial = initial
        # A fresh list per instance; a literal [] default would be shared.
        self.previous_state = [] if previous_state is None else previous_state
        self.model_name = model_name
        self.mode = mode
        self.iteration = iteration

        self.state = self._initiate_state()

        self.reward = 0
        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        self.episode = 0
        # Per-step history of the episode.
        self.asset_memory = [self.initial_amount]
        self.rewards_memory = []
        self.actions_memory = []
        self.date_memory = [self._get_date()]

        self._seed()

    def _total_asset(self):
        """Return cash plus the value of all holdings at the prices in the state."""
        prices = np.array(self.state[1:(self.stock_dim + 1)])
        shares = np.array(self.state[(self.stock_dim + 1):(self.stock_dim * 2 + 1)])
        return self.state[0] + sum(prices * shares)

    def _sell_stock(self, index, action):
        """Sell shares of stock ``index`` and return the number actually sold.

        Nothing is sold when the price is not positive (missing data) or
        nothing is held. When turbulence is at or above the threshold the
        whole position is liquidated; otherwise at most ``abs(action)``
        shares are sold.
        """
        price = self.state[index + 1]
        slot = index + self.stock_dim + 1
        held = self.state[slot]
        if not price > 0 or not held > 0:
            return 0

        liquidate = (self.turbulence_threshold is not None
                     and self.turbulence >= self.turbulence_threshold)
        sell_num_shares = held if liquidate else min(abs(action), held)

        self.state[0] += price * sell_num_shares * (1 - self.sell_cost_pct)
        self.state[slot] -= sell_num_shares
        # Fee is based on the shares sold. The liquidation path used to read
        # the holding after it had been zeroed, so it always recorded 0 cost.
        self.cost += price * sell_num_shares * self.sell_cost_pct
        self.trades += 1
        return sell_num_shares

    def _buy_stock(self, index, action):
        """Buy shares of stock ``index`` and return the number actually bought.

        Nothing is bought when the price is not positive (missing data) or
        when a turbulence threshold is set and turbulence is not below it.
        """
        if (self.turbulence_threshold is not None
                and not self.turbulence < self.turbulence_threshold):
            return 0

        price = self.state[index + 1]
        if not price > 0:
            return 0

        # Size the order on the fee-inclusive price so cash cannot go
        # negative, and never let a negative balance turn into a negative buy.
        available_amount = max(self.state[0] // (price * (1 + self.buy_cost_pct)), 0)
        buy_num_shares = min(available_amount, action)

        self.state[0] -= price * buy_num_shares * (1 + self.buy_cost_pct)
        self.state[index + self.stock_dim + 1] += buy_num_shares
        self.cost += price * buy_num_shares * self.buy_cost_pct
        self.trades += 1
        return buy_num_shares

    def _make_plot(self):
        """Save the account-value curve of the episode as a PNG."""
        plt.plot(self.asset_memory, 'r')
        plt.savefig('results/account_value_trade_{}.png'.format(self.episode))
        plt.close()

    def _finish_episode(self):
        """Report the finished episode and return the final step tuple.

        Prints a summary every ``print_verbosity`` episodes, writes result
        files when ``model_name`` and ``mode`` are set, and records totals on
        the stable-baselines3 logger.
        """
        if self.make_plots:
            self._make_plot()

        end_total_asset = self._total_asset()
        tot_reward = end_total_asset - self.initial_amount

        df_total_value = pd.DataFrame({'account_value': self.asset_memory})
        df_total_value['date'] = self.date_memory
        df_total_value['daily_return'] = df_total_value['account_value'].pct_change(1)
        daily_std = df_total_value['daily_return'].std()
        has_sharpe = daily_std != 0
        if has_sharpe:
            sharpe = (252 ** 0.5) * df_total_value['daily_return'].mean() / daily_std

        df_rewards = pd.DataFrame({'account_rewards': self.rewards_memory})
        df_rewards['date'] = self.date_memory[:-1]

        if self.episode % self.print_verbosity == 0:
            print(f"day: {self.day}, episode: {self.episode}")
            print(f"begin_total_asset: {self.asset_memory[0]:0.2f}")
            print(f"end_total_asset: {end_total_asset:0.2f}")
            print(f"total_reward: {tot_reward:0.2f}")
            print(f"total_cost: {self.cost:0.2f}")
            print(f"total_trades: {self.trades}")
            if has_sharpe:
                print(f"Sharpe: {sharpe:0.3f}")
            print("=================================")

        if (self.model_name != '') and (self.mode != ''):
            tag = '{}_{}_{}'.format(self.mode, self.model_name, self.iteration)
            df_actions = self.save_action_memory()
            df_actions.to_csv('results/actions_{}.csv'.format(tag))
            df_total_value.to_csv('results/account_value_{}.csv'.format(tag), index=False)
            df_rewards.to_csv('results/account_rewards_{}.csv'.format(tag), index=False)
            plt.plot(self.asset_memory, 'r')
            # savefig has no `index` option; that keyword belongs to to_csv.
            plt.savefig('results/account_value_{}.png'.format(tag))
            plt.close()

        # Add outputs to logger interface
        logger.record("environment/portfolio_value", end_total_asset)
        logger.record("environment/total_reward", tot_reward)
        logger.record("environment/total_reward_pct",
                      (tot_reward / (end_total_asset - tot_reward)) * 100)
        logger.record("environment/total_cost", self.cost)
        logger.record("environment/total_trades", self.trades)

        return self.state, self.reward, self.terminal, {}

    def step(self, actions):
        """Execute one trading step.

        Args:
            actions: array with one entry per stock in [-1, 1]; it is scaled
                by ``hmax`` and truncated to whole shares. Negative entries
                sell, positive entries buy.

        Returns:
            ``(state, reward, terminal, info)`` where ``reward`` is the change
            in total asset value over the step multiplied by
            ``reward_scaling`` and ``info`` is an empty dict.
        """
        if self.terminal:
            return self._finish_episode()

        actions = actions * self.hmax  # actions arrive scaled to [-1, 1]
        actions = actions.astype(int)  # whole shares only
        if self.turbulence_threshold is not None:
            if self.turbulence >= self.turbulence_threshold:
                # Market too turbulent: sell everything.
                actions = np.array([-self.hmax] * self.stock_dim)
        begin_total_asset = self._total_asset()

        # Largest sells first, then largest buys first.
        argsort_actions = np.argsort(actions)
        sell_index = argsort_actions[:np.where(actions < 0)[0].shape[0]]
        buy_index = argsort_actions[::-1][:np.where(actions > 0)[0].shape[0]]

        # Record what was actually traded, not what was requested.
        for index in sell_index:
            actions[index] = self._sell_stock(index, actions[index]) * (-1)
        for index in buy_index:
            actions[index] = self._buy_stock(index, actions[index])

        self.actions_memory.append(actions)

        self.day += 1

        if self.turbulence_threshold is not None:
            self.turbulence = self.data['turbulence'].values[0]
        self.state = self._update_state()

        end_total_asset = self._total_asset()
        self.asset_memory.append(end_total_asset)
        self.date_memory.append(self._get_date())
        self.reward = end_total_asset - begin_total_asset
        self.rewards_memory.append(self.reward)  # unscaled reward is kept in memory
        self.reward = self.reward * self.reward_scaling

        return self.state, self.reward, self.terminal, {}

    def reset(self):
        """Start a new episode from ``initial_data`` and return the initial state."""
        # Restore the market slice first so the state is built from the
        # initial data rather than from whatever slice the last episode ended on.
        self.day = 0
        self.data = self.initial_data
        self.state = self._initiate_state()

        if self.initial:
            self.asset_memory = [self.initial_amount]
        else:
            # Value the carried-over holdings at the prices of the new state.
            prices = np.array(self.state[1:(self.stock_dim + 1)])
            shares = np.array(
                self.previous_state[(self.stock_dim + 1):(self.stock_dim * 2 + 1)])
            previous_total_asset = self.previous_state[0] + sum(prices * shares)
            self.asset_memory = [previous_total_asset]

        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        self.terminal = False
        self.rewards_memory = []
        self.actions_memory = []
        self.date_memory = [self._get_date()]

        self.episode += 1

        return self.state

    def render(self, mode='human', close=False):
        """Return the current state; nothing is drawn."""
        return self.state

    def _prices(self):
        """Return the close prices of the current slice as a list."""
        if self.stock_dim > 1:
            return self.data.close.values.tolist()
        return [self.data.close]

    def _indicators(self):
        """Return all indicator values of the current slice, grouped per indicator."""
        if self.stock_dim > 1:
            return [value
                    for tech in self.tech_indicator_list
                    for value in self.data[tech].values.tolist()]
        return [self.data[tech] for tech in self.tech_indicator_list]

    def _initiate_state(self):
        """Build the first state of an episode.

        Starts from ``initial_amount`` and no holdings when ``self.initial``
        is True, otherwise from the cash and holdings in ``previous_state``.
        """
        if self.initial:
            cash = self.initial_amount
            holdings = [0] * self.stock_dim
        else:
            cash = self.previous_state[0]
            holdings = list(
                self.previous_state[(self.stock_dim + 1):(self.stock_dim * 2 + 1)])
        return [cash] + self._prices() + holdings + self._indicators()

    def _update_state(self):
        """Rebuild the state from the current slice, keeping cash and holdings."""
        holdings = list(self.state[(self.stock_dim + 1):(self.stock_dim * 2 + 1)])
        return [self.state[0]] + self._prices() + holdings + self._indicators()

    def _get_date(self):
        """Return the date of the current market slice."""
        if self.stock_dim > 1:
            return self.data.date.unique()[0]
        return self.data.date

    def save_asset_memory(self):
        """Return a DataFrame of dates and account values for the episode so far."""
        return pd.DataFrame({'date': self.date_memory,
                             'account_value': self.asset_memory})

    def save_action_memory(self):
        """Return a DataFrame of the executed actions for the episode so far.

        For several stocks the frame is indexed by date with one column per
        ticker; for a single stock it has ``date`` and ``actions`` columns.
        """
        # There is one more date than actions, so the last date is dropped.
        date_list = self.date_memory[:-1]
        action_list = self.actions_memory
        if self.stock_dim > 1:
            df_date = pd.DataFrame(date_list)
            df_date.columns = ['date']

            df_actions = pd.DataFrame(action_list)
            df_actions.columns = self.data.tic.values
            df_actions.index = df_date.date
        else:
            df_actions = pd.DataFrame({'date': date_list, 'actions': action_list})
        return df_actions

    def _seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_sb_env(self):
        """Wrap this environment in a ``DummyVecEnv`` and reset it.

        Returns:
            ``(vec_env, obs)``: the wrapper and the observation from its reset.
        """
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs
backports.functools_lru_cache=1.6.3=pyhd8ed1ab_0 + - ca-certificates=2020.12.5=h033912b_0 + - certifi=2020.12.5=py38h50d1736_1 + - decorator=4.4.2=py_0 + - ipykernel=5.5.3=py38h6c79ece_0 + - ipython=7.22.0=py38h6c79ece_0 + - ipython_genutils=0.2.0=py_1 + - jedi=0.18.0=py38h50d1736_2 + - jupyter_client=6.1.12=pyhd8ed1ab_0 + - jupyter_core=4.7.1=py38h50d1736_0 + - libcxx=11.1.0=habf9029_0 + - libffi=3.3=h046ec9c_2 + - libsodium=1.0.18=hbcb3906_1 + - ncurses=6.2=h2e338ed_4 + - openssl=1.1.1k=h0d85af4_0 + - parso=0.8.2=pyhd8ed1ab_0 + - pexpect=4.8.0=pyh9f0ad1d_2 + - pickleshare=0.7.5=py_1003 + - pip=21.0.1=pyhd8ed1ab_0 + - prompt-toolkit=3.0.18=pyha770c72_0 + - ptyprocess=0.7.0=pyhd3deb0d_0 + - pygments=2.8.1=pyhd8ed1ab_0 + - python=3.8.8=h4e93d89_0_cpython + - python-dateutil=2.8.1=py_0 + - python_abi=3.8=1_cp38 + - pyzmq=22.0.3=py38hd3b92b6_1 + - readline=8.1=h05e3726_0 + - setuptools=49.6.0=py38h50d1736_3 + - six=1.15.0=pyh9f0ad1d_0 + - sqlite=3.35.3=h44b9ce1_0 + - tk=8.6.10=h0419947_1 + - tornado=6.1=py38h5406a74_1 + - traitlets=5.0.5=py_0 + - wcwidth=0.2.5=pyh9f0ad1d_2 + - wheel=0.36.2=pyhd3deb0d_0 + - xz=5.2.5=haf1e3a3_1 + - zeromq=4.3.4=h1c7c35f_0 + - zlib=1.2.11=h7795811_1010 + - pip: + - absl-py==0.12.0 + - atari-py==0.2.6 + - attrs==20.3.0 + - cachetools==4.2.1 + - chardet==4.0.0 + - cloudpickle==1.6.0 + - cycler==0.10.0 + - empyrical==0.5.5 + - finrl==0.3.0 + - future==0.18.2 + - google-auth==1.28.0 + - google-auth-oauthlib==0.4.4 + - grpcio==1.36.1 + - gym==0.18.0 + - idna==2.10 + - iniconfig==1.1.1 + - int-date==0.1.8 + - ipython-genutils==0.2.0 + - joblib==1.0.1 + - kiwisolver==1.3.1 + - lxml==4.6.3 + - markdown==3.3.4 + - matplotlib==3.4.1 + - multitasking==0.0.9 + - numpy==1.20.2 + - oauthlib==3.1.0 + - opencv-python==4.5.1.48 + - packaging==20.9 + - pandas==1.2.3 + - pandas-datareader==0.9.0 + - pillow==7.2.0 + - pluggy==0.13.1 + - protobuf==3.15.6 + - psutil==5.8.0 + - py==1.10.0 + - pyasn1==0.4.8 + - pyasn1-modules==0.2.8 + - pyfolio==0.9.2 + - 
# --- model/online_stock_prediction.py ---
import sys
import os
import pandas as pd
import numpy as np
import datetime
from finrl.config import config
from finrl.marketdata.yahoodownloader import YahooDownloader
from finrl.preprocessing.preprocessors import FeatureEngineer
from finrl.preprocessing.data import data_split
from finrl.env.env_stocktrading import StockTradingEnv

from finrl.model.models import DRLAgent
from finrl.trade.backtest import backtest_stats, backtest_plot, get_daily_return, get_baseline

sys.path.append(os.path.join(os.path.dirname(__file__), "..", "env"))
from env_stocks import StockEnv
from env_onlinestocktrading import OnlineStockTradingEnv


class OnlineStockPrediction:
    """Drive a trading environment one step at a time as new data arrives."""

    def __init__(self, e_trade_gym, model):
        """Keep the environment and model and take the initial observation.

        Args:
            e_trade_gym: environment exposing ``get_sb_env``, ``_update_data``
                and ``step``.
            model: object exposing ``predict(observation)`` that returns an
                ``(action, states)`` pair.
        """
        self.e_trade_gym = e_trade_gym
        self.env_trade, self.cur_obs = self.e_trade_gym.get_sb_env()
        self.model = model

    def add_data(self, df):
        """Hand the newest market slice to the environment."""
        self.e_trade_gym._update_data(df)

    def predict(self):
        """Choose an action for the current observation and apply it.

        Returns:
            ``(action, states, next_obs, rewards)``. The done flag and info
            dict returned by the environment are discarded.
        """
        action, states = self.model.predict(self.cur_obs)
        # NOTE(review): the step goes to the raw environment, not the vec-env
        # wrapper, so after the first call cur_obs is the environment's flat
        # state rather than the wrapper's reset observation - confirm the
        # model accepts both.
        next_obs, rewards, done, info = self.e_trade_gym.step(action.squeeze())
        self.cur_obs = next_obs
        return action, states, next_obs, rewards

    def run(self):
        """Placeholder; not implemented."""
        pass


def generate_sentiment_scores(start_date, end_date, tickers=config.DOW_30_TICKER, time_fmt="%Y-%m-%d"):
    """Generate a random sentiment score for every (date, ticker) pair.

    Args:
        start_date: first calendar day, inclusive.
        end_date: last calendar day, inclusive.
        tickers: ticker symbols to generate scores for.
        time_fmt: ``strftime`` format used for the ``date`` column.

    Returns:
        DataFrame with columns ``date`` (str), ``tic`` and ``sentiment``, one
        row per calendar day and ticker, with ``sentiment`` drawn uniformly
        from [-1, 1).
    """
    dates = np.array([day.strftime(time_fmt)
                      for day in pd.date_range(start_date, end_date)])
    # Rows are ordered by date, then by ticker.
    pairs = np.array(np.meshgrid(dates, tickers)).T.reshape(-1, 2)
    df = pd.DataFrame(pairs, columns=['date', 'tic'])
    # 1-D draw so the column assignment does not depend on 2-D handling.
    df['sentiment'] = np.random.uniform(low=-1.0, high=1.0, size=len(pairs))
    return df


def get_initial_data(numerical_df, sentiment_df, use_turbulence=False):
    """Add technical indicators to the price data and attach sentiment scores.

    Args:
        numerical_df: price data passed to ``FeatureEngineer.preprocess_data``.
        sentiment_df: frame with ``date``, ``tic`` and ``sentiment`` columns.
        use_turbulence: forwarded to ``FeatureEngineer``.

    Returns:
        The preprocessed data left-joined with the sentiment scores on
        ``date`` and ``tic``, with missing values replaced by 0.
    """
    fe = FeatureEngineer(use_turbulence=use_turbulence)
    numerical_df = fe.preprocess_data(numerical_df)
    df = numerical_df.merge(sentiment_df, on=["date", "tic"], how="left")
    # fillna returns a new frame; the result has to be kept.
    df = df.fillna(0)
    return df


def main():
    """Download data, build the environments and replay the trade period online."""
    start_date = '2020-01-01'
    trade_start_date = '2020-12-01'
    end_date = '2021-01-01'
    ticker_list = config.DOW_30_TICKER
    numerical_df = YahooDownloader(start_date=start_date, end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    train_data = data_split(initial_data, start_date, trade_start_date)
    trade_data = data_split(initial_data, trade_start_date, end_date)
    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    # cash + one price and one holding per stock + every indicator per stock
    state_space = 1 + 2 * stock_dimension + len(indicator_list) * stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }
    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    e_trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)
    training_agent = DRLAgent(env=env_train)
    # NOTE(review): the model is used as created; no training call is made
    # here, so the actions come from an untrained policy.
    model_a2c = training_agent.get_model("a2c")
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model_a2c)
    # Day 0 seeded the environment; feed every later day, including the last.
    last_day = trade_data.index.unique().max()
    for i in range(1, last_day + 1):
        print(trade_data.loc[i])
        online_stock_pred.add_data(trade_data.loc[i])
        action, states, next_obs, rewards = online_stock_pred.predict()
        print("Action:", action)
        print("States: ", states)
        print("Next observation: ", next_obs)
        print("Rewards: ", rewards)


if __name__ == "__main__":
    main()
"\n", + "matplotlib.use(\"Agg\")\n", + "import datetime\n", + "\n", + "from finrl.config import config\n", + "from finrl.marketdata.yahoodownloader import YahooDownloader\n", + "from finrl.preprocessing.preprocessors import FeatureEngineer\n", + "from finrl.preprocessing.data import data_split\n", + "from finrl.env.env_stocktrading import StockTradingEnv\n", + "from finrl.model.models import DRLAgent\n", + "from finrl.trade.backtest import backtest_stats, backtest_plot, get_daily_return, get_baseline\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['AAPL',\n", + " 'MSFT',\n", + " 'JPM',\n", + " 'V',\n", + " 'RTX',\n", + " 'PG',\n", + " 'GS',\n", + " 'NKE',\n", + " 'DIS',\n", + " 'AXP',\n", + " 'HD',\n", + " 'INTC',\n", + " 'WMT',\n", + " 'IBM',\n", + " 'MRK',\n", + " 'UNH',\n", + " 'KO',\n", + " 'CAT',\n", + " 'TRV',\n", + " 'JNJ',\n", + " 'CVX',\n", + " 'MCD',\n", + " 'VZ',\n", + " 'CSCO',\n", + " 'XOM',\n", + " 'BA',\n", + " 'MMM',\n", + " 'PFE',\n", + " 'WBA',\n", + " 'DD']" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "config.DOW_30_TICKER" + ] + }, + { + "source": [ + "## Fetch Data" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Loads numerical data from csv if filepath is specified, otherwise downloads it from yfinance for specified dates and tickers\n", + "def get_numerical_data(filepath='',start_date='2020-01-01',end_date='2021-01-01',ticker_list=config.DOW_30_TICKER):\n", + " if filepath:\n", + " df = data.load_dataset(filepath)\n", + " else:\n", + " df = YahooDownloader(start_date=start_date,end_date=end_date,ticker_list=ticker_list).fetch_data()\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Need to pull textual 
data from sources\n", + "def get_textual_data():\n", + " pass\n", + "\n", + "# Run through sentiment analysis model to get the sentiment\n", + "def analyze_textual_data():\n", + " pass\n", + "\n", + "# Compute sentiment score. This needs to be computed for every ticker and day based on the sentiment analysis models output for text related to that day.\n", + "def compute_score():\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + 
"[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "Shape of DataFrame: (7590, 8)\n" + ] + } + ], + "source": [ + "numerical_df = get_numerical_data()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " date open high low close volume \\\n", + "0 2020-01-02 74.059998 75.150002 73.797501 74.333511 135480400 \n", + "1 2020-01-02 124.660004 126.269997 124.230003 123.267235 2708000 \n", + "2 2020-01-02 328.549988 333.350006 327.700012 331.348572 4544400 \n", + "3 2020-01-02 149.000000 150.550003 147.979996 144.700500 3311900 \n", + "4 2020-01-02 48.060001 48.419998 47.880001 46.443089 16708100 \n", + "\n", + " tic day \n", + "0 AAPL 3 \n", + "1 AXP 3 \n", + "2 BA 3 \n", + "3 CAT 3 \n", + "4 CSCO 3 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
dateopenhighlowclosevolumeticday
02020-01-0274.05999875.15000273.79750174.333511135480400AAPL3
12020-01-02124.660004126.269997124.230003123.2672352708000AXP3
22020-01-02328.549988333.350006327.700012331.3485724544400BA3
32020-01-02149.000000150.550003147.979996144.7005003311900CAT3
42020-01-0248.06000148.41999847.88000146.44308916708100CSCO3
\n
" + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "numerical_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "7586 2020-12-31\n", + "7587 2020-12-31\n", + "7588 2020-12-31\n", + "7589 2020-12-31\n", + "Name: date, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 84 + } + ], + "source": [ + "numerical_df[-4:]['date']" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "# time_fmt = \"%Y-%m-%d\"\n", + "# dates = pd.date_range('2020-01-01','2021-01-01').to_pydatetime()\n", + "# dates = np.array([datetime.strftime(r,time_fmt) for r in dates])\n", + "# tickers = np.array(config.DOW_30_TICKER)\n", + "# data = np.array(np.meshgrid(dates,tickers)).T.reshape(-1,2)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "\n", + "def generate_sentiment_scores(start_date,end_date,tickers=config.DOW_30_TICKER,time_fmt=\"%Y-%m-%d\"):\n", + " dates = pd.date_range(start_date,end_date).to_pydatetime()\n", + " dates = np.array([datetime.strftime(r,time_fmt) for r in dates])\n", + " data = np.array(np.meshgrid(dates,tickers)).T.reshape(-1,2)\n", + " scores = np.random.uniform(low=-1.0,high=1.0,size=(len(data),1))\n", + " data = np.concatenate((data,scores),axis=1)\n", + " df = pd.DataFrame(data,columns=['date','tic','sentiment'])\n", + " return df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "sentiment_df = generate_sentiment_scores('2020-01-02','2021-01-01')" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " date tic sentiment\n", + "0 2020-01-02 AAPL 0.2833926324598415\n", + "1 
2020-01-02 MSFT 0.35161072063542154\n", + "2 2020-01-02 JPM -0.5808555250854983\n", + "3 2020-01-02 V -0.39750294456355584\n", + "4 2020-01-02 RTX -0.2170646506354239\n", + ".. ... ... ...\n", + "56 2020-01-03 MMM 0.05423965692744859\n", + "57 2020-01-03 PFE 0.2665994090757775\n", + "58 2020-01-03 WBA -0.8667574036009138\n", + "59 2020-01-03 DD 0.27551254408626114\n", + "60 2020-01-04 AAPL -0.1348328790150064\n", + "\n", + "[61 rows x 3 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
dateticsentiment
02020-01-02AAPL0.2833926324598415
12020-01-02MSFT0.35161072063542154
22020-01-02JPM-0.5808555250854983
32020-01-02V-0.39750294456355584
42020-01-02RTX-0.2170646506354239
............
562020-01-03MMM0.05423965692744859
572020-01-03PFE0.2665994090757775
582020-01-03WBA-0.8667574036009138
592020-01-03DD0.27551254408626114
602020-01-04AAPL-0.1348328790150064
\n

61 rows × 3 columns

\n
" + }, + "metadata": {}, + "execution_count": 47 + } + ], + "source": [ + "sentiment_df[:61]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "source": [ + "## Preprocess data" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def join_data(numerical_df,sentiment_df):\n", + " return numerical_df.merge(sentiment_df,on=['date','tic'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess_data(numerical_df,sentiment_df,use_turbulence=False):\n", + " fe = FeatureEngineer(use_turbulence=use_turbulence)\n", + " numerical_df = fe.preprocess_data(numerical_df)\n", + " df = join_data(numerical_df,sentiment_df)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Successfully added technical indicators\n" + ] + } + ], + "source": [ + "# Single sample\n", + "\n", + "two_day_numerical = numerical_df.loc[:60]\n", + "two_day_sentiment = sentiment_df.loc[:60]\n", + "two_day_data = preprocess_data(single_day_numerical,single_day_sentiment)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " date open high low close volume \\\n", + "0 2020-01-02 74.059998 75.150002 73.797501 74.333511 135480400 \n", + "1 2020-01-02 124.660004 126.269997 124.230003 123.267235 2708000 \n", + "2 2020-01-02 328.549988 333.350006 327.700012 331.348572 4544400 \n", + "3 2020-01-02 149.000000 150.550003 147.979996 144.700500 3311900 \n", + "4 2020-01-02 48.060001 48.419998 47.880001 46.443089 16708100 \n", + "5 2020-01-02 120.809998 121.629997 120.769997 113.316681 5205000 \n", + "6 2020-01-02 64.800003 65.160004 
63.480000 61.819908 5967300 \n", + "7 2020-01-02 145.289993 148.199997 145.100006 148.199997 9502100 \n", + "8 2020-01-02 231.000000 234.639999 230.160004 227.913971 3736300 \n", + "9 2020-01-02 219.080002 219.759995 217.839996 213.260651 3935700 \n", + "10 2020-01-02 135.000000 135.919998 134.770004 126.975204 3148600 \n", + "11 2020-01-02 60.240002 60.970001 60.220001 59.005745 18056000 \n", + "12 2020-01-02 145.869995 146.020004 145.080002 141.226059 5777000 \n", + "13 2020-01-02 139.789993 141.100006 139.259995 134.380966 10803700 \n", + "14 2020-01-02 55.320000 55.430000 54.759998 52.731567 11867700 \n", + "15 2020-01-02 198.000000 200.800003 197.809998 194.704422 3554200 \n", + "16 2020-01-02 177.679993 180.009995 177.139999 172.119888 3601700 \n", + "17 2020-01-02 91.080002 92.139999 90.370003 88.430603 7873500 \n", + "18 2020-01-02 158.779999 160.729996 158.330002 158.571075 22622100 \n", + "19 2020-01-02 101.360001 102.209999 101.019997 101.029839 5644100 \n", + "20 2020-01-02 37.286530 37.333965 36.888046 35.293362 16514072 \n", + "21 2020-01-02 124.500000 124.730003 122.940002 118.952316 8130800 \n", + "22 2020-01-02 94.235367 96.425423 94.235367 93.120224 4451584 \n", + "23 2020-01-02 137.520004 137.740005 136.139999 133.839127 1117300 \n", + "24 2020-01-02 293.980011 295.700012 289.790009 286.745422 2543400 \n", + "25 2020-01-02 189.000000 191.139999 188.720001 189.656342 8733000 \n", + "26 2020-01-02 61.380001 61.450001 60.810001 57.256145 11447900 \n", + "27 2020-01-02 59.279999 59.590000 58.700001 56.046688 5700500 \n", + "28 2020-01-02 118.860001 119.889999 118.699997 116.500679 6764900 \n", + "29 2020-01-02 70.239998 71.019997 70.239998 64.560120 12456400 \n", + "30 2020-01-03 74.287498 75.144997 74.125000 73.610840 146322800 \n", + "31 2020-01-03 124.320000 125.099998 123.940002 122.042877 2090600 \n", + "32 2020-01-03 330.630005 334.890015 330.299988 330.791901 3875900 \n", + "33 2020-01-03 148.770004 149.960007 147.449997 142.691452 3100600 
\n", + "34 2020-01-03 47.910000 48.139999 47.480000 45.685341 15577400 \n", + "35 2020-01-03 121.779999 122.720001 120.739998 112.924751 6360900 \n", + "36 2020-01-03 62.750000 62.950001 61.880001 60.515362 6005300 \n", + "37 2020-01-03 146.399994 147.899994 146.050003 146.500000 7320200 \n", + "38 2020-01-03 231.600006 232.610001 230.300003 225.248886 2274500 \n", + "39 2020-01-03 217.139999 219.679993 216.750000 212.551910 3423200 \n", + "40 2020-01-03 133.570007 134.860001 133.559998 125.962540 2373700 \n", + "41 2020-01-03 59.810001 60.700001 59.810001 58.288052 15293900 \n", + "42 2020-01-03 143.500000 145.369995 143.000000 139.590973 5752400 \n", + "43 2020-01-03 137.500000 139.229996 137.080002 132.607590 10386800 \n", + "44 2020-01-03 54.320000 54.990002 54.090000 52.443882 11354500 \n", + "45 2020-01-03 199.389999 200.550003 198.850006 194.015961 2767600 \n", + "46 2020-01-03 177.020004 178.660004 175.630005 170.637741 2466900 \n", + "47 2020-01-03 90.680000 92.070000 90.510002 87.671577 5633300 \n", + "48 2020-01-03 158.320007 159.949997 158.059998 156.596588 21116200 \n", + "49 2020-01-03 100.589996 102.000000 100.309998 100.753044 4541800 \n", + "50 2020-01-03 36.736244 37.229603 36.688805 35.104000 14922848 \n", + "51 2020-01-03 122.160004 123.529999 121.860001 118.152298 7970500 \n", + "52 2020-01-03 94.984268 97.325363 94.719948 93.247925 4969756 \n", + "53 2020-01-03 136.550003 137.369995 136.350006 133.362213 927300 \n", + "54 2020-01-03 287.269989 291.880005 284.359985 283.843658 2711400 \n", + "55 2020-01-03 188.410004 190.960007 187.919998 188.147980 4899700 \n", + "56 2020-01-03 60.590000 60.790001 60.070000 56.646542 13263200 \n", + "57 2020-01-03 58.540001 59.349998 58.180000 56.046688 4892300 \n", + "58 2020-01-03 118.269997 118.790001 117.589996 115.472214 5399200 \n", + "59 2020-01-03 71.339996 71.370003 70.160004 64.041092 17386900 \n", + "\n", + " tic day macd boll_ub boll_lb rsi_30 cci_30 dx_30 \\\n", + "0 AAPL 3 0.000000 74.994187 
72.950164 0.0 -66.666667 100.0 \n", + "1 AXP 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "2 BA 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "3 CAT 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "4 CSCO 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "5 CVX 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "6 DD 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "7 DIS 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "8 GS 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "9 HD 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "10 IBM 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "11 INTC 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "12 JNJ 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "13 JPM 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "14 KO 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "15 MCD 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "16 MMM 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "17 MRK 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "18 MSFT 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "19 NKE 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "20 PFE 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "21 PG 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "22 RTX 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "23 TRV 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "24 UNH 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "25 V 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "26 VZ 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "27 WBA 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "28 WMT 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "29 XOM 3 0.000000 74.994187 72.950164 0.0 -66.666667 100.0 \n", + "30 AAPL 4 -0.016214 74.994187 
72.950164 0.0 -66.666667 100.0 \n", + "31 AXP 4 -0.027470 124.386559 120.923553 0.0 -66.666667 100.0 \n", + "32 BA 4 -0.012489 331.857488 330.282984 0.0 66.666667 100.0 \n", + "33 CAT 4 -0.045075 146.537200 140.854753 0.0 -66.666667 100.0 \n", + "34 CSCO 4 -0.017001 47.135832 44.992598 0.0 -66.666667 100.0 \n", + "35 CVX 4 -0.008793 113.674988 112.566444 0.0 66.666667 100.0 \n", + "36 DD 4 -0.029269 63.012542 59.322728 0.0 -66.666667 100.0 \n", + "37 DIS 4 -0.038141 149.754157 144.945840 0.0 -66.666667 100.0 \n", + "38 GS 4 -0.059794 230.350428 222.812429 0.0 -66.666667 100.0 \n", + "39 HD 4 -0.015901 213.908591 211.903970 0.0 -66.666667 100.0 \n", + "40 IBM 4 -0.022720 127.900996 125.036748 0.0 -66.666667 100.0 \n", + "41 INTC 4 -0.016102 59.661870 57.631927 0.0 -66.666667 100.0 \n", + "42 JNJ 4 -0.036685 142.720877 138.096155 0.0 -66.666667 100.0 \n", + "43 JPM 4 -0.039787 136.002211 130.986345 0.0 -66.666667 100.0 \n", + "44 KO 4 -0.006454 52.994573 52.180876 0.0 -66.666667 100.0 \n", + "45 MCD 4 -0.015446 195.333823 193.386560 0.0 66.666667 100.0 \n", + "46 MMM 4 -0.033253 173.474887 169.282742 0.0 -66.666667 100.0 \n", + "47 MRK 4 -0.017029 89.124515 86.977666 0.0 -66.666667 100.0 \n", + "48 MSFT 4 -0.044299 160.376179 154.791485 0.0 -66.666667 100.0 \n", + "49 NKE 4 -0.006210 101.282888 100.499995 0.0 -66.666667 100.0 \n", + "50 PFE 4 -0.004248 35.466479 34.930883 0.0 -66.666667 100.0 \n", + "51 PG 4 -0.017949 119.683704 117.420910 0.0 -66.666667 100.0 \n", + "52 RTX 4 0.002865 93.364671 93.003478 100.0 66.666667 100.0 \n", + "53 TRV 4 -0.010700 134.275127 132.926212 0.0 -66.666667 100.0 \n", + "54 UNH 4 -0.065104 289.398254 281.190827 0.0 -66.666667 100.0 \n", + "55 V 4 -0.033841 191.035306 186.769015 0.0 -66.666667 100.0 \n", + "56 VZ 4 -0.013677 57.813454 56.089233 0.0 -66.666667 100.0 \n", + "57 WBA 4 0.000000 56.046688 56.046688 0.0 -66.666667 100.0 \n", + "58 WMT 4 -0.023075 117.440916 114.531977 0.0 -66.666667 100.0 \n", + "59 XOM 4 -0.011645 65.034622 
63.566590 0.0 -66.666667 100.0 \n", + "\n", + " close_30_sma close_60_sma sentiment \n", + "0 74.333511 74.333511 0.2833926324598415 \n", + "1 123.267235 123.267235 -0.11115165541267769 \n", + "2 331.348572 331.348572 0.29541394800696064 \n", + "3 144.700500 144.700500 0.8798273908339771 \n", + "4 46.443089 46.443089 -0.07381205009223857 \n", + "5 113.316681 113.316681 -0.4464516834557266 \n", + "6 61.819908 61.819908 -0.957457220229198 \n", + "7 148.199997 148.199997 0.7801606410796214 \n", + "8 227.913971 227.913971 -0.9926437559956203 \n", + "9 213.260651 213.260651 -0.7517608373429434 \n", + "10 126.975204 126.975204 -0.9518127988808116 \n", + "11 59.005745 59.005745 -0.7295197689489716 \n", + "12 141.226059 141.226059 -0.368885697305978 \n", + "13 134.380966 134.380966 -0.5808555250854983 \n", + "14 52.731567 52.731567 -0.3168016119684709 \n", + "15 194.704422 194.704422 0.9544598341934427 \n", + "16 172.119888 172.119888 -0.45637313484623276 \n", + "17 88.430603 88.430603 -0.8483626960013035 \n", + "18 158.571075 158.571075 0.35161072063542154 \n", + "19 101.029839 101.029839 0.33235868699960736 \n", + "20 35.293362 35.293362 -0.21671163807774896 \n", + "21 118.952316 118.952316 -0.2930800244388918 \n", + "22 93.120224 93.120224 -0.2170646506354239 \n", + "23 133.839127 133.839127 -0.7729591574297954 \n", + "24 286.745422 286.745422 -0.07014709176506329 \n", + "25 189.656342 189.656342 -0.39750294456355584 \n", + "26 57.256145 57.256145 0.5056281510451979 \n", + "27 56.046688 56.046688 -0.7445853388654293 \n", + "28 116.500679 116.500679 -0.3850887915612753 \n", + "29 64.560120 64.560120 -0.9017768038998397 \n", + "30 73.972176 73.972176 0.49483270328122275 \n", + "31 122.655056 122.655056 0.42247144916316404 \n", + "32 331.070236 331.070236 -0.1438780409884277 \n", + "33 143.695976 143.695976 0.16136106311291898 \n", + "34 46.064215 46.064215 -0.6097691029467514 \n", + "35 113.120716 113.120716 -0.09582076137347384 \n", + "36 61.167635 61.167635 
0.27551254408626114 \n", + "37 147.349998 147.349998 -0.2684184623789383 \n", + "38 226.581429 226.581429 -0.0666785697971437 \n", + "39 212.906281 212.906281 0.4502175044140855 \n", + "40 126.468872 126.468872 -0.3713517376463129 \n", + "41 58.646898 58.646898 0.9328784644234758 \n", + "42 140.408516 140.408516 -0.36031163722297554 \n", + "43 133.494278 133.494278 0.4659316067482131 \n", + "44 52.587725 52.587725 0.8255035548375986 \n", + "45 194.360191 194.360191 -0.6470241167267008 \n", + "46 171.378815 171.378815 0.05423965692744859 \n", + "47 88.051090 88.051090 -0.14301320432544107 \n", + "48 157.583832 157.583832 -0.35755855471695863 \n", + "49 100.891441 100.891441 -0.4964396444392536 \n", + "50 35.198681 35.198681 0.2665994090757775 \n", + "51 118.552307 118.552307 -0.4740679943893711 \n", + "52 93.184074 93.184074 -0.3879245811768506 \n", + "53 133.600670 133.600670 0.8920625605656347 \n", + "54 285.294540 285.294540 0.17691277602586508 \n", + "55 188.902161 188.902161 0.23339625131239883 \n", + "56 56.951344 56.951344 0.03492856441277947 \n", + "57 56.046688 56.046688 -0.8667574036009138 \n", + "58 115.986446 115.986446 0.3016037669797418 \n", + "59 64.300606 64.300606 0.6014761207460211 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
dateopenhighlowclosevolumeticdaymacdboll_ubboll_lbrsi_30cci_30dx_30close_30_smaclose_60_smasentiment
02020-01-0274.05999875.15000273.79750174.333511135480400AAPL30.00000074.99418772.9501640.0-66.666667100.074.33351174.3335110.2833926324598415
12020-01-02124.660004126.269997124.230003123.2672352708000AXP30.00000074.99418772.9501640.0-66.666667100.0123.267235123.267235-0.11115165541267769
22020-01-02328.549988333.350006327.700012331.3485724544400BA30.00000074.99418772.9501640.0-66.666667100.0331.348572331.3485720.29541394800696064
32020-01-02149.000000150.550003147.979996144.7005003311900CAT30.00000074.99418772.9501640.0-66.666667100.0144.700500144.7005000.8798273908339771
42020-01-0248.06000148.41999847.88000146.44308916708100CSCO30.00000074.99418772.9501640.0-66.666667100.046.44308946.443089-0.07381205009223857
52020-01-02120.809998121.629997120.769997113.3166815205000CVX30.00000074.99418772.9501640.0-66.666667100.0113.316681113.316681-0.4464516834557266
62020-01-0264.80000365.16000463.48000061.8199085967300DD30.00000074.99418772.9501640.0-66.666667100.061.81990861.819908-0.957457220229198
72020-01-02145.289993148.199997145.100006148.1999979502100DIS30.00000074.99418772.9501640.0-66.666667100.0148.199997148.1999970.7801606410796214
82020-01-02231.000000234.639999230.160004227.9139713736300GS30.00000074.99418772.9501640.0-66.666667100.0227.913971227.913971-0.9926437559956203
92020-01-02219.080002219.759995217.839996213.2606513935700HD30.00000074.99418772.9501640.0-66.666667100.0213.260651213.260651-0.7517608373429434
102020-01-02135.000000135.919998134.770004126.9752043148600IBM30.00000074.99418772.9501640.0-66.666667100.0126.975204126.975204-0.9518127988808116
112020-01-0260.24000260.97000160.22000159.00574518056000INTC30.00000074.99418772.9501640.0-66.666667100.059.00574559.005745-0.7295197689489716
122020-01-02145.869995146.020004145.080002141.2260595777000JNJ30.00000074.99418772.9501640.0-66.666667100.0141.226059141.226059-0.368885697305978
132020-01-02139.789993141.100006139.259995134.38096610803700JPM30.00000074.99418772.9501640.0-66.666667100.0134.380966134.380966-0.5808555250854983
142020-01-0255.32000055.43000054.75999852.73156711867700KO30.00000074.99418772.9501640.0-66.666667100.052.73156752.731567-0.3168016119684709
152020-01-02198.000000200.800003197.809998194.7044223554200MCD30.00000074.99418772.9501640.0-66.666667100.0194.704422194.7044220.9544598341934427
162020-01-02177.679993180.009995177.139999172.1198883601700MMM30.00000074.99418772.9501640.0-66.666667100.0172.119888172.119888-0.45637313484623276
172020-01-0291.08000292.13999990.37000388.4306037873500MRK30.00000074.99418772.9501640.0-66.666667100.088.43060388.430603-0.8483626960013035
182020-01-02158.779999160.729996158.330002158.57107522622100MSFT30.00000074.99418772.9501640.0-66.666667100.0158.571075158.5710750.35161072063542154
192020-01-02101.360001102.209999101.019997101.0298395644100NKE30.00000074.99418772.9501640.0-66.666667100.0101.029839101.0298390.33235868699960736
202020-01-0237.28653037.33396536.88804635.29336216514072PFE30.00000074.99418772.9501640.0-66.666667100.035.29336235.293362-0.21671163807774896
212020-01-02124.500000124.730003122.940002118.9523168130800PG30.00000074.99418772.9501640.0-66.666667100.0118.952316118.952316-0.2930800244388918
222020-01-0294.23536796.42542394.23536793.1202244451584RTX30.00000074.99418772.9501640.0-66.666667100.093.12022493.120224-0.2170646506354239
232020-01-02137.520004137.740005136.139999133.8391271117300TRV30.00000074.99418772.9501640.0-66.666667100.0133.839127133.839127-0.7729591574297954
242020-01-02293.980011295.700012289.790009286.7454222543400UNH30.00000074.99418772.9501640.0-66.666667100.0286.745422286.745422-0.07014709176506329
252020-01-02189.000000191.139999188.720001189.6563428733000V30.00000074.99418772.9501640.0-66.666667100.0189.656342189.656342-0.39750294456355584
262020-01-0261.38000161.45000160.81000157.25614511447900VZ30.00000074.99418772.9501640.0-66.666667100.057.25614557.2561450.5056281510451979
272020-01-0259.27999959.59000058.70000156.0466885700500WBA30.00000074.99418772.9501640.0-66.666667100.056.04668856.046688-0.7445853388654293
282020-01-02118.860001119.889999118.699997116.5006796764900WMT30.00000074.99418772.9501640.0-66.666667100.0116.500679116.500679-0.3850887915612753
292020-01-0270.23999871.01999770.23999864.56012012456400XOM30.00000074.99418772.9501640.0-66.666667100.064.56012064.560120-0.9017768038998397
302020-01-0374.28749875.14499774.12500073.610840146322800AAPL4-0.01621474.99418772.9501640.0-66.666667100.073.97217673.9721760.49483270328122275
312020-01-03124.320000125.099998123.940002122.0428772090600AXP4-0.027470124.386559120.9235530.0-66.666667100.0122.655056122.6550560.42247144916316404
322020-01-03330.630005334.890015330.299988330.7919013875900BA4-0.012489331.857488330.2829840.066.666667100.0331.070236331.070236-0.1438780409884277
332020-01-03148.770004149.960007147.449997142.6914523100600CAT4-0.045075146.537200140.8547530.0-66.666667100.0143.695976143.6959760.16136106311291898
342020-01-0347.91000048.13999947.48000045.68534115577400CSCO4-0.01700147.13583244.9925980.0-66.666667100.046.06421546.064215-0.6097691029467514
352020-01-03121.779999122.720001120.739998112.9247516360900CVX4-0.008793113.674988112.5664440.066.666667100.0113.120716113.120716-0.09582076137347384
362020-01-0362.75000062.95000161.88000160.5153626005300DD4-0.02926963.01254259.3227280.0-66.666667100.061.16763561.1676350.27551254408626114
372020-01-03146.399994147.899994146.050003146.5000007320200DIS4-0.038141149.754157144.9458400.0-66.666667100.0147.349998147.349998-0.2684184623789383
382020-01-03231.600006232.610001230.300003225.2488862274500GS4-0.059794230.350428222.8124290.0-66.666667100.0226.581429226.581429-0.0666785697971437
392020-01-03217.139999219.679993216.750000212.5519103423200HD4-0.015901213.908591211.9039700.0-66.666667100.0212.906281212.9062810.4502175044140855
402020-01-03133.570007134.860001133.559998125.9625402373700IBM4-0.022720127.900996125.0367480.0-66.666667100.0126.468872126.468872-0.3713517376463129
412020-01-0359.81000160.70000159.81000158.28805215293900INTC4-0.01610259.66187057.6319270.0-66.666667100.058.64689858.6468980.9328784644234758
422020-01-03143.500000145.369995143.000000139.5909735752400JNJ4-0.036685142.720877138.0961550.0-66.666667100.0140.408516140.408516-0.36031163722297554
432020-01-03137.500000139.229996137.080002132.60759010386800JPM4-0.039787136.002211130.9863450.0-66.666667100.0133.494278133.4942780.4659316067482131
442020-01-0354.32000054.99000254.09000052.44388211354500KO4-0.00645452.99457352.1808760.0-66.666667100.052.58772552.5877250.8255035548375986
452020-01-03199.389999200.550003198.850006194.0159612767600MCD4-0.015446195.333823193.3865600.066.666667100.0194.360191194.360191-0.6470241167267008
462020-01-03177.020004178.660004175.630005170.6377412466900MMM4-0.033253173.474887169.2827420.0-66.666667100.0171.378815171.3788150.05423965692744859
472020-01-0390.68000092.07000090.51000287.6715775633300MRK4-0.01702989.12451586.9776660.0-66.666667100.088.05109088.051090-0.14301320432544107
482020-01-03158.320007159.949997158.059998156.59658821116200MSFT4-0.044299160.376179154.7914850.0-66.666667100.0157.583832157.583832-0.35755855471695863
492020-01-03100.589996102.000000100.309998100.7530444541800NKE4-0.006210101.282888100.4999950.0-66.666667100.0100.891441100.891441-0.4964396444392536
502020-01-0336.73624437.22960336.68880535.10400014922848PFE4-0.00424835.46647934.9308830.0-66.666667100.035.19868135.1986810.2665994090757775
512020-01-03122.160004123.529999121.860001118.1522987970500PG4-0.017949119.683704117.4209100.0-66.666667100.0118.552307118.552307-0.4740679943893711
522020-01-0394.98426897.32536394.71994893.2479254969756RTX40.00286593.36467193.003478100.066.666667100.093.18407493.184074-0.3879245811768506
532020-01-03136.550003137.369995136.350006133.362213927300TRV4-0.010700134.275127132.9262120.0-66.666667100.0133.600670133.6006700.8920625605656347
542020-01-03287.269989291.880005284.359985283.8436582711400UNH4-0.065104289.398254281.1908270.0-66.666667100.0285.294540285.2945400.17691277602586508
552020-01-03188.410004190.960007187.919998188.1479804899700V4-0.033841191.035306186.7690150.0-66.666667100.0188.902161188.9021610.23339625131239883
562020-01-0360.59000060.79000160.07000056.64654213263200VZ4-0.01367757.81345456.0892330.0-66.666667100.056.95134456.9513440.03492856441277947
572020-01-0358.54000159.34999858.18000056.0466884892300WBA40.00000056.04668856.0466880.0-66.666667100.056.04668856.046688-0.8667574036009138
582020-01-03118.269997118.790001117.589996115.4722145399200WMT4-0.023075117.440916114.5319770.0-66.666667100.0115.986446115.9864460.3016037669797418
592020-01-0371.33999671.37000370.16000464.04109217386900XOM4-0.01164565.03462263.5665900.0-66.666667100.064.30060664.3006060.6014761207460211
\n
" + }, + "metadata": {}, + "execution_count": 52 + } + ], + "source": [ + "two_day_data" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Successfully added technical indicators\n" + ] + } + ], + "source": [ + "processed = preprocess_data(numerical_df,sentiment_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# import itertools\n", + "# list_ticker = processed[\"tic\"].unique().tolist()\n", + "# list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str))\n", + "# combination = list(itertools.product(list_date,list_ticker))\n", + "\n", + "# processed_full = pd.DataFrame(combination,columns=[\"date\",\"tic\"]).merge(processed,on=[\"date\",\"tic\"],how=\"left\")\n", + "# processed_full = processed_full[processed_full['date'].isin(processed['date'])]\n", + "# processed_full = processed_full.sort_values(['date','tic'])\n", + "\n", + "\n", + "# processed_full = processed_full.fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " date open high low close volume \\\n", + "3478 2020-06-17 119.860001 120.129997 118.400002 117.619827 6722300 \n", + "3599 2020-06-23 46.900002 47.220001 46.500000 43.970928 18916600 \n", + "5921 2020-10-13 54.270000 54.290001 53.619999 53.118870 20005800 \n", + "1441 2020-03-12 87.660004 89.610001 81.809998 81.815742 12206600 \n", + "4890 2020-08-25 124.697502 125.180000 123.052498 124.424088 211495600 \n", + "\n", + " tic day macd boll_ub boll_lb rsi_30 cci_30 \\\n", + "3478 WMT 2 -1.290810 125.199126 116.131516 46.609719 -138.407024 \n", + "3599 XOM 1 0.368817 50.564067 40.015104 50.305326 0.772453 \n", + "5921 INTC 1 0.846364 53.585163 47.605143 54.869043 165.159351 \n", + "1441 AXP 3 -8.979919 145.336436 
84.050319 27.036437 -198.757983 \n", + "4890 AAPL 1 6.845802 128.234868 96.448479 73.723113 127.021210 \n", + "\n", + " dx_30 close_30_sma close_60_sma sentiment \n", + "3478 18.794410 121.310914 121.087573 -0.398418752383233 \n", + "3599 4.928755 44.004824 41.655123 -0.8059696803289789 \n", + "5921 33.459339 50.166393 49.776057 0.18158025813670298 \n", + "1441 66.161600 119.648064 122.248775 -0.9747474568266239 \n", + "4890 68.616624 106.625726 97.585338 0.3361142593207198 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
dateopenhighlowclosevolumeticdaymacdboll_ubboll_lbrsi_30cci_30dx_30close_30_smaclose_60_smasentiment
34782020-06-17119.860001120.129997118.400002117.6198276722300WMT2-1.290810125.199126116.13151646.609719-138.40702418.794410121.310914121.087573-0.398418752383233
35992020-06-2346.90000247.22000146.50000043.97092818916600XOM10.36881750.56406740.01510450.3053260.7724534.92875544.00482441.655123-0.8059696803289789
59212020-10-1354.27000054.29000153.61999953.11887020005800INTC10.84636453.58516347.60514354.869043165.15935133.45933950.16639349.7760570.18158025813670298
14412020-03-1287.66000489.61000181.80999881.81574212206600AXP3-8.979919145.33643684.05031927.036437-198.75798366.161600119.648064122.248775-0.9747474568266239
48902020-08-25124.697502125.180000123.052498124.424088211495600AAPL16.845802128.23486896.44847973.723113127.02121068.616624106.62572697.5853380.3361142593207198
\n
" + }, + "metadata": {}, + "execution_count": 24 + } + ], + "source": [ + "processed.sample(5)" + ] + }, + { + "source": [ + "## Setting up the environment" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 162, + "metadata": {}, + "outputs": [], + "source": [ + "trade = data_split(processed, '2020-12-01','2021-01-01')" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [], + "source": [ + "last_df = trade.loc[21]\n", + "\n", + "trade = trade.drop(21)" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " date open high low close volume tic \\\n", + "1 2020-12-02 122.019997 123.370003 120.889999 122.896355 89004200 AAPL \n", + "1 2020-12-02 119.279999 122.849998 118.900002 121.537247 3271900 AXP \n", + "1 2020-12-02 213.009995 224.990005 210.300003 223.850006 25912300 BA \n", + "1 2020-12-02 173.259995 174.419998 172.279999 172.171539 1971000 CAT \n", + "1 2020-12-02 43.389999 43.959999 43.349998 43.227016 17422200 CSCO \n", + "1 2020-12-02 87.260002 91.309998 87.099998 88.617355 10509600 CVX \n", + "1 2020-12-02 63.480000 64.290001 63.250000 63.684902 5959500 DD \n", + "1 2020-12-02 149.490005 154.009995 148.339996 153.610001 10601900 DIS \n", + "1 2020-12-02 232.080002 238.130005 231.580002 236.720154 2136300 GS \n", + "1 2020-12-02 273.970001 274.109985 269.570007 269.411774 4168600 HD \n", + "1 2020-12-02 122.849998 124.639999 122.410004 122.976685 3690700 IBM \n", + "1 2020-12-02 49.220001 50.060001 49.180000 49.598938 33753500 INTC \n", + "1 2020-12-02 147.850006 149.710007 147.699997 147.221954 7745500 JNJ \n", + "1 2020-12-02 119.699997 122.309998 119.269997 120.457932 10811300 JPM \n", + "1 2020-12-02 51.900002 52.130001 51.639999 51.679848 14913100 KO \n", + "1 2020-12-02 214.000000 214.399994 209.130005 209.570557 6223700 MCD \n", + "1 2020-12-02 
170.259995 172.580002 170.220001 170.445450 2421800 MMM \n", + "1 2020-12-02 81.940002 82.739998 81.290001 80.481735 6837600 MRK \n", + "1 2020-12-02 214.880005 215.470001 212.800003 214.875107 23724500 MSFT \n", + "1 2020-12-02 135.160004 136.320007 134.669998 135.033234 4132700 NKE \n", + "1 2020-12-02 40.470001 41.410000 40.299999 40.360924 84347500 PFE \n", + "1 2020-12-02 139.369995 139.940002 137.490005 136.656113 6963000 PG \n", + "1 2020-12-02 71.190002 72.349998 70.690002 71.500046 5401600 RTX \n", + "1 2020-12-02 132.580002 134.279999 132.419998 133.197662 948400 TRV \n", + "1 2020-12-02 341.769989 351.829987 341.500000 345.088043 2864300 UNH \n", + "1 2020-12-02 211.000000 211.399994 208.479996 209.854202 9728900 V \n", + "1 2020-12-02 61.400002 61.950001 61.009998 60.063873 14169900 VZ \n", + "1 2020-12-02 38.500000 40.099998 38.389999 39.463787 8350800 WBA \n", + "1 2020-12-02 152.000000 152.619995 149.529999 149.348251 7849000 WMT \n", + "1 2020-12-02 38.389999 40.419998 38.340000 39.273056 29369500 XOM \n", + "\n", + " day macd boll_ub boll_lb rsi_30 cci_30 dx_30 \\\n", + "1 2 1.155794 122.805722 113.112443 56.727828 154.383004 18.326964 \n", + "1 2 4.855969 128.749669 97.409153 62.028803 89.511153 27.531221 \n", + "1 2 13.984483 239.745046 151.577955 63.765725 106.310809 46.270265 \n", + "1 2 3.476263 179.658458 158.813556 58.681287 64.315213 17.939864 \n", + "1 2 1.260845 44.596100 35.715147 61.543815 122.453264 49.933659 \n", + "1 2 4.274846 97.089123 68.545152 59.083672 82.819903 25.952298 \n", + "1 2 1.451896 65.811749 57.838978 58.141855 85.679606 31.433742 \n", + "1 2 5.910297 157.873490 125.776510 64.685415 109.627809 52.602178 \n", + "1 2 8.360214 243.926541 196.584662 64.374630 110.967327 43.734589 \n", + "1 2 -0.944513 281.791293 263.140854 48.497968 -63.312473 23.109887 \n", + "1 2 2.134021 125.476211 108.313706 56.223680 134.543842 22.198912 \n", + "1 2 0.211684 48.953709 43.227755 53.657276 127.255083 27.590260 \n", + "1 2 0.617159 
150.505035 138.234351 54.479110 94.172556 16.105215 \n", + "1 2 4.618248 126.101105 101.519365 62.619666 89.538776 36.182661 \n", + "1 2 0.588356 54.433406 48.800277 55.922677 26.277790 7.999997 \n", + "1 2 -0.700665 217.831208 208.867687 46.215942 -112.124166 8.580493 \n", + "1 2 2.504897 178.973703 157.544793 54.931979 48.736132 14.206497 \n", + "1 2 0.319381 80.386801 78.151293 53.501224 104.705440 28.901184 \n", + "1 2 0.646495 221.489533 207.514984 52.140060 27.877785 3.735623 \n", + "1 2 2.444884 137.104205 124.671836 63.426064 123.045806 30.844240 \n", + "1 2 1.051314 39.227687 33.270638 66.510171 259.359393 55.141508 \n", + "1 2 -0.464932 142.354259 134.869215 49.645956 -70.492153 25.745587 \n", + "1 2 3.364471 78.241081 56.665192 60.365475 80.804586 23.943590 \n", + "1 2 2.975748 139.320666 125.329705 58.551823 47.673542 9.973399 \n", + "1 2 3.014990 359.988047 326.525271 57.457320 52.263896 23.169929 \n", + "1 2 3.105384 218.624165 196.712163 55.270354 57.102913 8.855146 \n", + "1 2 0.574362 61.005373 56.862147 60.438235 94.456302 35.663662 \n", + "1 2 0.455995 42.535704 34.916657 53.783927 48.518993 9.793683 \n", + "1 2 2.347821 154.586767 141.289839 58.043236 69.502107 23.805558 \n", + "1 2 1.479254 42.005492 31.235070 56.350455 95.687022 26.527777 \n", + "\n", + " close_30_sma close_60_sma sentiment \n", + "1 116.314661 115.369485 -0.984651738354918 \n", + "1 107.265336 104.394359 -0.9176770257026572 \n", + "1 182.422001 172.716168 -0.9572068126980808 \n", + "1 166.248767 158.887985 0.04679674467070649 \n", + "1 38.954216 38.811192 0.21756488479904545 \n", + "1 78.091903 75.165402 -0.5083814301305061 \n", + "1 60.476208 58.747960 0.2728045113638675 \n", + "1 135.727333 131.107500 0.9273346290420486 \n", + "1 211.794794 205.571125 0.6197933298098626 \n", + "1 272.434312 274.191360 0.4380411540759901 \n", + "1 114.370554 116.887375 -0.5538340961143591 \n", + "1 46.183893 48.457200 -0.9702955003015619 \n", + "1 142.673007 144.070394 0.12582628545149088 
\n", + "1 108.841823 102.814144 0.6302514692451833 \n", + "1 50.578834 49.800701 0.06716112190767953 \n", + "1 214.861430 217.184572 -0.59283964713772 \n", + "1 166.002745 164.271172 -0.030348809818280698 \n", + "1 78.248804 79.456796 0.3860714757752912 \n", + "1 212.256269 210.551778 -0.9377874625885871 \n", + "1 129.016266 126.211955 0.8946925807909476 \n", + "1 35.520684 34.755356 -0.7348287684964152 \n", + "1 138.565613 137.868746 0.4972821283400899 \n", + "1 63.902177 61.525737 0.44880643926311103 \n", + "1 129.047579 120.059917 -0.4315987907494607 \n", + "1 333.850869 321.935051 0.2944977097406001 \n", + "1 201.521997 201.184046 0.5208271308347097 \n", + "1 57.977235 57.823531 0.45030105246049046 \n", + "1 37.612180 36.468485 0.42852109840948405 \n", + "1 145.548057 142.187921 -0.08145969873327075 \n", + "1 35.068157 34.371995 -0.49465033198191954 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
dateopenhighlowclosevolumeticdaymacdboll_ubboll_lbrsi_30cci_30dx_30close_30_smaclose_60_smasentiment
12020-12-02122.019997123.370003120.889999122.89635589004200AAPL21.155794122.805722113.11244356.727828154.38300418.326964116.314661115.369485-0.984651738354918
12020-12-02119.279999122.849998118.900002121.5372473271900AXP24.855969128.74966997.40915362.02880389.51115327.531221107.265336104.394359-0.9176770257026572
12020-12-02213.009995224.990005210.300003223.85000625912300BA213.984483239.745046151.57795563.765725106.31080946.270265182.422001172.716168-0.9572068126980808
12020-12-02173.259995174.419998172.279999172.1715391971000CAT23.476263179.658458158.81355658.68128764.31521317.939864166.248767158.8879850.04679674467070649
12020-12-0243.38999943.95999943.34999843.22701617422200CSCO21.26084544.59610035.71514761.543815122.45326449.93365938.95421638.8111920.21756488479904545
12020-12-0287.26000291.30999887.09999888.61735510509600CVX24.27484697.08912368.54515259.08367282.81990325.95229878.09190375.165402-0.5083814301305061
12020-12-0263.48000064.29000163.25000063.6849025959500DD21.45189665.81174957.83897858.14185585.67960631.43374260.47620858.7479600.2728045113638675
12020-12-02149.490005154.009995148.339996153.61000110601900DIS25.910297157.873490125.77651064.685415109.62780952.602178135.727333131.1075000.9273346290420486
12020-12-02232.080002238.130005231.580002236.7201542136300GS28.360214243.926541196.58466264.374630110.96732743.734589211.794794205.5711250.6197933298098626
12020-12-02273.970001274.109985269.570007269.4117744168600HD2-0.944513281.791293263.14085448.497968-63.31247323.109887272.434312274.1913600.4380411540759901
12020-12-02122.849998124.639999122.410004122.9766853690700IBM22.134021125.476211108.31370656.223680134.54384222.198912114.370554116.887375-0.5538340961143591
12020-12-0249.22000150.06000149.18000049.59893833753500INTC20.21168448.95370943.22775553.657276127.25508327.59026046.18389348.457200-0.9702955003015619
12020-12-02147.850006149.710007147.699997147.2219547745500JNJ20.617159150.505035138.23435154.47911094.17255616.105215142.673007144.0703940.12582628545149088
12020-12-02119.699997122.309998119.269997120.45793210811300JPM24.618248126.101105101.51936562.61966689.53877636.182661108.841823102.8141440.6302514692451833
12020-12-0251.90000252.13000151.63999951.67984814913100KO20.58835654.43340648.80027755.92267726.2777907.99999750.57883449.8007010.06716112190767953
12020-12-02214.000000214.399994209.130005209.5705576223700MCD2-0.700665217.831208208.86768746.215942-112.1241668.580493214.861430217.184572-0.59283964713772
12020-12-02170.259995172.580002170.220001170.4454502421800MMM22.504897178.973703157.54479354.93197948.73613214.206497166.002745164.271172-0.030348809818280698
12020-12-0281.94000282.73999881.29000180.4817356837600MRK20.31938180.38680178.15129353.501224104.70544028.90118478.24880479.4567960.3860714757752912
12020-12-02214.880005215.470001212.800003214.87510723724500MSFT20.646495221.489533207.51498452.14006027.8777853.735623212.256269210.551778-0.9377874625885871
12020-12-02135.160004136.320007134.669998135.0332344132700NKE22.444884137.104205124.67183663.426064123.04580630.844240129.016266126.2119550.8946925807909476
12020-12-0240.47000141.41000040.29999940.36092484347500PFE21.05131439.22768733.27063866.510171259.35939355.14150835.52068434.755356-0.7348287684964152
12020-12-02139.369995139.940002137.490005136.6561136963000PG2-0.464932142.354259134.86921549.645956-70.49215325.745587138.565613137.8687460.4972821283400899
12020-12-0271.19000272.34999870.69000271.5000465401600RTX23.36447178.24108156.66519260.36547580.80458623.94359063.90217761.5257370.44880643926311103
12020-12-02132.580002134.279999132.419998133.197662948400TRV22.975748139.320666125.32970558.55182347.6735429.973399129.047579120.059917-0.4315987907494607
12020-12-02341.769989351.829987341.500000345.0880432864300UNH23.014990359.988047326.52527157.45732052.26389623.169929333.850869321.9350510.2944977097406001
12020-12-02211.000000211.399994208.479996209.8542029728900V23.105384218.624165196.71216355.27035457.1029138.855146201.521997201.1840460.5208271308347097
12020-12-0261.40000261.95000161.00999860.06387314169900VZ20.57436261.00537356.86214760.43823594.45630235.66366257.97723557.8235310.45030105246049046
12020-12-0238.50000040.09999838.38999939.4637878350800WBA20.45599542.53570434.91665753.78392748.5189939.79368337.61218036.4684850.42852109840948405
12020-12-02152.000000152.619995149.529999149.3482517849000WMT22.347821154.586767141.28983958.04323669.50210723.805558145.548057142.187921-0.08145969873327075
12020-12-0238.38999940.41999838.34000039.27305629369500XOM21.47925442.00549231.23507056.35045595.68702226.52777735.06815734.371995-0.49465033198191954
\n
" + }, + "metadata": {}, + "execution_count": 170 + } + ], + "source": [ + "trade[trade.date == '2020-12-02']" + ] + }, + { + "source": [ + "trade[trade.date > '2020-12-02'][['date','open']]" + ], + "cell_type": "code", + "metadata": {}, + "execution_count": 168, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " date open\n", + "2 2020-12-03 123.519997\n", + "2 2020-12-03 122.849998\n", + "2 2020-12-03 228.300003\n", + "2 2020-12-03 173.869995\n", + "2 2020-12-03 43.779999\n", + ".. ... ...\n", + "21 2020-12-31 218.399994\n", + "21 2020-12-31 58.060001\n", + "21 2020-12-31 39.330002\n", + "21 2020-12-31 144.199997\n", + "21 2020-12-31 41.470001\n", + "\n", + "[600 rows x 2 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
dateopen
22020-12-03123.519997
22020-12-03122.849998
22020-12-03228.300003
22020-12-03173.869995
22020-12-0343.779999
.........
212020-12-31218.399994
212020-12-3158.060001
212020-12-3139.330002
212020-12-31144.199997
212020-12-3141.470001
\n

600 rows × 2 columns

\n
" + }, + "metadata": {}, + "execution_count": 168 + } + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " date open high low close volume \\\n", + "1 2020-12-02 122.019997 123.370003 120.889999 122.896355 89004200 \n", + "1 2020-12-02 119.279999 122.849998 118.900002 121.537247 3271900 \n", + "1 2020-12-02 213.009995 224.990005 210.300003 223.850006 25912300 \n", + "1 2020-12-02 173.259995 174.419998 172.279999 172.171539 1971000 \n", + "1 2020-12-02 43.389999 43.959999 43.349998 43.227016 17422200 \n", + ".. ... ... ... ... ... ... \n", + "20 2020-12-30 216.000000 220.389999 215.649994 218.021530 8875100 \n", + "20 2020-12-30 58.830002 58.939999 58.060001 56.911888 18259800 \n", + "20 2020-12-30 39.520000 39.730000 39.200001 38.968510 4194300 \n", + "20 2020-12-30 144.880005 145.149994 143.940002 143.580521 6250400 \n", + "20 2020-12-30 41.330002 42.419998 41.270000 40.905334 23807300 \n", + "\n", + " tic day macd boll_ub boll_lb rsi_30 cci_30 \\\n", + "1 AAPL 2 1.155794 122.805722 113.112443 56.727828 154.383004 \n", + "1 AXP 2 4.855969 128.749669 97.409153 62.028803 89.511153 \n", + "1 BA 2 13.984483 239.745046 151.577955 63.765725 106.310809 \n", + "1 CAT 2 3.476263 179.658458 158.813556 58.681287 64.315213 \n", + "1 CSCO 2 1.260845 44.596100 35.715147 61.543815 122.453264 \n", + ".. ... ... ... ... ... ... ... 
\n", + "20 V 2 1.774089 216.171158 203.211760 59.209227 263.789704 \n", + "20 VZ 2 -0.390335 61.048898 56.699199 43.708731 -189.807648 \n", + "20 WBA 2 0.022185 42.954975 38.082485 50.214539 -34.466659 \n", + "20 WMT 2 -0.837518 149.407225 141.840954 49.369350 -96.017810 \n", + "20 XOM 2 0.840002 43.642135 39.143972 55.195039 30.368622 \n", + "\n", + " dx_30 close_30_sma close_60_sma sentiment \n", + "1 18.326964 116.314661 115.369485 -0.984651738354918 \n", + "1 27.531221 107.265336 104.394359 -0.9176770257026572 \n", + "1 46.270265 182.422001 172.716168 -0.9572068126980808 \n", + "1 17.939864 166.248767 158.887985 0.04679674467070649 \n", + "1 49.933659 38.954216 38.811192 0.21756488479904545 \n", + ".. ... ... ... ... \n", + "20 29.424279 209.395252 204.046652 0.02611872782509783 \n", + "20 17.673111 58.962963 58.125952 0.08300506391087992 \n", + "20 3.350135 39.692671 38.252223 0.8986461664723171 \n", + "20 5.379552 147.089382 144.891253 -0.6932360295904421 \n", + "20 15.401619 40.351737 36.687027 -0.6605489617307765 \n", + "\n", + "[600 rows x 17 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
dateopenhighlowclosevolumeticdaymacdboll_ubboll_lbrsi_30cci_30dx_30close_30_smaclose_60_smasentiment
12020-12-02122.019997123.370003120.889999122.89635589004200AAPL21.155794122.805722113.11244356.727828154.38300418.326964116.314661115.369485-0.984651738354918
12020-12-02119.279999122.849998118.900002121.5372473271900AXP24.855969128.74966997.40915362.02880389.51115327.531221107.265336104.394359-0.9176770257026572
12020-12-02213.009995224.990005210.300003223.85000625912300BA213.984483239.745046151.57795563.765725106.31080946.270265182.422001172.716168-0.9572068126980808
12020-12-02173.259995174.419998172.279999172.1715391971000CAT23.476263179.658458158.81355658.68128764.31521317.939864166.248767158.8879850.04679674467070649
12020-12-0243.38999943.95999943.34999843.22701617422200CSCO21.26084544.59610035.71514761.543815122.45326449.93365938.95421638.8111920.21756488479904545
......................................................
202020-12-30216.000000220.389999215.649994218.0215308875100V21.774089216.171158203.21176059.209227263.78970429.424279209.395252204.0466520.02611872782509783
202020-12-3058.83000258.93999958.06000156.91188818259800VZ2-0.39033561.04889856.69919943.708731-189.80764817.67311158.96296358.1259520.08300506391087992
202020-12-3039.52000039.73000039.20000138.9685104194300WBA20.02218542.95497538.08248550.214539-34.4666593.35013539.69267138.2522230.8986461664723171
202020-12-30144.880005145.149994143.940002143.5805216250400WMT2-0.837518149.407225141.84095449.369350-96.0178105.379552147.089382144.891253-0.6932360295904421
202020-12-3041.33000242.41999841.27000040.90533423807300XOM20.84000243.64213539.14397255.19503930.36862215.40161940.35173736.687027-0.6605489617307765
\n

600 rows × 17 columns

\n
" + }, + "metadata": {}, + "execution_count": 151 + } + ], + "source": [ + "trade.loc[trade.index[0]+1:]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Stock Dimension: 30, State Space: 331\n" + ] + } + ], + "source": [ + "stock_dimension = len(processed_full.tic.unique())\n", + "state_space = 1 + 2*stock_dimension + len(indicator_list)*stock_dimension\n", + "print(f\"Stock Dimension: {stock_dimension}, State Space: {state_space}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "env_kwargs = {\n", + " \"hmax\": 100, \n", + " \"initial_amount\": 1000000, \n", + " \"buy_cost_pct\": 0.001, \n", + " \"sell_cost_pct\": 0.001, \n", + " \"state_space\": state_space, \n", + " \"stock_dim\": stock_dimension, \n", + " \"tech_indicator_list\": indicator_list, \n", + " \"action_space\": stock_dimension, \n", + " \"reward_scaling\": 1e-4,\n", + " \"print_verbosity\":5\n", + " \n", + "}\n", + "\n", + "e_trade_gym = StockTradingEnv(df = trade, **env_kwargs)\n", + "env_trade, _ = e_trade_gym.get_sb_env()" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " date open high low close volume \\\n", + "0 2020-12-01 121.010002 123.470001 120.010002 122.536896 128166800 \n", + "0 2020-12-01 120.320000 122.570000 119.849998 119.152794 3584400 \n", + "0 2020-12-01 214.309998 218.089996 213.000000 213.009995 15805200 \n", + "0 2020-12-01 175.389999 176.570007 172.940002 171.567505 2710200 \n", + "0 2020-12-01 43.009998 44.070000 43.009998 42.882305 23948800 \n", + "0 2020-12-01 89.279999 89.709999 87.070000 86.231079 
9915700 \n", + "0 2020-12-01 64.839996 65.230003 63.330002 63.256752 4505200 \n", + "0 2020-12-01 149.570007 151.399994 149.000000 149.440002 8827800 \n", + "0 2020-12-01 231.960007 234.869995 231.350006 231.171967 2581000 \n", + "0 2020-12-01 278.730011 278.950012 275.549988 273.386841 3944900 \n", + "0 2020-12-01 123.900002 125.830002 123.080002 121.535934 5312100 \n", + "0 2020-12-01 48.750000 50.230000 48.709999 49.260990 57778200 \n", + "0 2020-12-01 146.289993 149.130005 145.860001 146.536240 9741300 \n", + "0 2020-12-01 120.339996 121.580002 119.629997 118.187737 12678200 \n", + "0 2020-12-01 52.139999 52.330002 51.790001 51.610428 18969300 \n", + "0 2020-12-01 218.880005 218.929993 215.529999 214.818268 4226300 \n", + "0 2020-12-01 174.220001 175.690002 170.009995 169.126312 3859600 \n", + "0 2020-12-01 80.949997 82.459999 80.820000 80.206345 9701500 \n", + "0 2020-12-01 214.509995 217.320007 213.350006 215.713181 30931300 \n", + "0 2020-12-01 136.440002 136.500000 134.750000 134.893799 3834500 \n", + "0 2020-12-01 39.400002 40.500000 39.009998 38.985886 72660800 \n", + "0 2020-12-01 139.160004 139.539993 138.160004 137.653671 7169700 \n", + "0 2020-12-01 72.720001 72.940002 71.209999 70.794388 6545800 \n", + "0 2020-12-01 132.509995 134.240005 131.990005 132.571625 1218300 \n", + "0 2020-12-01 344.769989 354.100006 339.929993 338.763336 3817700 \n", + "0 2020-12-01 212.130005 213.669998 211.039993 210.872620 8049400 \n", + "0 2020-12-01 60.430000 60.919998 60.279999 59.300350 14283200 \n", + "0 2020-12-01 38.380001 39.200001 38.310001 38.096817 8004500 \n", + "0 2020-12-01 153.600006 153.660004 151.660004 151.451736 7647100 \n", + "0 2020-12-01 38.959999 39.650002 38.470001 37.857101 32503100 \n", + "\n", + " tic day macd boll_ub boll_lb rsi_30 cci_30 \\\n", + "0 AAPL 1 0.806734 122.766675 111.870384 56.465653 158.903167 \n", + "0 AXP 1 4.740563 128.735444 94.836253 60.587597 92.037957 \n", + "0 BA 1 13.502710 237.931128 146.371872 61.131542 100.686039 
\n", + "0 CAT 1 3.661695 179.348927 158.511125 58.336805 76.050401 \n", + "0 CSCO 1 1.188129 44.341458 35.259680 60.649269 130.290847 \n", + "0 CVX 1 4.377220 97.004396 66.739570 57.242696 76.670547 \n", + "0 DD 1 1.486600 65.660502 57.529655 57.490340 98.279086 \n", + "0 DIS 1 5.628248 157.252273 123.438727 62.437060 103.344798 \n", + "0 GS 1 8.068587 242.898599 193.549264 62.155696 104.800845 \n", + "0 HD 1 -0.821353 281.941664 263.464095 50.789610 9.794276 \n", + "0 IBM 1 1.976144 124.757566 107.839123 54.877749 148.657424 \n", + "0 INTC 1 -0.013522 48.275757 43.371563 52.876832 102.259448 \n", + "0 JNJ 1 0.413395 150.711438 136.974939 53.598712 77.664611 \n", + "0 JPM 1 4.596265 125.822278 99.959343 61.128958 88.001113 \n", + "0 KO 1 0.636729 54.590589 48.331295 55.718840 31.880461 \n", + "0 MCD 1 -0.312211 217.704911 209.457038 51.036825 -14.473086 \n", + "0 MMM 1 2.736254 178.808849 156.940039 53.900958 56.935914 \n", + "0 MRK 1 0.223440 80.884380 77.170803 52.865942 93.671027 \n", + "0 MSFT 1 0.572630 222.105612 205.953179 52.648526 47.034706 \n", + "0 NKE 1 2.423039 136.936074 123.745399 63.290424 135.957603 \n", + "0 PFE 1 0.817742 38.419874 33.404417 63.351960 231.913066 \n", + "0 PG 1 -0.400841 142.396651 135.109301 51.280113 -59.457889 \n", + "0 RTX 1 3.498788 78.386926 54.997566 59.650436 88.203718 \n", + "0 TRV 1 3.146130 139.547557 124.337653 58.097045 47.067895 \n", + "0 UNH 1 2.557991 361.838079 322.072880 55.302689 46.423201 \n", + "0 V 1 3.227687 220.609626 192.517796 56.041466 74.856270 \n", + "0 VZ 1 0.531209 61.017065 56.497081 57.477822 64.122408 \n", + "0 WBA 1 0.390607 42.490830 34.673390 51.242226 21.865538 \n", + "0 WMT 1 2.550562 154.726389 140.382241 61.146359 104.402212 \n", + "0 XOM 1 1.463276 41.856731 30.664341 53.975487 84.355622 \n", + "\n", + " dx_30 close_30_sma close_60_sma sentiment \n", + "0 18.326964 116.122535 115.195506 -0.6353855836609985 \n", + "0 31.349381 106.629814 104.078234 0.6005440884823487 \n", + "0 41.044077 
180.535001 171.670001 0.4103206537661357 \n", + "0 20.330658 166.053826 158.454731 -0.5389422727751916 \n", + "0 49.933659 38.800245 38.741337 -0.5830288155235768 \n", + "0 21.597130 77.459857 74.967448 -0.01776743709505868 \n", + "0 32.254347 60.318391 58.625112 -0.5216685812161448 \n", + "0 48.584574 134.772000 130.784000 -0.9259336746217759 \n", + "0 39.014700 210.773879 204.969028 -0.5188158197824897 \n", + "0 10.655830 272.877826 274.136699 0.3704807831218777 \n", + "0 26.069621 114.076892 116.802797 0.7529713903986068 \n", + "0 27.590260 46.288067 48.434949 0.8539701717421719 \n", + "0 13.681658 142.521038 144.038986 0.5682866582278816 \n", + "0 34.500062 108.128854 102.435144 -0.42503771747413355 \n", + "0 6.226137 50.504422 49.749719 -0.9479379522530356 \n", + "0 13.648295 215.366526 217.208724 0.8792847797333472 \n", + "0 14.206497 165.924518 164.104555 0.11072713391558753 \n", + "0 26.952431 78.132093 79.471521 -0.8812162075634768 \n", + "0 2.171538 212.213684 210.331632 0.36688784032438826 \n", + "0 31.371592 128.780886 125.832491 0.42112365937297436 \n", + "0 51.173878 35.336263 34.638991 0.72625802410846 \n", + "0 22.114259 138.675353 137.816553 0.6270024530508409 \n", + "0 27.283244 63.533057 61.308451 -0.6990874391243174 \n", + "0 9.827462 128.519917 119.709633 0.7552892014347639 \n", + "0 23.169929 333.051594 321.247131 -0.8165491584577971 \n", + "0 16.239753 201.096742 201.011629 -0.502620224611146 \n", + "0 21.124652 57.843128 57.790385 0.6591081344198431 \n", + "0 3.172934 37.520049 36.399741 -0.5569183988036852 \n", + "0 43.725999 145.329107 141.988320 -0.3113203205386308 \n", + "0 21.828008 34.837608 34.328411 -0.14096433016302212 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
dateopenhighlowclosevolumeticdaymacdboll_ubboll_lbrsi_30cci_30dx_30close_30_smaclose_60_smasentiment
02020-12-01121.010002123.470001120.010002122.536896128166800AAPL10.806734122.766675111.87038456.465653158.90316718.326964116.122535115.195506-0.6353855836609985
02020-12-01120.320000122.570000119.849998119.1527943584400AXP14.740563128.73544494.83625360.58759792.03795731.349381106.629814104.0782340.6005440884823487
02020-12-01214.309998218.089996213.000000213.00999515805200BA113.502710237.931128146.37187261.131542100.68603941.044077180.535001171.6700010.4103206537661357
02020-12-01175.389999176.570007172.940002171.5675052710200CAT13.661695179.348927158.51112558.33680576.05040120.330658166.053826158.454731-0.5389422727751916
02020-12-0143.00999844.07000043.00999842.88230523948800CSCO11.18812944.34145835.25968060.649269130.29084749.93365938.80024538.741337-0.5830288155235768
02020-12-0189.27999989.70999987.07000086.2310799915700CVX14.37722097.00439666.73957057.24269676.67054721.59713077.45985774.967448-0.01776743709505868
02020-12-0164.83999665.23000363.33000263.2567524505200DD11.48660065.66050257.52965557.49034098.27908632.25434760.31839158.625112-0.5216685812161448
02020-12-01149.570007151.399994149.000000149.4400028827800DIS15.628248157.252273123.43872762.437060103.34479848.584574134.772000130.784000-0.9259336746217759
02020-12-01231.960007234.869995231.350006231.1719672581000GS18.068587242.898599193.54926462.155696104.80084539.014700210.773879204.969028-0.5188158197824897
02020-12-01278.730011278.950012275.549988273.3868413944900HD1-0.821353281.941664263.46409550.7896109.79427610.655830272.877826274.1366990.3704807831218777
02020-12-01123.900002125.830002123.080002121.5359345312100IBM11.976144124.757566107.83912354.877749148.65742426.069621114.076892116.8027970.7529713903986068
02020-12-0148.75000050.23000048.70999949.26099057778200INTC1-0.01352248.27575743.37156352.876832102.25944827.59026046.28806748.4349490.8539701717421719
02020-12-01146.289993149.130005145.860001146.5362409741300JNJ10.413395150.711438136.97493953.59871277.66461113.681658142.521038144.0389860.5682866582278816
02020-12-01120.339996121.580002119.629997118.18773712678200JPM14.596265125.82227899.95934361.12895888.00111334.500062108.128854102.435144-0.42503771747413355
02020-12-0152.13999952.33000251.79000151.61042818969300KO10.63672954.59058948.33129555.71884031.8804616.22613750.50442249.749719-0.9479379522530356
02020-12-01218.880005218.929993215.529999214.8182684226300MCD1-0.312211217.704911209.45703851.036825-14.47308613.648295215.366526217.2087240.8792847797333472
02020-12-01174.220001175.690002170.009995169.1263123859600MMM12.736254178.808849156.94003953.90095856.93591414.206497165.924518164.1045550.11072713391558753
02020-12-0180.94999782.45999980.82000080.2063459701500MRK10.22344080.88438077.17080352.86594293.67102726.95243178.13209379.471521-0.8812162075634768
02020-12-01214.509995217.320007213.350006215.71318130931300MSFT10.572630222.105612205.95317952.64852647.0347062.171538212.213684210.3316320.36688784032438826
02020-12-01136.440002136.500000134.750000134.8937993834500NKE12.423039136.936074123.74539963.290424135.95760331.371592128.780886125.8324910.42112365937297436
02020-12-0139.40000240.50000039.00999838.98588672660800PFE10.81774238.41987433.40441763.351960231.91306651.17387835.33626334.6389910.72625802410846
02020-12-01139.160004139.539993138.160004137.6536717169700PG1-0.400841142.396651135.10930151.280113-59.45788922.114259138.675353137.8165530.6270024530508409
02020-12-0172.72000172.94000271.20999970.7943886545800RTX13.49878878.38692654.99756659.65043688.20371827.28324463.53305761.308451-0.6990874391243174
02020-12-01132.509995134.240005131.990005132.5716251218300TRV13.146130139.547557124.33765358.09704547.0678959.827462128.519917119.7096330.7552892014347639
02020-12-01344.769989354.100006339.929993338.7633363817700UNH12.557991361.838079322.07288055.30268946.42320123.169929333.051594321.247131-0.8165491584577971
02020-12-01212.130005213.669998211.039993210.8726208049400V13.227687220.609626192.51779656.04146674.85627016.239753201.096742201.011629-0.502620224611146
02020-12-0160.43000060.91999860.27999959.30035014283200VZ10.53120961.01706556.49708157.47782264.12240821.12465257.84312857.7903850.6591081344198431
02020-12-0138.38000139.20000138.31000138.0968178004500WBA10.39060742.49083034.67339051.24222621.8655383.17293437.52004936.399741-0.5569183988036852
02020-12-01153.600006153.660004151.660004151.4517367647100WMT12.550562154.726389140.38224161.146359104.40221243.725999145.329107141.988320-0.3113203205386308
02020-12-0138.95999939.65000238.47000137.85710132503100XOM11.46327641.85673130.66434153.97548784.35562221.82800834.83760834.328411-0.14096433016302212
\n
" + }, + "metadata": {}, + "execution_count": 75 + } + ], + "source": [ + "e_trade_gym.data" + ] + }, + { + "source": [ + "## Trading" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize agent\n", + "agent = DRLAgent(env_trade)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}\nUsing cpu device\n" + ] + } + ], + "source": [ + "# Initialize Model\n", + "model_a2c = agent.get_model(\"a2c\")" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "# Load or train\n", + "path_to_saved_model = ''\n", + "model_a2c.load(path_to_saved_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/preprocessing/data_processor.py b/preprocessing/data_processor.py new file mode 100644 index 000000000..14562d746 --- /dev/null +++ b/preprocessing/data_processor.py @@ -0,0 +1,82 @@ +from __future__ import division, absolute_import, print_function +import numpy as np +import pandas as pd +import datetime +from finrl.config import config +from finrl.marketdata.yahoodownloader import YahooDownloader +from finrl.preprocessing.preprocessors import FeatureEngineer +from finrl.preprocessing.data import data_split + + +class DataProcessor: + + def __init__(self,feature_engineer, initial_data, buffer_size=30,numerical_cols = ['date','open', 'high', 'low', 'close', 'volume', 'tic','day']): + self.numerical_cols = numerical_cols + self.fe = feature_engineer + self.buffer_size = buffer_size + initial_indices = initial_data.index.unique() + initial_data.index = initial_data.date.factorize()[0] + self.numerical_data_history = 
initial_data[self.numerical_cols] + if len(initial_indices) > buffer_size: + self.numerical_data_history = self.numerical_data_history.loc[initial_indices[-buffer_size:]] + + def _add_new(self,df): + if len(self.numerical_data_history.index.unique()) >= self.buffer_size: + self.numerical_data_history.drop(self.numerical_data_history.index[0],inplace=True) + df.set_index(pd.Index(np.full((len(df),),self.numerical_data_history.index[-1]+1)),inplace=True) + self.numerical_data_history = self.numerical_data_history.append(df) + return self.numerical_data_history + + def process_data(self,numerical_df,sentiment_df): + new_feature_df = self.compute_technical_indicators(numerical_df) + new_df = new_feature_df.reset_index().merge(sentiment_df,on=['date','tic']).set_index('index') + return new_df + + def compute_technical_indicators(self,numerical_df): + full_df = self._add_new(numerical_df) + feature_df = self.fe.preprocess_data(full_df) + feature_df.index = feature_df.date.factorize()[0] + new_feature_df = feature_df.loc[feature_df.index[-1]] + return new_feature_df + + def save_to_database(self): + pass + + + +def get_initial_data(numerical_df,sentiment_df,use_turbulence=False): + fe = FeatureEngineer(use_turbulence=use_turbulence) + numerical_df = fe.preprocess_data(numerical_df) + df = numerical_df.merge(sentiment_df,on=["date","tic"],how="left") + df.fillna(0) + return df + +def generate_sentiment_scores(start_date,end_date,tickers=config.DOW_30_TICKER,time_fmt="%Y-%m-%d"): + dates = pd.date_range(start_date,end_date).to_pydatetime() + dates = np.array([datetime.datetime.strftime(r,time_fmt) for r in dates]) + data = np.array(np.meshgrid(dates,tickers)).T.reshape(-1,2) + scores = np.random.uniform(low=-1.0,high=1.0,size=(len(data),1)) + df = pd.DataFrame(data,columns=['date','tic']) + df['sentiment'] = scores + return df + +def test_process_data(): + start_date = '2020-11-01' + end_date='2021-01-01' + ticker_list=config.DOW_30_TICKER + numerical_df = 
YahooDownloader(start_date=start_date,end_date=end_date,ticker_list=ticker_list).fetch_data() + sentiment_df = generate_sentiment_scores(start_date,end_date) + initial_data = get_initial_data(numerical_df,sentiment_df) + trade_data = data_split(initial_data,start_date,'2020-12-01') + numerical_feed_data = numerical_df[numerical_df.date > '2020-12-01'] + sentiment_feed_data = sentiment_df[sentiment_df.date > '2020-12-01'] + data_processor = DataProcessor(FeatureEngineer(),trade_data) + for date in numerical_feed_data.date.unique(): + + new_numerical = numerical_feed_data[numerical_feed_data.date==date] + new_sentiment = sentiment_feed_data.loc[sentiment_feed_data.date==date] + new_df=data_processor.process_data(new_numerical,new_sentiment) + print(new_df) + +if __name__ == "__main__": + test_process_data() \ No newline at end of file diff --git a/preprocessing/preprocessors.py b/preprocessing/preprocessors.py deleted file mode 100644 index b1d0a41f5..000000000 --- a/preprocessing/preprocessors.py +++ /dev/null @@ -1,161 +0,0 @@ -import numpy as np -import pandas as pd -from stockstats import StockDataFrame as Sdf -from config import config - -def load_dataset(*, file_name: str) -> pd.DataFrame: - """ - load csv dataset from path - :return: (df) pandas dataframe - """ - #_data = pd.read_csv(f"{config.DATASET_DIR}/{file_name}") - _data = pd.read_csv(file_name) - return _data - -def data_split(df,start,end): - """ - split the dataset into training or testing using date - :param data: (df) pandas dataframe, start, end - :return: (df) pandas dataframe - """ - data = df[(df.datadate >= start) & (df.datadate < end)] - data=data.sort_values(['datadate','tic'],ignore_index=True) - #data = data[final_columns] - data.index = data.datadate.factorize()[0] - return data - -def calcualte_price(df): - """ - calcualte adjusted close price, open-high-low price and volume - :param data: (df) pandas dataframe - :return: (df) pandas dataframe - """ - data = df.copy() - data = 
data[['datadate', 'tic', 'prccd', 'ajexdi', 'prcod', 'prchd', 'prcld', 'cshtrd']] - data['ajexdi'] = data['ajexdi'].apply(lambda x: 1 if x == 0 else x) - - data['adjcp'] = data['prccd'] / data['ajexdi'] - data['open'] = data['prcod'] / data['ajexdi'] - data['high'] = data['prchd'] / data['ajexdi'] - data['low'] = data['prcld'] / data['ajexdi'] - data['volume'] = data['cshtrd'] - - data = data[['datadate', 'tic', 'adjcp', 'open', 'high', 'low', 'volume']] - data = data.sort_values(['tic', 'datadate'], ignore_index=True) - return data - -def add_technical_indicator(df): - """ - calcualte technical indicators - use stockstats package to add technical inidactors - :param data: (df) pandas dataframe - :return: (df) pandas dataframe - """ - stock = Sdf.retype(df.copy()) - - stock['close'] = stock['adjcp'] - unique_ticker = stock.tic.unique() - - macd = pd.DataFrame() - rsi = pd.DataFrame() - cci = pd.DataFrame() - dx = pd.DataFrame() - - #temp = stock[stock.tic == unique_ticker[0]]['macd'] - for i in range(len(unique_ticker)): - ## macd - temp_macd = stock[stock.tic == unique_ticker[i]]['macd'] - temp_macd = pd.DataFrame(temp_macd) - macd = macd.append(temp_macd, ignore_index=True) - ## rsi - temp_rsi = stock[stock.tic == unique_ticker[i]]['rsi_30'] - temp_rsi = pd.DataFrame(temp_rsi) - rsi = rsi.append(temp_rsi, ignore_index=True) - ## cci - temp_cci = stock[stock.tic == unique_ticker[i]]['cci_30'] - temp_cci = pd.DataFrame(temp_cci) - cci = cci.append(temp_cci, ignore_index=True) - ## adx - temp_dx = stock[stock.tic == unique_ticker[i]]['dx_30'] - temp_dx = pd.DataFrame(temp_dx) - dx = dx.append(temp_dx, ignore_index=True) - - - df['macd'] = macd - df['rsi'] = rsi - df['cci'] = cci - df['adx'] = dx - - return df - - - -def preprocess_data(): - """data preprocessing pipeline""" - - df = load_dataset(file_name=config.TRAINING_DATA_FILE) - # get data after 2009 - df = df[df.datadate>=20090000] - # calcualte adjusted price - df_preprocess = calcualte_price(df) - # add 
technical indicators using stockstats - df_final=add_technical_indicator(df_preprocess) - # fill the missing values at the beginning - df_final.fillna(method='bfill',inplace=True) - return df_final - -def add_turbulence(df): - """ - add turbulence index from a precalcualted dataframe - :param data: (df) pandas dataframe - :return: (df) pandas dataframe - """ - turbulence_index = calcualte_turbulence(df) - df = df.merge(turbulence_index, on='datadate') - df = df.sort_values(['datadate','tic']).reset_index(drop=True) - return df - - - -def calcualte_turbulence(df): - """calculate turbulence index based on dow 30""" - # can add other market assets - - df_price_pivot=df.pivot(index='datadate', columns='tic', values='adjcp') - unique_date = df.datadate.unique() - # start after a year - start = 252 - turbulence_index = [0]*start - #turbulence_index = [0] - count=0 - for i in range(start,len(unique_date)): - current_price = df_price_pivot[df_price_pivot.index == unique_date[i]] - hist_price = df_price_pivot[[n in unique_date[0:i] for n in df_price_pivot.index ]] - cov_temp = hist_price.cov() - current_temp=(current_price - np.mean(hist_price,axis=0)) - temp = current_temp.values.dot(np.linalg.inv(cov_temp)).dot(current_temp.values.T) - if temp>0: - count+=1 - if count>2: - turbulence_temp = temp[0][0] - else: - #avoid large outlier because of the calculation just begins - turbulence_temp=0 - else: - turbulence_temp=0 - turbulence_index.append(turbulence_temp) - - - turbulence_index = pd.DataFrame({'datadate':df_price_pivot.index, - 'turbulence':turbulence_index}) - return turbulence_index - - - - - - - - - - diff --git a/requirements.txt b/requirements.txt index def79b22b..e5c01da91 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ # Model Building Requirements -numpy==1.16.4 +numpy +finrl pandas==1.0.3 stockstats scikit-learn==0.21.0 @@ -16,8 +17,16 @@ matplotlib==3.2.1 pytest>=5.3.2,<6.0.0 # packaging -setuptools>=41.4.0,<42.0.0 -wheel>=0.33.6,<0.34.0 
+setuptools +wheel + +#twitter +tweepy + +#yahoo finance +yfinance + + diff --git a/sentiment_analysis/Sentiment_model.py b/sentiment_analysis/Sentiment_model.py new file mode 100644 index 000000000..1bb7eaf66 --- /dev/null +++ b/sentiment_analysis/Sentiment_model.py @@ -0,0 +1,64 @@ + +from transformers import AutoTokenizer, AutoModelForSequenceClassification +from transformers import pipeline +import torch +import numpy as np + + + +model_name = "ProsusAI/finbert" +model = AutoModelForSequenceClassification.from_pretrained(model_name) +tokenizer = AutoTokenizer.from_pretrained(model_name) +classifier = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer) + +dic={'MMM':0,'AXP':0,'AMGN':0,'AAPL':0,'BA':0,'CAT':0,'CVX':0,'CSCO':0,'KO':0,'DIS':0,'DOW':0,'GS':0,'HD':0,'HON':0,'IBM':0,'INTC':0,'JNJ':0,'JPM':0,'MCD':0,'MRK':0,'MSFT':0,'NKE':0,'PG':0,'CRM':0,'TRV':0,'UNH':0,'VZ':0,'V':0,'WBA':0,'WMT':0} + + + + +def get_sentiment_score(sentence, stock): + out= classifier(sentence) + # print(out) + pos=0 + neg=0 + neutral=0 + sentiment_score=0 + for i in out: + # print(i['label']) + if(i['label']=='POSITIVE'): + pos=i['score'] + # print(pos) + elif(i['label']=='NEGATIVE'): + neg=i['score'] + # print(neg) + else: + neutral= i['score'] + + if(pos!=0 or neg!=0): + sentiment_score= pos-neg + else: + sentiment_score=neutral + + if(dic[stock]==0): + avg= sentiment_score + dic[stock]=avg + + else: + alpha = (calc_alpha(10,0.9)) + avg= dic[stock] + res = update_ewma(avg,sentiment_score,alpha) + dic[stock]=res + + return dic + +def calc_alpha(window, weight_proportion): + + return 1 - np.exp(np.log(1-weight_proportion)/window) + + +def update_ewma(prev_stat, data_point, alpha): + + return data_point*alpha + (1-alpha) * prev_stat + + + diff --git a/sentiment_analysis/__init__.py b/sentiment_analysis/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sentiment_analysis/score.py b/sentiment_analysis/score.py new file mode 100644 index 
000000000..b084da755 --- /dev/null +++ b/sentiment_analysis/score.py @@ -0,0 +1,123 @@ +import numpy as np + + +def calc_alpha(window, weight_proportion): + ''' + Calculate alpha parameter for exponentially weighted moving average + + :param window: number of values that weights will add up to the weight_proportion + :param weight_proportion: float [0,1], gives the amount of cumulative weight given to last window values + :return: alpha parameter for ewma + ''' + return 1 - np.exp(np.log(1-weight_proportion)/window) + + +def get_weights(alpha,num_steps): + ''' + Shows weights of last num_steps values + ''' + return [alpha] + [alpha*(1-alpha)**i for i in range(1,num_steps)] + +def update_ewma(prev_stat, data_point, alpha): + ''' + Updates the exponentially weighted moving average given a new data_point and parameter alpha + ''' + return data_point*alpha + (1-alpha) * prev_stat + +# This can be used for calculating the ewma given a vector as a start. Safe for large sizes of input +def ewma_vectorized(data, alpha, offset=None, dtype=None, order='C', out=None): + """ + Calculates the exponential moving average over a vector. + Will fail for large inputs. + Params: + :param data: Input data + :param alpha: scalar float in range (0,1) + The alpha parameter for the moving average. + :param offset: optional + The offset for the moving average, scalar. Defaults to data[0]. + :param dtype: optional + Data type used for calculations. Defaults to float64 unless + data.dtype is float32, then it will use float32. + :param order: {'C', 'F', 'A'}, optional + Order to use when flattening the data. Defaults to 'C'. + :param out: ndarray, or None, optional + A location into which the result is stored. If provided, it must have + the same shape as the input. If not provided or `None`, + a freshly-allocated array is returned. 
+ :return out + """ + data = np.array(data, copy=False) + + if dtype is None: + if data.dtype == np.float32: + dtype = np.float32 + else: + dtype = np.float64 + else: + dtype = np.dtype(dtype) + + if data.ndim > 1: + # flatten input + data = data.reshape(-1, order) + + if out is None: + out = np.empty_like(data, dtype=dtype) + else: + assert out.shape == data.shape + assert out.dtype == dtype + + if data.size < 1: + # empty input, return empty array + return out + + if offset is None: + offset = data[0] + + alpha = np.array(alpha, copy=False).astype(dtype, copy=False) + + # scaling_factors -> 0 as len(data) gets large + # this leads to divide-by-zeros below + scaling_factors = np.power(1. - alpha, np.arange(data.size + 1, dtype=dtype), + dtype=dtype) + # create cumulative sum array + np.multiply(data, (alpha * scaling_factors[-2]) / scaling_factors[:-1], + dtype=dtype, out=out) + np.cumsum(out, dtype=dtype, out=out) + + # cumsums / scaling + out /= scaling_factors[-2::-1] + + if offset != 0: + offset = np.array(offset, copy=False).astype(dtype, copy=False) + # add offsets + out += offset * scaling_factors[1:] + + return out + + + initial_avg = vals[0] + get_weights(alpha,len(vals)) + + + +def test_calc_alpha(): + alpha = (calc_alpha(7,.99)) + weights = get_weights(alpha, 10) + print(alpha) + print(weights) + print(sum(weights)) + +def test_update_ewma(): + alpha = (calc_alpha(10,0.9)) + scores = np.array([-1,-1,-1,0,0,0,0,1,1,1,1,1,1,1,1]) + avg = scores[0] + ewmas = [avg] + for i in range(1,len(scores)): + avg = (update_ewma(avg,scores[i],alpha)) + ewmas.append(avg) + print(ewmas) + + + +if __name__ == "__main__": + test_update_ewma() \ No newline at end of file diff --git a/trained_models/2021-03-31/A2C_30k_dow_126.zip b/trained_models/2021-03-31/A2C_30k_dow_126.zip new file mode 100644 index 000000000..c03317273 Binary files /dev/null and b/trained_models/2021-03-31/A2C_30k_dow_126.zip differ