How to predict tomorrow's price? #20

Open: wants to merge 2 commits into master
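The short answer to the title question, as far as this code base goes: take the most recent `num_steps` windows of prices, normalize them the same way `StockDataSet._prepare_data` does, run them through the trained model's `pred` tensor, and undo the normalization against the last observed close. A hedged sketch follows; `predict_tomorrow` is a helper invented for illustration, and it assumes a restored session plus a model object exposing the `inputs` and `pred` tensors from model_rnn.py.

```python
import numpy as np

def predict_tomorrow(sess, model, prices, input_size=1, num_steps=30):
    """Hypothetical helper: one-step-ahead prediction with a trained LstmRNN."""
    # Split the tail of the series into windows of `input_size` prices,
    # keeping one extra window so each input window can be normalized
    # against the close of the window before it (as _prepare_data does).
    seq = [np.array(prices[i * input_size:(i + 1) * input_size])
           for i in range(len(prices) // input_size)]
    seq = seq[-(num_steps + 1):]
    norm = [curr / seq[i][-1] - 1.0 for i, curr in enumerate(seq[1:])]

    # keep_prob is a plain constructor argument in this PR, not a
    # placeholder, so inference only needs the inputs tensor.
    feed = {model.inputs: np.array(norm).reshape(1, num_steps, input_size)}
    pred = sess.run(model.pred, feed)

    # The prediction is a relative change w.r.t. the last observed close,
    # so invert the normalization to get back to an absolute price.
    return (pred[0][-1] + 1.0) * seq[-1][-1]
```

With `input_size=1` the returned value is an estimate of the next day's price; with a larger `input_size` it is the close of the whole predicted window.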
Binary file added .DS_Store
Binary file added data/.DS_Store
Empty file removed data/.placeholder
16,863 changes: 16,863 additions & 0 deletions data/SP500.csv

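The 16,863-line CSV is too large to render in a diff, but it is easy to sanity-check locally. A quick peek, asserting nothing about the schema (the 16,863 added lines should be one header row plus 16,862 data rows):

```python
import pandas as pd

df = pd.read_csv("data/SP500.csv")
print(df.shape)             # expect (16862, n) once the header is parsed
print(df.columns.tolist())  # whatever columns were actually committed
print(df.head(3))
```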

111 changes: 0 additions & 111 deletions data_fetcher.py

This file was deleted.

3 changes: 1 addition & 2 deletions data_model.py
100644 → 100755
@@ -6,7 +6,7 @@
 random.seed(time.time())
 
-
+# test_ratio: fraction of the data held out as the test split
 class StockDataSet(object):
     def __init__(self,
                  stock_sym,
@@ -42,7 +42,6 @@ def _prepare_data(self, seq):
         # split into items of input_size
         seq = [np.array(seq[i * self.input_size: (i + 1) * self.input_size])
                for i in range(len(seq) // self.input_size)]
-
         if self.normalized:
             seq = [seq[0] / seq[0][0] - 1.0] + [
                 curr / seq[i][-1] - 1.0 for i, curr in enumerate(seq[1:])]
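To make that normalization concrete: with `input_size = 3` and prices 1 through 12, the windows are [1,2,3], [4,5,6], [7,8,9], [10,11,12], and every window after the first is expressed relative to the close of the window before it. A standalone NumPy check (not repo code):

```python
import numpy as np

prices = np.arange(1.0, 13.0)
input_size = 3
windows = [prices[i * input_size:(i + 1) * input_size]
           for i in range(len(prices) // input_size)]

norm = [windows[0] / windows[0][0] - 1.0] + [
    curr / windows[i][-1] - 1.0 for i, curr in enumerate(windows[1:])]

print(norm[1])  # [4,5,6] / 3 - 1 -> [0.333..., 0.666..., 1.0]
```

Dividing by the previous close keeps the inputs near zero whatever the absolute price level, which is also why a prediction has to be multiplied back by the last observed close (see the sketch at the top of this PR).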
42 changes: 2 additions & 40 deletions main.py
100644 → 100755
@@ -10,7 +10,7 @@
 
 flags = tf.app.flags
 flags.DEFINE_integer("stock_count", 100, "Stock count [100]")
-flags.DEFINE_integer("input_size", 1, "Input size [1]")
+flags.DEFINE_integer("input_size", 1, "Input size [1]")  # size of one input sample per time step
 flags.DEFINE_integer("num_steps", 30, "Num of steps [30]")
 flags.DEFINE_integer("num_layers", 1, "Num of layer [1]")
 flags.DEFINE_integer("lstm_size", 128, "Size of one LSTM cell [128]")
@@ -20,7 +20,6 @@
 flags.DEFINE_float("learning_rate_decay", 0.99, "Decay rate of learning rate. [0.99]")
 flags.DEFINE_integer("init_epoch", 5, "Num. of epoches considered as early stage. [5]")
 flags.DEFINE_integer("max_epoch", 50, "Total training epoches. [50]")
-flags.DEFINE_integer("embed_size", None, "If provided, use embedding vector of this size. [None]")
 flags.DEFINE_string("stock_symbol", None, "Target stock symbol [None]")
 flags.DEFINE_integer("sample_size", 4, "Number of stocks to plot during training. [4]")
 flags.DEFINE_boolean("train", False, "True for training, False for testing [False]")
@@ -32,12 +31,6 @@
 if not os.path.exists("logs"):
     os.mkdir("logs")
 
-
-def show_all_variables():
-    model_vars = tf.trainable_variables()
-    slim.model_analyzer.analyze_vars(model_vars, print_info=True)
-
-
 def load_sp500(input_size, num_steps, k=None, target_symbol=None, test_ratio=0.05):
     if target_symbol is not None:
         return [
@@ -48,35 +41,8 @@ def load_sp500(input_size, num_steps, k=None, target_symbol=None, test_ratio=0.05):
                          test_ratio=test_ratio)
         ]
 
-    # Load metadata of s & p 500 stocks
-    info = pd.read_csv("data/constituents-financials.csv")
-    info = info.rename(columns={col: col.lower().replace(' ', '_') for col in info.columns})
-    info['file_exists'] = info['symbol'].map(lambda x: os.path.exists("data/{}.csv".format(x)))
-    print info['file_exists'].value_counts().to_dict()
-
-    info = info[info['file_exists'] == True].reset_index(drop=True)
-    info = info.sort('market_cap', ascending=False).reset_index(drop=True)
-
-    if k is not None:
-        info = info.head(k)
-
-    print "Head of S&P 500 info:\n", info.head()
-
-    # Generate embedding meta file
-    info[['symbol', 'sector']].to_csv(os.path.join("logs/metadata.tsv"), sep='\t', index=False)
-
-    return [
-        StockDataSet(row['symbol'],
-                     input_size=input_size,
-                     num_steps=num_steps,
-                     test_ratio=0.05)
-        for _, row in info.iterrows()]
-
-
 def main(_):
-    pp.pprint(flags.FLAGS.__flags)
-
-    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
+    pp.pprint(FLAGS.__flags)
     run_config = tf.ConfigProto()
     run_config.gpu_options.allow_growth = True
 
@@ -89,11 +55,7 @@ def main(_):
             num_steps=FLAGS.num_steps,
             input_size=FLAGS.input_size,
             keep_prob=FLAGS.keep_prob,
-            embed_size=FLAGS.embed_size,
         )
 
-        show_all_variables()
-
         stock_data_list = load_sp500(
             FLAGS.input_size,
             FLAGS.num_steps,
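With data_fetcher.py and the per-stock CSVs gone, the only remaining data path is the bundled data/SP500.csv, reached through the `target_symbol` branch kept above. A hedged sketch of exercising it directly; the constructor arguments come from this diff, while the `train_X`/`test_X` attribute names are assumed from the rest of the repo and not shown here:

```python
from data_model import StockDataSet

# Mirrors what load_sp500 now does when --stock_symbol=SP500 is passed.
sp500 = StockDataSet(
    "SP500",          # resolves to data/SP500.csv
    input_size=1,     # matches the input_size flag default
    num_steps=30,     # matches the num_steps flag default
    test_ratio=0.05,
)
print(len(sp500.train_X), len(sp500.test_X))  # assumed attribute names
```

End to end, training should then reduce to `python main.py --stock_symbol=SP500 --train`, using the flags defined at the top of this file.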
42 changes: 0 additions & 42 deletions model_rnn.py
100644 → 100755
@@ -21,7 +21,6 @@ def __init__(self, sess, stock_count,
                  num_steps=30,
                  input_size=1,
                  keep_prob=0.8,
-                 embed_size=None,
                  logs_dir="logs",
                  plots_dir="images"):
         """
@@ -35,7 +34,6 @@
             num_steps:
             input_size:
             keep_prob:
-            embed_size
             checkpoint_dir
         """
         self.sess = sess
@@ -46,10 +44,6 @@
         self.num_steps = num_steps
         self.input_size = input_size
         self.keep_prob = keep_prob
-
-        self.use_embed = (embed_size is not None) and (embed_size > 0)
-        self.embed_size = embed_size or -1
-
         self.logs_dir = logs_dir
         self.plots_dir = plots_dir
 
@@ -82,21 +76,6 @@ def _create_one_cell():
             state_is_tuple=True
         ) if self.num_layers > 1 else _create_one_cell()
 
-        if self.embed_size > 0:
-            self.embed_matrix = tf.Variable(
-                tf.random_uniform([self.stock_count, self.embed_size], -1.0, 1.0),
-                name="embed_matrix"
-            )
-            sym_embeds = tf.nn.embedding_lookup(self.embed_matrix, self.symbols)
-
-            # stock_label_embeds.shape = (batch_size, embedding_size)
-            stacked_symbols = tf.tile(self.symbols, [1, self.num_steps], name='stacked_stock_labels')
-            stacked_embeds = tf.nn.embedding_lookup(self.embed_matrix, stacked_symbols)
-
-            # After concat, inputs.shape = (batch_size, num_steps, lstm_size + embed_size)
-            self.inputs_with_embed = tf.concat([self.inputs, stacked_embeds], axis=2, name="inputs_with_embed")
-        else:
-            self.inputs_with_embed = tf.identity(self.inputs)
-
         # Run dynamic RNN
         val, state_ = tf.nn.dynamic_rnn(cell, self.inputs, dtype=tf.float32, scope="dynamic_rnn")
@@ -137,24 +116,6 @@ def train(self, dataset_list, config):
         # Set up the logs folder
         self.writer = tf.summary.FileWriter(os.path.join("./logs", self.model_name))
         self.writer.add_graph(self.sess.graph)
 
-        if self.use_embed:
-            # Set up embedding visualization
-            # Format: tensorflow/tensorboard/plugins/projector/projector_config.proto
-            projector_config = projector.ProjectorConfig()
-
-            # You can add multiple embeddings. Here we add only one.
-            added_embed = projector_config.embeddings.add()
-            added_embed.tensor_name = self.embed_matrix.name
-            # Link this tensor to its metadata file (e.g. labels).
-            shutil.copyfile(os.path.join(self.logs_dir, "metadata.tsv"),
-                            os.path.join(self.model_logs_dir, "metadata.tsv"))
-            added_embed.metadata_path = "metadata.tsv"
-
-            # The next line writes a projector_config.pbtxt in the LOG_DIR. TensorBoard will
-            # read this file during startup.
-            projector.visualize_embeddings(self.writer, projector_config)
-
         tf.global_variables_initializer().run()
 
         # Merged test data of different stocks.
@@ -247,9 +208,6 @@ def model_name(self):
         name = "stock_rnn_lstm%d_step%d_input%d" % (
             self.lstm_size, self.num_steps, self.input_size)
 
-        if self.embed_size > 0:
-            name += "_embed%d" % self.embed_size
-
         return name
 
     @property
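After these deletions the graph no longer branches on `embed_size`: `self.inputs` feeds `tf.nn.dynamic_rnn` directly. A minimal TF 1.x sketch of the forward pass that remains, with shapes hard-coded to the flag defaults and projection variable names that are illustrative rather than copied from the repo:

```python
import tensorflow as tf  # TensorFlow 1.x, matching the repo

num_steps, input_size, lstm_size = 30, 1, 128

inputs = tf.placeholder(tf.float32, [None, num_steps, input_size], name="inputs")
cell = tf.contrib.rnn.LSTMCell(lstm_size, state_is_tuple=True)

# (batch, num_steps, lstm_size); no inputs_with_embed indirection anymore.
val, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32, scope="dynamic_rnn")

# Keep only the last time step and project it to one output window.
last = val[:, -1, :]
w = tf.Variable(tf.truncated_normal([lstm_size, input_size]), name="w")
b = tf.Variable(tf.constant(0.1, shape=[input_size]), name="b")
pred = tf.matmul(last, w) + b
```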
69 changes: 0 additions & 69 deletions scripts/build_graph.py

This file was deleted.

30 changes: 0 additions & 30 deletions scripts/config.py

This file was deleted.
