Skip to content

Commit e77791d

Browse files
committed
Initial commit
1 parent 46c11a5 commit e77791d

2 files changed

Lines changed: 176 additions & 0 deletions

File tree

lib/DataRetrieval.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
from Historic_Crypto import HistoricalData
2+
from datetime import datetime
3+
import yfinance as yf
4+
import talib as ta
5+
6+
'''
7+
All data goes from earliest to latest and is stored in a data frame.
8+
You can access individual values by doing: data['High'], data['Low']...
9+
'''
10+
11+
12+
def get_crypto_data(symbol, **kwargs):
    '''
    Return the historic price data for a crypto trading pair.

    Input:
        symbol (required) - The exchanged pair, e.g. ETH-USD or BTC-USD.
        interval (optional) - The interval of prices: 1d, 1h, 5m or 1m.
            Defaults to 1d.
        start_date (optional) - The earliest historic data you want, in the
            form YYYY-MM-DD-HH-MM: 2000-01-01-00-00 (which is the default).
        end_date (optional) - The latest historic data you want, in the form
            YYYY-MM-DD-HH-MM. Defaults to the current local time.

    Output:
        The data frame produced by HistoricalData(...).retrieve_data(), with
        the open/high/low/close/volume columns renamed to Open, High, Low,
        Close and Volume. By default it covers daily prices from 2000-01-01
        until now.

    Raises:
        ValueError - if interval is not one of the supported values.

    Examples:
        get_crypto_data('BTC-USD',interval='1h',start_date='2021-04-01-00-00')
        get_crypto_data('BTC-USD',interval='1h',start_date='2021-04-01-00-00',end_date='2021-06-01-00-00')
        get_crypto_data('ETH-USD')
    '''
    # HistoricalData expects the candle size in seconds, not as a label.
    seconds_per_interval = {'1d': 86400, '1h': 3600, '5m': 300, '1m': 60}

    interval = kwargs.get('interval', '1d')
    if interval not in seconds_per_interval:
        # Fail early with a clear message instead of passing 0 seconds on.
        supported = ', '.join(seconds_per_interval)
        raise ValueError(
            "Unsupported interval '" + str(interval) + "'; expected one of: " + supported
        )

    start_date = kwargs.get('start_date', '2000-01-01-00-00')
    end_date = kwargs.get('end_date', datetime.today().strftime('%Y-%m-%d-%H-%M'))

    data = HistoricalData(
        symbol, seconds_per_interval[interval], start_date, end_date
    ).retrieve_data()

    # Capitalise the column names so they match what get_stock_data returns.
    return data.rename(columns={
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        'volume': 'Volume',
    })
46+
47+
48+
def get_stock_data(symbol, **kwargs):
    '''
    Return the historic price data for a stock.

    Input:
        symbol (required) - The ticker symbol of the stock, e.g. AAPL or DIA.
        interval (optional) - The interval of prices: 1d, 1h, 5m or 1m.
            Defaults to 1d.
        start_date (optional) - The earliest historic data you want in form
            YYYY-MM-DD: 2000-01-01 (which is the default).
        end_date (optional) - The latest historic data you want in form
            YYYY-MM-DD. Defaults to today's date.
        period (optional) - Instead of specifying dates you can specify a
            period, e.g. 1d, 1mo, 1y, 2y. When given, start_date and end_date
            are ignored.

    Output:
        The data frame returned by yfinance's Ticker.history() for the
        requested range. By default it returns all daily prices since
        2000-01-01.

    ** Note: yFinance is limited in what it can return in terms of intraday
       data so be aware.

    Examples:
        data_source = DR.get_stock_data('DIA',interval='1d',period='2y')
        data_source = DR.get_stock_data('AAPL',start_date='2021-04-01',end_date='2021-06-01')
    '''
    interval = kwargs.get('interval', '1d')
    period = kwargs.get('period', None)

    stock = yf.Ticker(symbol)

    # A period takes precedence over any explicit date range.
    if period is not None:
        return stock.history(period=period, interval=interval)

    start_date = kwargs.get('start_date', '2000-01-01')
    end_date = kwargs.get('end_date', datetime.today().strftime('%Y-%m-%d'))
    return stock.history(interval=interval, start=start_date, end=end_date)
79+
80+
def add_SMA(data, timeperiod=30):
    '''
    Add a simple moving average (SMA) of the Close prices to historic data.

    Input:
        data (required) - A dataframe of historic data that SMA will be added
            to. It must have a 'Close' column. The 'SMA' column is also
            written onto this frame itself.
        timeperiod (optional) - Number of rows in the averaging window.
            Defaults to 30, TA-Lib's own default.

    Output:
        A new data frame with the SMA added and the first timeperiod - 1 rows
        removed (29 rows with the default window).

    Examples:
        data_source = add_SMA(data_source)
        data_source = add_SMA(data_source, timeperiod=10)
    '''
    data['SMA'] = ta.SMA(data['Close'], timeperiod=timeperiod)

    # The first timeperiod - 1 rows have no complete window, so TA-Lib leaves
    # their SMA as NaN; trim them. Copy so that adding further columns to the
    # result does not write into a slice of the caller's frame.
    return data.iloc[timeperiod - 1:, :].copy()
97+
98+
def add_EMA(data, timeperiod=30):
    '''
    Add an exponential moving average (EMA) of the Close prices to historic data.

    Input:
        data (required) - A dataframe of historic data that EMA will be added
            to. It must have a 'Close' column. The 'EMA' column is also
            written onto this frame itself.
        timeperiod (optional) - Number of rows in the averaging window.
            Defaults to 30, TA-Lib's own default.

    Output:
        A new data frame with the EMA added and the first timeperiod - 1 rows
        removed (29 rows with the default window).

    Examples:
        data_source = add_EMA(data_source)
        data_source = add_EMA(data_source, timeperiod=10)
    '''
    data['EMA'] = ta.EMA(data['Close'], timeperiod=timeperiod)

    # The first timeperiod - 1 rows are the EMA warm-up and come back as NaN;
    # trim them. Copy so that adding further columns to the result does not
    # write into a slice of the caller's frame.
    return data.iloc[timeperiod - 1:, :].copy()
115+

lib/Metrics.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import sklearn.metrics as skm
2+
3+
def _up_down_labels(prices):
    """Label each step of a price column: 1 if the price rose, otherwise 0.

    `prices` is a list of rows; only the first value of each row is used.
    The result has one entry fewer than `prices` (one per consecutive pair).
    """
    return [
        1 if later[0] - earlier[0] > 0 else 0
        for earlier, later in zip(prices, prices[1:])
    ]


def calculate_classification(test_x, test_y, model, label_scaler):
    """Turn price predictions into binary up/down labels.

    Input:
        test_x - Model input; passed straight to `model(test_x)`.
        test_y - True scaled labels; must support `.numpy()`.
        model - Callable whose output supports `.detach().numpy()`.
        label_scaler - Object with an `inverse_transform` method that maps
            scaled labels back to the original price space.

    Output:
        (bin_y, bin_pred): two lists with 1 where the value rose compared to
        the previous row and 0 otherwise (no change counts as 0), for the
        true labels and the predictions respectively. Each list is one entry
        shorter than its input.
    """
    # Getting predictions for the test set.
    pred_y = model(test_x)

    # Transforming the labels back to original space.
    orig_y = label_scaler.inverse_transform(test_y.numpy()).tolist()
    pred_y = label_scaler.inverse_transform(pred_y.detach().numpy()).tolist()

    # Classify on the sign of the step-to-step return rather than on price.
    return _up_down_labels(orig_y), _up_down_labels(pred_y)
19+
20+
def get_acc(bin_y, bin_pred):
    """Return the baseline accuracy and the model accuracy.

    Input:
        bin_y - True binary labels (0/1).
        bin_pred - Predicted binary labels (0/1).

    Output:
        (rndm_acc, acc): `rndm_acc` is the accuracy obtained by always
        guessing the more frequent class in `bin_y`; `acc` is the accuracy
        score of `bin_pred` against `bin_y`.
    """
    model_accuracy = skm.accuracy_score(bin_y, bin_pred)

    # Share of positive labels; the baseline is whichever class is larger.
    positive_share = sum(bin_y) / len(bin_y)
    negative_share = 1 - positive_share
    if positive_share > negative_share:
        baseline = positive_share
    else:
        baseline = negative_share

    return baseline, model_accuracy
29+
30+
def get_confusion_matrix(bin_y, bin_pred):
    """Return the 2x2 confusion matrix for binary labels.

    Input:
        bin_y - True binary labels (0/1).
        bin_pred - Predicted binary labels (0/1).

    Output:
        The matrix laid out as
            true neg,  false pos
            false neg, true pos
    """
    # Pin the label order so the result is always 2x2, even when only one of
    # the two classes actually occurs in the data.
    return skm.confusion_matrix(bin_y, bin_pred, labels=[0, 1])
40+
41+
def get_precision(cfn_mtrx):
    """Return the precision, true pos / (true pos + false pos).

    Input:
        cfn_mtrx - A 2x2 confusion matrix indexed as [true][predicted], i.e.
            [[true neg, false pos], [false neg, true pos]].

    Output:
        The precision as a float. When nothing was predicted positive the
        ratio is undefined, and 0.0 is returned instead.
    """
    true_pos = cfn_mtrx[1][1]
    false_pos = cfn_mtrx[0][1]
    predicted_pos = false_pos + true_pos

    # Guard the 0 / 0 case instead of dividing by zero.
    if predicted_pos == 0:
        return 0.0
    return true_pos / predicted_pos
44+
45+
def print_metrics(test_x, test_y, model, label_scaler):
    """Print accuracy, confusion matrix and precision for a model.

    Input:
        test_x, test_y, model, label_scaler - Passed unchanged to
            calculate_classification to obtain the binary labels.

    Output:
        Nothing is returned; the metrics are written to standard output.
    """
    # Derive every metric first, then report them together.
    actual, predicted = calculate_classification(test_x, test_y, model, label_scaler)
    baseline, accuracy = get_acc(actual, predicted)
    matrix = get_confusion_matrix(actual, predicted)
    precision = get_precision(matrix)

    summary = (
        "Random: " + str(baseline),
        "Accuracy: " + str(accuracy),
        "Confusion Matrix: ",
    )
    for line in summary:
        print(line)
    print(matrix)

    print("Precision: " + str(precision))

0 commit comments

Comments
 (0)