In [125]:
# Stefano Ciccarelli
import quandl
import pandas as pd 

quandl.ApiConfig.api_key = "TyKZthWgcxsqfZf9yeXD"

data = dict() 

for n in range(10):
    request = quandl.get_table('SHARADAR/SF1', calendardate='201{}-12-31'.format(n))
    request.index = request['ticker']
    data['201{}'.format(n)] = request
In [127]:
def labeling(x):
    if x >= 0.3:
        return 1
    else:
        return 0
    

for n in range(9):
    data['201{}'.format(n)]['Return'] = data['201{}'.format(n + 1)]['price']/data['201{}'.format(n)]['price'] - 1
    data['201{}'.format(n)]['Label'] = data['201{}'.format(n)]['Return'].apply(labeling)
In [66]:
import pandas as pd 
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

columns = list(data['2011'].columns[6:])
data_standard = pd.DataFrame(columns = data['2011'].columns)

for var in data:
    if var not in ("2010", "2018", "2019") :
        
        dataset_standard = data[var].copy()
        dataset_standard[columns] = scaler.fit_transform(dataset_standard[columns])
        data_standard = data_standard.append(dataset_standard)
        
    else:
        pass

data_standard.index = data_standard["calendardate"]
data_standard.tail()
Out[66]:
ticker dimension calendardate datekey reportperiod lastupdated accoci assets assetsavg assetsc ... shareswadil sps tangibles taxassets taxexp taxliabilities tbvps workingcapital Return Label
calendardate
2017-12-31 CSCO MRY 2017-12-31 2017-07-29 2017-07-29 2020-09-03 0.910670 0.042448 0.041329 0.498039 ... 0.230141 0.005520 0.030401 0.480667 0.479851 0.043161 0.003620 0.595964 0.889288 1.0
2017-12-31 CAT MRY 2017-12-31 2017-12-31 2017-12-31 2020-08-05 0.868346 0.021393 0.021883 0.196461 ... 0.015472 0.341384 0.018651 0.191972 0.504957 0.000000 0.046120 0.223913 0.361409 0.0
2017-12-31 BA MRY 2017-12-31 2017-12-31 2017-12-31 2020-07-31 0.349356 0.035494 0.028950 0.507514 ... NaN 0.746081 0.033135 0.036399 0.440769 0.070056 0.072094 0.233713 0.639973 0.0
2017-12-31 AXP MRY 2017-12-31 2017-12-31 2017-12-31 2020-07-24 0.826091 0.062915 0.058120 NaN ... 0.029303 0.150664 0.064469 0.000000 0.555775 0.000000 0.085388 NaN 0.510249 0.0
2017-12-31 AAPL MRY 2017-12-31 2017-09-30 2017-09-30 2020-08-31 0.903969 0.140244 0.128110 0.783623 ... 1.000000 0.012523 0.143497 0.000000 0.975882 0.000000 0.002969 0.371105 1.000000 1.0

5 rows × 113 columns

In [67]:
X, y = data_standard[columns], data_standard['Label']

from sklearn.model_selection import train_test_split
X_train, X_test = X[:'2017-01-01'], X['2017-01-01':]
y_train, y_test = y[:'2017-01-01'], y['2017-01-01':]

import autokeras as ak
search = ak.StructuredDataClassifier(max_trials=15)
search.fit(x=X_train, y=y_train, verbose=1)
INFO:tensorflow:Reloading Oracle from existing project .\structured_data_classifier\oracle.json
INFO:tensorflow:Reloading Tuner from .\structured_data_classifier\tuner0.json
INFO:tensorflow:Oracle triggered exit
Epoch 1/67
6/6 [==============================] - 0s 2ms/step - loss: 0.7960 - accuracy: 0.2944
Epoch 2/67
6/6 [==============================] - 0s 2ms/step - loss: 0.6462 - accuracy: 0.6778
Epoch 3/67
6/6 [==============================] - 0s 2ms/step - loss: 0.5743 - accuracy: 0.7667
Epoch 4/67
6/6 [==============================] - 0s 2ms/step - loss: 0.5428 - accuracy: 0.7667
Epoch 5/67
6/6 [==============================] - 0s 2ms/step - loss: 0.5280 - accuracy: 0.7667
Epoch 6/67
6/6 [==============================] - 0s 2ms/step - loss: 0.5183 - accuracy: 0.7667
Epoch 7/67
6/6 [==============================] - 0s 2ms/step - loss: 0.5094 - accuracy: 0.7667
Epoch 8/67
6/6 [==============================] - 0s 2ms/step - loss: 0.5005 - accuracy: 0.7667
Epoch 9/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4920 - accuracy: 0.7722
Epoch 10/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4845 - accuracy: 0.7722
Epoch 11/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4778 - accuracy: 0.7778
Epoch 12/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4707 - accuracy: 0.7778
Epoch 13/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4638 - accuracy: 0.7778
Epoch 14/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4570 - accuracy: 0.7833
Epoch 15/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4508 - accuracy: 0.7833
Epoch 16/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4447 - accuracy: 0.7889
Epoch 17/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4391 - accuracy: 0.7889
Epoch 18/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4335 - accuracy: 0.7944
Epoch 19/67
6/6 [==============================] - 0s 1ms/step - loss: 0.4282 - accuracy: 0.8056
Epoch 20/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4229 - accuracy: 0.8056
Epoch 21/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4178 - accuracy: 0.8056
Epoch 22/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4127 - accuracy: 0.8056
Epoch 23/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4079 - accuracy: 0.8056
Epoch 24/67
6/6 [==============================] - 0s 2ms/step - loss: 0.4031 - accuracy: 0.8056
Epoch 25/67
6/6 [==============================] - ETA: 0s - loss: 0.3115 - accuracy: 0.90 - 0s 2ms/step - loss: 0.3984 - accuracy: 0.8111
Epoch 26/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3937 - accuracy: 0.8111
Epoch 27/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3892 - accuracy: 0.8167
Epoch 28/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3847 - accuracy: 0.8167
Epoch 29/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3802 - accuracy: 0.8167
Epoch 30/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3759 - accuracy: 0.8222
Epoch 31/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3714 - accuracy: 0.8278
Epoch 32/67
6/6 [==============================] - 0s 1ms/step - loss: 0.3670 - accuracy: 0.8333
Epoch 33/67
6/6 [==============================] - 0s 1ms/step - loss: 0.3626 - accuracy: 0.8333
Epoch 34/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3583 - accuracy: 0.8333
Epoch 35/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3540 - accuracy: 0.8389
Epoch 36/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3498 - accuracy: 0.8500
Epoch 37/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3456 - accuracy: 0.8500
Epoch 38/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3414 - accuracy: 0.8611
Epoch 39/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3372 - accuracy: 0.8667
Epoch 40/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3330 - accuracy: 0.8667
Epoch 41/67
6/6 [==============================] - 0s 1ms/step - loss: 0.3287 - accuracy: 0.8722
Epoch 42/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3244 - accuracy: 0.8778
Epoch 43/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3202 - accuracy: 0.8833
Epoch 44/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3159 - accuracy: 0.8833
Epoch 45/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3117 - accuracy: 0.8889
Epoch 46/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3074 - accuracy: 0.8889
Epoch 47/67
6/6 [==============================] - 0s 2ms/step - loss: 0.3032 - accuracy: 0.8889
Epoch 48/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2990 - accuracy: 0.9000
Epoch 49/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2949 - accuracy: 0.9111
Epoch 50/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2908 - accuracy: 0.9167
Epoch 51/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2867 - accuracy: 0.9167
Epoch 52/67
6/6 [==============================] - 0s 1ms/step - loss: 0.2827 - accuracy: 0.9222
Epoch 53/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2787 - accuracy: 0.9222
Epoch 54/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2747 - accuracy: 0.9222
Epoch 55/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2708 - accuracy: 0.9222
Epoch 56/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2669 - accuracy: 0.9222
Epoch 57/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2630 - accuracy: 0.9222
Epoch 58/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2592 - accuracy: 0.9333
Epoch 59/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2554 - accuracy: 0.9333
Epoch 60/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2516 - accuracy: 0.9333
Epoch 61/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2479 - accuracy: 0.9333
Epoch 62/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2441 - accuracy: 0.9333
Epoch 63/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2403 - accuracy: 0.9333
Epoch 64/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2365 - accuracy: 0.9333
Epoch 65/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2323 - accuracy: 0.9389
Epoch 66/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2273 - accuracy: 0.9444
Epoch 67/67
6/6 [==============================] - 0s 2ms/step - loss: 0.2228 - accuracy: 0.9444
In [128]:
loss, acc = search.evaluate(X_test, y_test, verbose=0)
print('{:.2f}%'.format(acc*100))
86.67%
In [129]:
y_predictions = search.predict(X_test)
y_train, y_test = y[:'2017-01-01'], y['2017-01-01':]
df = pd.DataFrame(y_test)


df["Expected"] = y_predictions
returns = data["2017"]
returns.index = returns["calendardate"]

df["Return"] = returns["Return"]
df["Ticker"] = returns["ticker"]
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.iter
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_1
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_2
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.decay
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.learning_rate
WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.
Hi
In [106]:
df
Out[106]:
Label Expected Return Ticker
calendardate
2017-12-31 0.0 0.0 -0.184720 XOM
2017-12-31 0.0 0.0 -0.101032 WMT
2017-12-31 0.0 0.0 0.062158 VZ
2017-12-31 1.0 1.0 0.426169 V
2017-12-31 0.0 1.0 0.130001 UNH
2017-12-31 0.0 1.0 0.068894 TSLA
2017-12-31 0.0 0.0 -0.117148 TRV
2017-12-31 0.0 0.0 -0.104303 PG
2017-12-31 0.0 0.0 0.205135 PFE
2017-12-31 1.0 1.0 0.354973 NKE
2017-12-31 1.0 1.0 0.430582 MSFT
2017-12-31 1.0 1.0 0.357917 MRK
2017-12-31 0.0 0.0 -0.190466 MMM
2017-12-31 0.0 0.0 0.031664 MCD
2017-12-31 0.0 0.0 0.032040 KO
2017-12-31 0.0 0.0 -0.087152 JPM
2017-12-31 0.0 0.0 -0.089107 JNJ
2017-12-31 0.0 0.0 0.012782 INTC
2017-12-31 0.0 0.0 -0.259093 IBM
2017-12-31 0.0 0.0 -0.110312 HD
2017-12-31 0.0 0.0 -0.344285 GS
2017-12-31 0.0 0.0 -0.566189 GE
2017-12-31 0.0 1.0 0.186365 DIS
2017-12-31 0.0 0.0 -0.249087 DD
2017-12-31 0.0 0.0 -0.131001 CVX
2017-12-31 1.0 1.0 0.350571 CSCO
2017-12-31 0.0 0.0 -0.193616 CAT
2017-12-31 0.0 0.0 0.093554 BA
2017-12-31 0.0 1.0 -0.040177 AXP
2017-12-31 1.0 1.0 0.464703 AAPL
In [107]:
df["Strategy"] = df['Expected']*(df['Return']+1)

print("This Algorithm for Stock Selection produces an average annualized return of {:.2f}%".format(df["Strategy"].mean()*100))
This Algorithm for Stock Selection produces an average annualized return of 42.43%
In [113]:
import yfinance as yf

stocks = pd.DataFrame()
count = 0 
insert = list(df["Expected"])

for i in df["Ticker"]:
    if insert[count] == 1:
        count += 1
        try:
            data = yf.download(i, start="2017-01-01", end="2017-12-30")

            stocks[i] = data['Adj Close']
        except:
            print('failed: ', i)
    else:
        count += 1
        pass
        
print(stocks)
stocks = stocks.dropna(axis='columns')
stocks.head(10)
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
                     V         UNH       TSLA        NKE       MSFT  \
Date                                                                  
2017-01-03   77.558708  152.367905  43.397999  49.816982  58.826855   
2017-01-04   78.192841  152.802032  45.397999  50.861626  58.563644   
2017-01-05   79.109886  153.056808  45.349998  50.852039  58.563644   
2017-01-06   80.202553  153.273880  45.801998  51.666664  59.071262   
2017-01-09   79.753784  152.839767  46.256001  51.158726  58.883255   
...                ...         ...        ...        ...        ...   
2017-12-22  110.716484  210.762146  65.040001  61.465988  82.176476   
2017-12-26  111.011246  210.378937  63.458000  61.815617  82.070770   
2017-12-27  112.023186  211.164505  62.327999  61.135788  82.368675   
2017-12-28  112.347404  213.415833  63.071999  61.135788  82.378288   
2017-12-29  112.023186  211.202835  62.270000  60.747318  82.205322   

                  MRK         DIS       CSCO        AXP       AAPL  
Date                                                                
2017-01-03  53.849270  101.584358  27.153379  71.067505  27.548414  
2017-01-04  53.831364  102.886719  26.991968  72.232552  27.517582  
2017-01-05  53.813457  102.829262  27.054739  71.342186  27.657520  
2017-01-06  53.956696  104.361458  27.108545  71.484283  27.965857  
2017-01-09  54.699753  103.767731  27.063707  71.853676  28.222006  
...               ...         ...        ...        ...        ...  
2017-12-22  52.002960  105.682114  35.500523  94.627693  42.176311  
2017-12-26  51.984501  105.147240  35.436054  94.464767  41.106304  
2017-12-27  51.984501  104.680435  35.509731  95.001465  41.113533  
2017-12-28  52.224400  104.806862  35.537357  95.547714  41.229214  
2017-12-29  51.919914  104.554008  35.270294  95.173958  40.783367  

[251 rows x 10 columns]
Out[113]:
V UNH TSLA NKE MSFT MRK DIS CSCO AXP AAPL
Date
2017-01-03 77.558708 152.367905 43.397999 49.816982 58.826855 53.849270 101.584358 27.153379 71.067505 27.548414
2017-01-04 78.192841 152.802032 45.397999 50.861626 58.563644 53.831364 102.886719 26.991968 72.232552 27.517582
2017-01-05 79.109886 153.056808 45.349998 50.852039 58.563644 53.813457 102.829262 27.054739 71.342186 27.657520
2017-01-06 80.202553 153.273880 45.801998 51.666664 59.071262 53.956696 104.361458 27.108545 71.484283 27.965857
2017-01-09 79.753784 152.839767 46.256001 51.158726 58.883255 54.699753 103.767731 27.063707 71.853676 28.222006
2017-01-10 79.324509 152.490585 45.973999 50.899967 58.864449 53.643364 103.786880 27.243055 72.601952 28.250471
2017-01-11 79.802551 152.783142 45.945999 50.497440 59.400265 55.174236 104.801964 27.036808 72.848221 28.402262
2017-01-12 79.383034 153.226730 45.917999 50.219505 58.855049 55.693474 102.972900 26.938164 72.819801 28.283670
2017-01-13 79.187943 152.698196 47.549999 50.717861 58.939663 55.809868 103.480438 26.965063 72.573547 28.233862
2017-01-17 79.275742 151.622314 47.116001 51.417500 58.779850 55.039948 103.394257 26.893322 72.554588 28.461563
In [114]:
import sys
import yfinance as yf
import lxml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import figure
from sklearn.preprocessing import MinMaxScaler
import os
In [115]:
log_ret = np.log(stocks/stocks.shift(1))

np.random.seed(42)
num_ports = 2000
all_weights = np.zeros((num_ports, len(stocks.columns)))
ret_arr = np.zeros(num_ports)
vol_arr = np.zeros(num_ports)
shape_arr = np.zeros(num_ports)

for x in range(num_ports):
    weights = np.array(np.random.random(len(stocks.columns)))
    weights = weights/np.sum(weights)
    all_weights[x,:] = weights
    ret_arr[x] = np.sum((log_ret.mean()*weights*252))
    vol_arr[x] = np.sqrt(np.dot(weights.T, np.dot(log_ret.cov()*252, weights)))
    shape_arr[x] = ret_arr[x]/vol_arr[x]

print('Max sharpe ratio in the array: {}'.format(shape_arr.max()))
print("Its location in the array: {}".format(shape_arr.argmax()))

max_sr_ret = ret_arr[shape_arr.argmax()]
max_sr_vol = vol_arr[shape_arr.argmax()]

print('Proportion ratio: ',100*all_weights[shape_arr.argmax(),:])

plt.figure(figsize=(12,8))
plt.scatter(vol_arr, ret_arr, c=shape_arr)
plt.colorbar(label='Sharpe Ratio')
plt.xlabel('Volatility')
plt.ylabel('Return')
plt.scatter(max_sr_vol, max_sr_ret, c='red', s=50)
plt.show()
Max sharpe ratio in the array: 3.531086903574403
Its location in the array: 1451
Proportion ratio:  [18.88535894 15.30726511  2.09109787  4.67307346  7.96875013  3.43896713
 10.0053844   9.52941807 19.33222169  8.7684632 ]
In [130]:
print("The Expected return of this optimal portfolio would be {:.2f}%, with a volatility of {:.2f}%, generating a Sharpe Ratio of {:.2f}".format(ret_arr[1451]*100,vol_arr[1451]*100,shape_arr[1451])) 
The Expected return of this optimal portfolio would be 28.26%, with a volatility of 8.00%, generating a Sharpe Ratio of 3.53
In [116]:
equipment = list(stocks.columns.values)
sizes = list(100*all_weights[shape_arr.argmax(),:])

dictionary = {}
count = 0

for x in equipment:
    dictionary[x] = sizes[count]
    count += 1

print(dictionary)

a = pd.DataFrame.from_dict(dictionary,orient= "index")
a.to_csv("final.csv")


fig1, ax1 = plt.subplots()
fig1.set_size_inches(8, 6)
ax1.pie(sizes, labels=equipment, autopct='%1.1f%%',
        shadow=True, startangle=90)
ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
fig1.set_facecolor('white')
plt.show()
{'V': 18.885358942238106, 'UNH': 15.307265108539687, 'TSLA': 2.0910978697403197, 'NKE': 4.673073461941965, 'MSFT': 7.968750132080986, 'MRK': 3.438967133987564, 'DIS': 10.00538439670626, 'CSCO': 9.529418071108267, 'AXP': 19.332221687280196, 'AAPL': 8.768463196376654}
In [123]:
28.26 - 3.291 * (8)
Out[123]:
1.9320000000000022
In [ ]: