iRobot Stock Analysis
4 minute read
In this Jupyter Notebook I have done some exploratory data analysis of iRobot stock.
import quandl
import pandas as pd
import numpy as np
import pandas_datareader as pdr
#from datetime import datetime
import datetime
from __future__ import division
import math
from sklearn import preprocessing, cross_validation, svm
from sklearn.linear_model import LinearRegression
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline
# Set the download window: from one year ago (at midnight) up to now.
# The file does `import datetime`, so the class is datetime.datetime.
end = datetime.datetime.now()
print (end)
try:
    start = datetime.datetime(end.year - 1, end.month, end.day)
except ValueError:
    # end falls on Feb 29 and the previous year has no such day.
    start = datetime.datetime(end.year - 1, 2, 28)
print (start)
2018-06-07 22:12:03.122418
2017-06-07 00:00:00
# Download the IRBT quotes for the start..end window from the 'yahoo'
# data source and keep the result in IRBT.
IRBT = pdr.DataReader(name='IRBT', data_source='yahoo', start=start, end=end)
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 253 entries, 2017-06-07 to 2018-06-07
Data columns (total 6 columns):
Open 253 non-null float64
High 253 non-null float64
Low 253 non-null float64
Close 253 non-null float64
Adj Close 253 non-null float64
Volume 253 non-null int64
dtypes: float64(5), int64(1)
memory usage: 13.8 KB
# Plot the historical trend of the adjusted closing price ('Adj Close').
IRBT['Adj Close'].plot(figsize=(10, 4), legend=True)
<matplotlib.axes._subplots.AxesSubplot at 0x11efda048>
|
Open |
High |
Low |
Close |
Adj Close |
Volume |
Date |
|
|
|
|
|
|
2017-06-07 |
96.860001 |
99.309998 |
96.080002 |
99.230003 |
99.230003 |
687500 |
2017-06-08 |
98.980003 |
100.480003 |
97.820000 |
99.930000 |
99.930000 |
633700 |
2017-06-09 |
100.419998 |
100.949997 |
93.129997 |
95.989998 |
95.989998 |
1135700 |
2017-06-12 |
95.989998 |
98.760002 |
91.510002 |
98.650002 |
98.650002 |
1133200 |
2017-06-13 |
99.430000 |
101.540001 |
98.500000 |
100.279999 |
100.279999 |
801200 |
|
Open |
High |
Low |
Close |
Adj Close |
Volume |
Date |
|
|
|
|
|
|
2018-06-01 |
62.820000 |
64.349998 |
62.820000 |
63.730000 |
63.730000 |
619600 |
2018-06-04 |
64.099998 |
65.639999 |
64.099998 |
65.360001 |
65.360001 |
659500 |
2018-06-05 |
65.290001 |
66.080002 |
65.000000 |
65.919998 |
65.919998 |
510500 |
2018-06-06 |
66.220001 |
69.250000 |
65.949997 |
68.349998 |
68.349998 |
933200 |
2018-06-07 |
68.160004 |
69.290001 |
67.290001 |
69.040001 |
69.040001 |
625500 |
# Work on an independent copy of the price/volume columns so the derived
# columns below are not written into a slice of IRBT.
df = IRBT[['Open','High','Low','Close', 'Volume']].copy()
# Gap between the day's high and the close, as a percentage of the close.
df['HL_PCT'] = (df['High']-df['Close'])/ df['Close'] * 100.0
# Open-to-close move, as a percentage of the open.
df['PCT_Change'] = (df['Close']-df['Open'])/ df['Open'] * 100.0
# Keep only the model features; copy again because df is modified in place
# later on (fillna / new Label column).
df = df[['Close','HL_PCT','PCT_Change','Volume']].copy()
|
Close |
HL_PCT |
PCT_Change |
Volume |
Date |
|
|
|
|
2018-06-01 |
63.730000 |
0.972851 |
1.448583 |
619600 |
2018-06-04 |
65.360001 |
0.428394 |
1.965683 |
659500 |
2018-06-05 |
65.919998 |
0.242725 |
0.964921 |
510500 |
2018-06-06 |
68.349998 |
1.316755 |
3.216546 |
933200 |
2018-06-07 |
69.040001 |
0.362109 |
1.291075 |
625500 |
# Build the regression target: the 'Close' value found forecast_out rows
# further down, where forecast_out is 1% of the row count, rounded up.
forecast_col = 'Close'
forecast_out = int(math.ceil(len(df) * 0.01))
# Replace missing values with a sentinel rather than dropping those rows.
df.fillna(value=-99999, inplace=True)
df['Label'] = df[forecast_col].shift(periods=-forecast_out)
# The shift leaves the last forecast_out rows without a label; drop them.
df.dropna(inplace=True)
df.head()
|
Close |
HL_PCT |
PCT_Change |
Volume |
Label |
Date |
|
|
|
|
|
2017-06-07 |
99.230003 |
0.080616 |
2.446833 |
687500 |
98.650002 |
2017-06-08 |
99.930000 |
0.550388 |
0.959787 |
633700 |
100.279999 |
2017-06-09 |
95.989998 |
5.167204 |
-4.411472 |
1135700 |
101.989998 |
2017-06-12 |
98.650002 |
0.111505 |
2.771126 |
1133200 |
97.970001 |
2017-06-13 |
100.279999 |
1.256484 |
0.854872 |
801200 |
96.269997 |
# Draw the close and its shifted label together on a single axis.
df[['Close', 'Label']].plot(figsize=(10, 4), legend=True, subplots=False)
<matplotlib.axes._subplots.AxesSubplot at 0x121a8f588>
# Features X: every column except the target. Labels y: the target column.
# axis is passed by keyword because newer pandas rejects it positionally.
X = np.array(df.drop(['Label'], axis=1))
y = np.array(df['Label'])
# Scale X with preprocessing.scale before fitting.
X = preprocessing.scale(X)
print(len(X),len(y))
# Create training and testing sets, holding out 20% of the rows for testing.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2)
# Fit each estimator on the training split (.fit) and evaluate it on the
# held-out split (.score): first ordinary linear regression, then the
# Support Vector Machine regressor. After the loop classf and accuracy
# refer to the SVR run.
for classf in (LinearRegression(), svm.SVR()):
    classf.fit(X_train, y_train)
    accuracy = classf.score(X_test, y_test)
    print(forecast_out)
    print(accuracy)
#Forecasting into the future...
# NOTE(review): df already lost its last forecast_out rows to the earlier
# df.dropna(), so the rows held out below as X_lately are not the newest
# downloaded quotes -- confirm whether that is intended.
df['Label'] = df[forecast_col].shift(-forecast_out)
# axis is passed by keyword because newer pandas rejects it positionally.
X = np.array(df.drop(['Label'], axis=1))
# Scale all rows together so X_lately is on the same scale as the training rows.
X = preprocessing.scale(X)
# The last forecast_out rows have no label yet: keep them aside for prediction.
X_lately = X[-forecast_out:]
X = X[:-forecast_out]
# Drop the unlabeled rows so y lines up with the remaining X.
df.dropna(inplace=True)
y = np.array(df['Label'])
#Create training and testing sets
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2)
#Define a regressor and test it .fit=train / .score=test
classf = LinearRegression()
classf.fit(X_train, y_train)
accuracy = classf.score(X_test, y_test)
print(forecast_out)
print(accuracy)
# Predict one value for each held-out row.
forecast_set = classf.predict(X_lately)
print (forecast_set, accuracy, forecast_out)
[ 62.91718632 63.80245711 65.64647982] 0.925348943641 3
# The Forecast column has to exist, as the last column, before rows are
# appended: each new row is "NaN for every other column, prediction last",
# and df['Forecast'] is plotted afterwards.
df['Forecast'] = np.nan
last_date = df.iloc[-1].name
# Step with timedelta arithmetic instead of a timestamp()/fromtimestamp()
# round trip, which shifts the dates by the local UTC offset.
one_day = datetime.timedelta(days=1)
next_date = last_date + one_day
# NOTE: steps are calendar days, so a forecast row can fall on a weekend.
for i in forecast_set:
    df.loc[next_date] = [np.nan for _ in range(len(df.columns)-1)] + [i]
    next_date += one_day
df['Close'].plot()
df['Forecast'].plot()
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()
df[['Close','Forecast']].plot(subplots=False,legend=True, figsize=(10,4))
<matplotlib.axes._subplots.AxesSubplot at 0x121f011d0>
|
Close |
HL_PCT |
PCT_Change |
Volume |
Label |
Forecast |
2017-06-07 |
99.230003 |
0.080616 |
2.446833 |
687500.0 |
98.650002 |
NaN |
2017-06-08 |
99.930000 |
0.550388 |
0.959787 |
633700.0 |
100.279999 |
NaN |
2017-06-09 |
95.989998 |
5.167204 |
-4.411472 |
1135700.0 |
101.989998 |
NaN |
2017-06-12 |
98.650002 |
0.111505 |
2.771126 |
1133200.0 |
97.970001 |
NaN |
2017-06-13 |
100.279999 |
1.256484 |
0.854872 |
801200.0 |
96.269997 |
NaN |
2017-06-14 |
101.989998 |
2.568882 |
1.644411 |
1283400.0 |
98.779999 |
NaN |
2017-06-15 |
97.970001 |
3.143822 |
-2.225545 |
1657400.0 |
97.730003 |
NaN |
2017-06-16 |
96.269997 |
2.690354 |
-2.442241 |
1599000.0 |
101.080002 |
NaN |
2017-06-19 |
98.779999 |
0.496050 |
1.877063 |
883400.0 |
100.430000 |
NaN |
2017-06-20 |
97.730003 |
2.302261 |
-1.282825 |
604500.0 |
101.199997 |
NaN |
2017-06-21 |
101.080002 |
1.226749 |
2.723577 |
780200.0 |
100.980003 |
NaN |
2017-06-22 |
100.430000 |
1.214778 |
-0.554508 |
560200.0 |
90.650002 |
NaN |
2017-06-23 |
101.199997 |
1.482213 |
0.806851 |
696400.0 |
92.470001 |
NaN |
2017-06-26 |
100.980003 |
1.871658 |
-0.951441 |
392100.0 |
86.779999 |
NaN |
2017-06-27 |
90.650002 |
10.821839 |
-9.675168 |
1906600.0 |
84.139999 |
NaN |
2017-06-28 |
92.470001 |
1.254457 |
1.581898 |
1359000.0 |
83.540001 |
NaN |
2017-06-29 |
86.779999 |
6.914036 |
-6.426568 |
1299900.0 |
82.180000 |
NaN |
2017-06-30 |
84.139999 |
3.220821 |
-2.412436 |
1125500.0 |
80.540001 |
NaN |
2017-07-03 |
83.540001 |
1.460379 |
0.288113 |
886300.0 |
83.650002 |
NaN |
2017-07-05 |
82.180000 |
2.579707 |
-1.035647 |
1140600.0 |
84.470001 |
NaN |
2017-07-06 |
80.540001 |
1.092621 |
-0.297101 |
1400600.0 |
83.239998 |
NaN |
2017-07-07 |
83.650002 |
1.016136 |
4.171854 |
989000.0 |
85.019997 |
NaN |
2017-07-10 |
84.470001 |
0.627441 |
0.607434 |
776500.0 |
84.620003 |
NaN |
2017-07-11 |
83.239998 |
2.570878 |
-1.257420 |
812600.0 |
83.989998 |
NaN |
2017-07-12 |
85.019997 |
0.270528 |
1.069893 |
814900.0 |
84.900002 |
NaN |
2017-07-13 |
84.620003 |
1.217205 |
-1.202567 |
527500.0 |
84.750000 |
NaN |
2017-07-14 |
83.989998 |
1.154901 |
-0.896758 |
424200.0 |
86.769997 |
NaN |
2017-07-17 |
84.900002 |
0.895173 |
1.035348 |
556900.0 |
85.000000 |
NaN |
2017-07-18 |
84.750000 |
0.436582 |
-0.200186 |
554000.0 |
88.470001 |
NaN |
2017-07-19 |
86.769997 |
0.806735 |
2.070339 |
559200.0 |
90.379997 |
NaN |
... |
... |
... |
... |
... |
... |
... |
2018-04-23 |
59.950001 |
5.054210 |
-4.018569 |
2061800.0 |
58.240002 |
NaN |
2018-04-24 |
59.080002 |
4.807034 |
-2.039461 |
2178300.0 |
58.580002 |
NaN |
2018-04-25 |
57.060001 |
8.499823 |
-7.789269 |
2951800.0 |
58.360001 |
NaN |
2018-04-26 |
58.240002 |
2.970465 |
2.050117 |
1423100.0 |
57.830002 |
NaN |
2018-04-27 |
58.580002 |
1.502214 |
0.652923 |
1150700.0 |
58.310001 |
NaN |
2018-04-30 |
58.360001 |
0.822479 |
-0.273409 |
616500.0 |
57.180000 |
NaN |
2018-05-01 |
57.830002 |
0.916478 |
-0.344644 |
709300.0 |
60.020000 |
NaN |
2018-05-02 |
58.310001 |
0.840333 |
1.144838 |
549700.0 |
61.759998 |
NaN |
2018-05-03 |
57.180000 |
2.168587 |
-1.464758 |
654200.0 |
62.270000 |
NaN |
2018-05-04 |
60.020000 |
0.216598 |
5.520392 |
850600.0 |
62.060001 |
NaN |
2018-05-07 |
61.759998 |
0.259067 |
2.404240 |
824500.0 |
61.930000 |
NaN |
2018-05-08 |
62.270000 |
0.112414 |
0.630250 |
497600.0 |
62.360001 |
NaN |
2018-05-09 |
62.060001 |
0.805672 |
-0.369241 |
517900.0 |
62.299999 |
NaN |
2018-05-10 |
61.930000 |
1.356370 |
-0.625802 |
584900.0 |
61.259998 |
NaN |
2018-05-11 |
62.360001 |
0.384857 |
0.694334 |
485200.0 |
62.310001 |
NaN |
2018-05-14 |
62.299999 |
1.364371 |
-0.272134 |
509600.0 |
62.880001 |
NaN |
2018-05-15 |
61.259998 |
1.142672 |
-0.969937 |
661000.0 |
62.430000 |
NaN |
2018-05-16 |
62.310001 |
0.866630 |
1.070558 |
462000.0 |
62.889999 |
NaN |
2018-05-17 |
62.880001 |
0.795165 |
0.930982 |
693100.0 |
63.160000 |
NaN |
2018-05-18 |
62.430000 |
0.736824 |
-0.731434 |
464000.0 |
61.869999 |
NaN |
2018-05-21 |
62.889999 |
0.079504 |
0.143312 |
633900.0 |
62.389999 |
NaN |
2018-05-22 |
63.160000 |
2.184929 |
-0.158075 |
621600.0 |
62.730000 |
NaN |
2018-05-23 |
61.869999 |
1.874899 |
-1.574932 |
654600.0 |
64.839996 |
NaN |
2018-05-24 |
62.389999 |
0.112197 |
0.873082 |
434200.0 |
65.300003 |
NaN |
2018-05-25 |
62.730000 |
0.031883 |
0.593327 |
380900.0 |
62.410000 |
NaN |
2018-05-29 |
64.839996 |
0.046279 |
3.413070 |
739800.0 |
63.730000 |
NaN |
2018-05-30 |
65.300003 |
0.459410 |
0.554359 |
644800.0 |
65.360001 |
NaN |
2018-05-31 |
NaN |
NaN |
NaN |
NaN |
NaN |
62.917186 |
2018-06-01 |
NaN |
NaN |
NaN |
NaN |
NaN |
63.802457 |
2018-06-02 |
NaN |
NaN |
NaN |
NaN |
NaN |
65.646480 |
250 rows × 6 columns