iRobot Stock Analysis
4 minute read
In this Jupyter notebook I explore one year of iRobot (IRBT) daily stock prices and fit simple regression models to forecast the closing price a few trading days ahead.
from __future__ import division

#from datetime import datetime
import datetime
import math
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import quandl
import seaborn as sns
from sklearn import preprocessing, cross_validation, svm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
sns . set_style ( 'whitegrid' )
% matplotlib inline
def one_year_before(moment):
    """Return midnight of the calendar day one year before *moment*.

    February 29 has no counterpart in the preceding (non-leap) year, so it
    is mapped to February 28 instead of raising ``ValueError``.
    """
    try:
        return datetime.datetime(moment.year - 1, moment.month, moment.day)
    except ValueError:
        return datetime.datetime(moment.year - 1, moment.month, 28)


# Download window: from one year ago (at midnight) up to now.
# The module is imported as ``import datetime``, so the class has to be
# reached as ``datetime.datetime``.
end = datetime.datetime.now()
print(end)
start = one_year_before(end)
print(start)
2018-06-07 22:12:03.122418
2017-06-07 00:00:00
# Fetch IRBT's price history for the [start, end] window from the 'yahoo'
# data source and keep it as the raw frame for everything below.
IRBT = pdr.DataReader(name='IRBT', data_source='yahoo', start=start, end=end)
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 253 entries, 2017-06-07 to 2018-06-07
Data columns (total 6 columns):
Open 253 non-null float64
High 253 non-null float64
Low 253 non-null float64
Close 253 non-null float64
Adj Close 253 non-null float64
Volume 253 non-null int64
dtypes: float64(5), int64(1)
memory usage: 13.8 KB
# Line chart of the adjusted closing price over the downloaded window.
adj_close = IRBT['Adj Close']
adj_close.plot(figsize=(10, 4), legend=True)
<matplotlib.axes._subplots.AxesSubplot at 0x11efda048>
Open
High
Low
Close
Adj Close
Volume
Date
2017-06-07
96.860001
99.309998
96.080002
99.230003
99.230003
687500
2017-06-08
98.980003
100.480003
97.820000
99.930000
99.930000
633700
2017-06-09
100.419998
100.949997
93.129997
95.989998
95.989998
1135700
2017-06-12
95.989998
98.760002
91.510002
98.650002
98.650002
1133200
2017-06-13
99.430000
101.540001
98.500000
100.279999
100.279999
801200
Open
High
Low
Close
Adj Close
Volume
Date
2018-06-01
62.820000
64.349998
62.820000
63.730000
63.730000
619600
2018-06-04
64.099998
65.639999
64.099998
65.360001
65.360001
659500
2018-06-05
65.290001
66.080002
65.000000
65.919998
65.919998
510500
2018-06-06
66.220001
69.250000
65.949997
68.349998
68.349998
933200
2018-06-07
68.160004
69.290001
67.290001
69.040001
69.040001
625500
# Work on an explicit copy of the raw price columns so the derived columns
# below are written to an independent frame rather than to a slice of IRBT
# (this avoids pandas' SettingWithCopyWarning).
df = IRBT[['Open', 'High', 'Low', 'Close', 'Volume']].copy()

# Gap between the day's high and its close, as a percentage of the close.
df['HL_PCT'] = (df['High'] - df['Close']) / df['Close'] * 100.0

# Open-to-close move, as a percentage of the open.
df['PCT_Change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0

# Keep only the columns used as model features; copy again because later
# cells add a column to this frame and modify it in place.
df = df[['Close', 'HL_PCT', 'PCT_Change', 'Volume']].copy()
Close
HL_PCT
PCT_Change
Volume
Date
2018-06-01
63.730000
0.972851
1.448583
619600
2018-06-04
65.360001
0.428394
1.965683
659500
2018-06-05
65.919998
0.242725
0.964921
510500
2018-06-06
68.349998
1.316755
3.216546
933200
2018-06-07
69.040001
0.362109
1.291075
625500
# Target to predict: the 'Close' price `forecast_out` rows into the future.
forecast_col = 'Close'

# Replace missing values with an out-of-range sentinel instead of dropping
# the affected rows.
df.fillna(value=-99999, inplace=True)

# Look ahead by 1% of the available rows, rounded up.
row_count = len(df)
forecast_out = int(math.ceil(0.01 * row_count))

# Shifting up by forecast_out pairs every row with the later close; the last
# forecast_out rows have no future value (NaN label) and are removed.
future_close = df[forecast_col].shift(-forecast_out)
df['Label'] = future_close
df.dropna(inplace=True)
df.head()
Close
HL_PCT
PCT_Change
Volume
Label
Date
2017-06-07
99.230003
0.080616
2.446833
687500
98.650002
2017-06-08
99.930000
0.550388
0.959787
633700
100.279999
2017-06-09
95.989998
5.167204
-4.411472
1135700
101.989998
2017-06-12
98.650002
0.111505
2.771126
1133200
97.970001
2017-06-13
100.279999
1.256484
0.854872
801200
96.269997
# Overlay the closing price and its shifted label on a single set of axes.
close_and_label = df[['Close', 'Label']]
close_and_label.plot(figsize=(10, 4), legend=True, subplots=False)
<matplotlib.axes._subplots.AxesSubplot at 0x121a8f588>
# Features X: every column except the label, standardised column-wise.
# `axis=1` is passed by keyword; the positional form is no longer accepted
# by recent pandas releases.
X = preprocessing.scale(np.array(df.drop(['Label'], axis=1)))
# Labels y: the future closing price.
y = np.array(df['Label'])
print(len(X), len(y))

# Hold out 20% of the rows for testing. train_test_split comes from
# sklearn.model_selection, which replaced the removed cross_validation module.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Linear regression: .fit trains, .score evaluates on the held-out rows.
# NOTE: for regressors .score is the R^2 coefficient, not a classification
# accuracy, despite the variable name.
classf = LinearRegression()
classf.fit(X_train, y_train)
accuracy = classf.score(X_test, y_test)
print(forecast_out)
print(accuracy)

# Same split, scored with support vector regression for comparison.
classf = svm.SVR()
classf.fit(X_train, y_train)
accuracy = classf.score(X_test, y_test)
print(forecast_out)
print(accuracy)
# Forecasting into the future...
# NOTE(review): df already lost its last forecast_out rows to the earlier
# dropna(), so re-shifting here blanks the labels of a further forecast_out
# rows. Those rows become X_lately, which means the forecast starts from the
# last row still in df rather than from the most recent downloaded day.
df['Label'] = df[forecast_col].shift(-forecast_out)

# Scale all feature rows together, then split off the unlabelled tail.
# `axis=1` is passed by keyword; the positional form is no longer accepted
# by recent pandas releases.
X = preprocessing.scale(np.array(df.drop(['Label'], axis=1)))
X_lately = X[-forecast_out:]   # rows to predict from (label is NaN)
X = X[:-forecast_out]          # rows that have a label
df.dropna(inplace=True)
y = np.array(df['Label'])

# Hold out 20% of the labelled rows for testing. train_test_split comes from
# sklearn.model_selection, which replaced the removed cross_validation module.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Linear regression: .fit trains, .score evaluates on the held-out rows
# (R^2 for regressors).
classf = LinearRegression()
classf.fit(X_train, y_train)
accuracy = classf.score(X_test, y_test)
print(forecast_out)
print(accuracy)

# Predict the future close for each of the unlabelled rows.
forecast_set = classf.predict(X_lately)
print(forecast_set, accuracy, forecast_out)
[ 62.91718632 63.80245711 65.64647982] 0.925348943641 3
# Column that receives the predictions; all historical rows stay NaN.
# Without it the values below would be written into the last existing
# column ('Label') and df['Forecast'] would not exist for plotting.
df['Forecast'] = np.nan

# Append one row per prediction, dated on consecutive calendar days after
# the last row of df. Dates are advanced with a Timedelta rather than a
# timestamp()/fromtimestamp() round-trip, which would shift them by the
# machine's local UTC offset.
# NOTE(review): calendar days are used, so forecast rows can fall on
# non-trading days (weekends/holidays) - consider business-day stepping.
last_date = df.iloc[-1].name
for step, prediction in enumerate(forecast_set, start=1):
    next_date = last_date + pd.Timedelta(days=step)
    # Every column is NaN except the trailing 'Forecast' column.
    df.loc[next_date] = [np.nan] * (len(df.columns) - 1) + [prediction]
# Draw the closing price and the forecast one after the other, label the
# axes, and render the figure.
for column in ('Close', 'Forecast'):
    df[column].plot()
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()

# The same two series again through a single DataFrame.plot call, this time
# with a legend and a wider figure.
close_and_forecast = df[['Close', 'Forecast']]
close_and_forecast.plot(subplots=False, legend=True, figsize=(10, 4))
<matplotlib.axes._subplots.AxesSubplot at 0x121f011d0>
Close
HL_PCT
PCT_Change
Volume
Label
Forecast
2017-06-07
99.230003
0.080616
2.446833
687500.0
98.650002
NaN
2017-06-08
99.930000
0.550388
0.959787
633700.0
100.279999
NaN
2017-06-09
95.989998
5.167204
-4.411472
1135700.0
101.989998
NaN
2017-06-12
98.650002
0.111505
2.771126
1133200.0
97.970001
NaN
2017-06-13
100.279999
1.256484
0.854872
801200.0
96.269997
NaN
2017-06-14
101.989998
2.568882
1.644411
1283400.0
98.779999
NaN
2017-06-15
97.970001
3.143822
-2.225545
1657400.0
97.730003
NaN
2017-06-16
96.269997
2.690354
-2.442241
1599000.0
101.080002
NaN
2017-06-19
98.779999
0.496050
1.877063
883400.0
100.430000
NaN
2017-06-20
97.730003
2.302261
-1.282825
604500.0
101.199997
NaN
2017-06-21
101.080002
1.226749
2.723577
780200.0
100.980003
NaN
2017-06-22
100.430000
1.214778
-0.554508
560200.0
90.650002
NaN
2017-06-23
101.199997
1.482213
0.806851
696400.0
92.470001
NaN
2017-06-26
100.980003
1.871658
-0.951441
392100.0
86.779999
NaN
2017-06-27
90.650002
10.821839
-9.675168
1906600.0
84.139999
NaN
2017-06-28
92.470001
1.254457
1.581898
1359000.0
83.540001
NaN
2017-06-29
86.779999
6.914036
-6.426568
1299900.0
82.180000
NaN
2017-06-30
84.139999
3.220821
-2.412436
1125500.0
80.540001
NaN
2017-07-03
83.540001
1.460379
0.288113
886300.0
83.650002
NaN
2017-07-05
82.180000
2.579707
-1.035647
1140600.0
84.470001
NaN
2017-07-06
80.540001
1.092621
-0.297101
1400600.0
83.239998
NaN
2017-07-07
83.650002
1.016136
4.171854
989000.0
85.019997
NaN
2017-07-10
84.470001
0.627441
0.607434
776500.0
84.620003
NaN
2017-07-11
83.239998
2.570878
-1.257420
812600.0
83.989998
NaN
2017-07-12
85.019997
0.270528
1.069893
814900.0
84.900002
NaN
2017-07-13
84.620003
1.217205
-1.202567
527500.0
84.750000
NaN
2017-07-14
83.989998
1.154901
-0.896758
424200.0
86.769997
NaN
2017-07-17
84.900002
0.895173
1.035348
556900.0
85.000000
NaN
2017-07-18
84.750000
0.436582
-0.200186
554000.0
88.470001
NaN
2017-07-19
86.769997
0.806735
2.070339
559200.0
90.379997
NaN
...
...
...
...
...
...
...
2018-04-23
59.950001
5.054210
-4.018569
2061800.0
58.240002
NaN
2018-04-24
59.080002
4.807034
-2.039461
2178300.0
58.580002
NaN
2018-04-25
57.060001
8.499823
-7.789269
2951800.0
58.360001
NaN
2018-04-26
58.240002
2.970465
2.050117
1423100.0
57.830002
NaN
2018-04-27
58.580002
1.502214
0.652923
1150700.0
58.310001
NaN
2018-04-30
58.360001
0.822479
-0.273409
616500.0
57.180000
NaN
2018-05-01
57.830002
0.916478
-0.344644
709300.0
60.020000
NaN
2018-05-02
58.310001
0.840333
1.144838
549700.0
61.759998
NaN
2018-05-03
57.180000
2.168587
-1.464758
654200.0
62.270000
NaN
2018-05-04
60.020000
0.216598
5.520392
850600.0
62.060001
NaN
2018-05-07
61.759998
0.259067
2.404240
824500.0
61.930000
NaN
2018-05-08
62.270000
0.112414
0.630250
497600.0
62.360001
NaN
2018-05-09
62.060001
0.805672
-0.369241
517900.0
62.299999
NaN
2018-05-10
61.930000
1.356370
-0.625802
584900.0
61.259998
NaN
2018-05-11
62.360001
0.384857
0.694334
485200.0
62.310001
NaN
2018-05-14
62.299999
1.364371
-0.272134
509600.0
62.880001
NaN
2018-05-15
61.259998
1.142672
-0.969937
661000.0
62.430000
NaN
2018-05-16
62.310001
0.866630
1.070558
462000.0
62.889999
NaN
2018-05-17
62.880001
0.795165
0.930982
693100.0
63.160000
NaN
2018-05-18
62.430000
0.736824
-0.731434
464000.0
61.869999
NaN
2018-05-21
62.889999
0.079504
0.143312
633900.0
62.389999
NaN
2018-05-22
63.160000
2.184929
-0.158075
621600.0
62.730000
NaN
2018-05-23
61.869999
1.874899
-1.574932
654600.0
64.839996
NaN
2018-05-24
62.389999
0.112197
0.873082
434200.0
65.300003
NaN
2018-05-25
62.730000
0.031883
0.593327
380900.0
62.410000
NaN
2018-05-29
64.839996
0.046279
3.413070
739800.0
63.730000
NaN
2018-05-30
65.300003
0.459410
0.554359
644800.0
65.360001
NaN
2018-05-31
NaN
NaN
NaN
NaN
NaN
62.917186
2018-06-01
NaN
NaN
NaN
NaN
NaN
63.802457
2018-06-02
NaN
NaN
NaN
NaN
NaN
65.646480
250 rows × 6 columns