iRobot Stock Analysis

In this Jupyter notebook I explore one year of daily iRobot (IRBT) stock prices and fit simple regression models to forecast the closing price a few trading days ahead.

import quandl
import pandas as pd
import numpy as np
import pandas_datareader as pdr 
#from datetime import datetime
import datetime
from __future__ import division
import math
from sklearn import preprocessing, cross_validation, svm
from sklearn.linear_model import LinearRegression
import pickle

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Analysis window: from one year ago (at midnight) up to the current moment.
end = datetime.datetime.now()
print(end)

try:
    # Same calendar day one year earlier, with the time of day reset to 00:00.
    start = datetime.datetime(end.year - 1, end.month, end.day)
except ValueError:
    # Feb 29 has no counterpart in the preceding (non-leap) year.
    start = datetime.datetime(end.year - 1, end.month, 28)
print(start)
2018-06-07 22:12:03.122418
2017-06-07 00:00:00
# Download the IRBT quotes for the [start, end] window from the 'yahoo'
# data source and keep the resulting DataFrame in IRBT.
IRBT = pdr.DataReader(name='IRBT', data_source='yahoo', start=start, end=end)
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 253 entries, 2017-06-07 to 2018-06-07
Data columns (total 6 columns):
Open         253 non-null float64
High         253 non-null float64
Low          253 non-null float64
Close        253 non-null float64
Adj Close    253 non-null float64
Volume       253 non-null int64
dtypes: float64(5), int64(1)
memory usage: 13.8 KB
# Line chart of the 'Adj Close' column over the whole downloaded window.
IRBT.loc[:, 'Adj Close'].plot(figsize=(10, 4), legend=True)
Open High Low Close Adj Close Volume
2017-06-07 96.860001 99.309998 96.080002 99.230003 99.230003 687500
2017-06-08 98.980003 100.480003 97.820000 99.930000 99.930000 633700
2017-06-09 100.419998 100.949997 93.129997 95.989998 95.989998 1135700
2017-06-12 95.989998 98.760002 91.510002 98.650002 98.650002 1133200
2017-06-13 99.430000 101.540001 98.500000 100.279999 100.279999 801200
Open High Low Close Adj Close Volume
2018-06-01 62.820000 64.349998 62.820000 63.730000 63.730000 619600
2018-06-04 64.099998 65.639999 64.099998 65.360001 65.360001 659500
2018-06-05 65.290001 66.080002 65.000000 65.919998 65.919998 510500
2018-06-06 66.220001 69.250000 65.949997 68.349998 68.349998 933200
2018-06-07 68.160004 69.290001 67.290001 69.040001 69.040001 625500
# Work on an independent copy so the derived columns below are written to a
# real DataFrame rather than to a slice of IRBT (avoids pandas'
# SettingWithCopyWarning and leaves IRBT untouched).
df = IRBT[['Open', 'High', 'Low', 'Close', 'Volume']].copy()

# Distance of the day's high above the close, as a percentage of the close.
# NOTE: despite the "HL" name this compares High with Close, not High with Low.
df['HL_PCT'] = (df['High'] - df['Close']) / df['Close'] * 100.0

# Move from open to close, as a percentage of the open.
df['PCT_Change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0

# Keep only the columns used further on; copy again so later in-place edits
# (fillna, adding 'Label') operate on a standalone frame.
df = df[['Close', 'HL_PCT', 'PCT_Change', 'Volume']].copy()
Close HL_PCT PCT_Change Volume
2018-06-01 63.730000 0.972851 1.448583 619600
2018-06-04 65.360001 0.428394 1.965683 659500
2018-06-05 65.919998 0.242725 0.964921 510500
2018-06-06 68.349998 1.316755 3.216546 933200
2018-06-07 69.040001 0.362109 1.291075 625500
# Label: the value of forecast_col a fixed number of rows into the future.
forecast_col = 'Close'
# Replace missing values with an extreme sentinel instead of dropping the rows.
df.fillna(value=-99999, inplace=True)
# Forecast horizon: 1% of the row count, rounded up to a whole number of rows.
forecast_out = int(math.ceil(len(df) * 0.01))

# A negative shift pulls each future value back onto the current row.
df['Label'] = df[forecast_col].shift(periods=-forecast_out)

Close HL_PCT PCT_Change Volume Label
2017-06-07 99.230003 0.080616 2.446833 687500 98.650002
2017-06-08 99.930000 0.550388 0.959787 633700 100.279999
2017-06-09 95.989998 5.167204 -4.411472 1135700 101.989998
2017-06-12 98.650002 0.111505 2.771126 1133200 97.970001
2017-06-13 100.279999 1.256484 0.854872 801200 96.269997
# Overlay the closing price and its forward-shifted label on a single chart.
df.loc[:, ['Close', 'Label']].plot(figsize=(10, 4), legend=True, subplots=False)
# Define features X and labels y.

# The shift that built 'Label' leaves NaN in the last forecast_out rows (no
# future price exists for them yet). Drop those rows so the regressors never
# receive NaN targets.
df.dropna(subset=['Label'], inplace=True)

# Features: every column except the target.
X = np.array(df.drop(['Label'], axis=1))

# Scale X: standardise each feature column.
X = preprocessing.scale(X)

# Labels: the future closing price aligned row-for-row with X.
y = np.array(df['Label'])

# Sanity check: X and y must contain the same number of rows.
print(len(X), len(y))
250 250
# Create training and testing sets, holding out 20% of the rows for testing.
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# on newer versions train_test_split lives in sklearn.model_selection.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2)

# Linear regression baseline: .fit trains the model, .score evaluates it.
classf = LinearRegression()
classf.fit(X_train, y_train)
# For a regressor, .score is the coefficient of determination (R^2) on the
# held-out data, not a classification accuracy.
accuracy = classf.score(X_test, y_test)

# Same split, this time using the Support Vector Machine regressor.
# classf and accuracy are overwritten, so only the SVR result remains.
classf = svm.SVR()
classf.fit(X_train, y_train)
accuracy = classf.score(X_test, y_test)

# Forecasting into the future...
df['Label'] = df[forecast_col].shift(-forecast_out)
X = np.array(df.drop(['Label'], axis=1))
X = preprocessing.scale(X)
# The last forecast_out rows have features but no label yet: these are the
# rows we predict for. Everything before them is used for training/testing.
X_lately = X[-forecast_out:]
X = X[:-forecast_out]

# Drop the unlabeled tail from df as well so that y has exactly as many rows
# as the trimmed X; otherwise train_test_split rejects the mismatched lengths.
df.dropna(subset=['Label'], inplace=True)
y = np.array(df['Label'])

# Create training and testing sets, holding out 20% of the rows for testing.
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# on newer versions train_test_split lives in sklearn.model_selection.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2)

# Linear regression: .fit trains the model, .score evaluates it (R^2 on the
# held-out rows).
classf = LinearRegression()
classf.fit(X_train, y_train)
accuracy = classf.score(X_test, y_test)


# Predict the label for the most recent, still unlabeled rows.
forecast_set = classf.predict(X_lately)
print(forecast_set, accuracy, forecast_out)
[ 62.91718632  63.80245711  65.64647982] 0.925348943641 3
# Append one row per forecast value after the last known row, then plot the
# observed closing prices together with the forecasts.
df['Forecast'] = np.nan
last_date = df.index[-1]

# Step forward in business days using pandas date arithmetic. Converting to a
# Unix timestamp and back through the local-time datetime.fromtimestamp makes
# the resulting dates depend on the machine's timezone, and adding 86400 s per
# step also lands on weekends, when there is no trading.
# NOTE: exchange holidays are not skipped.
next_dates = pd.bdate_range(start=last_date + pd.offsets.BDay(1),
                            periods=len(forecast_set))

for next_date, predicted in zip(next_dates, forecast_set):
    # Every column is NaN on a forecast row except the last one ('Forecast').
    df.loc[next_date] = [np.nan] * (len(df.columns) - 1) + [predicted]

df[['Close', 'Forecast']].plot(subplots=False, legend=True, figsize=(10, 4))
Close HL_PCT PCT_Change Volume Label Forecast
2017-06-07 99.230003 0.080616 2.446833 687500.0 98.650002 NaN
2017-06-08 99.930000 0.550388 0.959787 633700.0 100.279999 NaN
2017-06-09 95.989998 5.167204 -4.411472 1135700.0 101.989998 NaN
2017-06-12 98.650002 0.111505 2.771126 1133200.0 97.970001 NaN
2017-06-13 100.279999 1.256484 0.854872 801200.0 96.269997 NaN
2017-06-14 101.989998 2.568882 1.644411 1283400.0 98.779999 NaN
2017-06-15 97.970001 3.143822 -2.225545 1657400.0 97.730003 NaN
2017-06-16 96.269997 2.690354 -2.442241 1599000.0 101.080002 NaN
2017-06-19 98.779999 0.496050 1.877063 883400.0 100.430000 NaN
2017-06-20 97.730003 2.302261 -1.282825 604500.0 101.199997 NaN
2017-06-21 101.080002 1.226749 2.723577 780200.0 100.980003 NaN
2017-06-22 100.430000 1.214778 -0.554508 560200.0 90.650002 NaN
2017-06-23 101.199997 1.482213 0.806851 696400.0 92.470001 NaN
2017-06-26 100.980003 1.871658 -0.951441 392100.0 86.779999 NaN
2017-06-27 90.650002 10.821839 -9.675168 1906600.0 84.139999 NaN
2017-06-28 92.470001 1.254457 1.581898 1359000.0 83.540001 NaN
2017-06-29 86.779999 6.914036 -6.426568 1299900.0 82.180000 NaN
2017-06-30 84.139999 3.220821 -2.412436 1125500.0 80.540001 NaN
2017-07-03 83.540001 1.460379 0.288113 886300.0 83.650002 NaN
2017-07-05 82.180000 2.579707 -1.035647 1140600.0 84.470001 NaN
2017-07-06 80.540001 1.092621 -0.297101 1400600.0 83.239998 NaN
2017-07-07 83.650002 1.016136 4.171854 989000.0 85.019997 NaN
2017-07-10 84.470001 0.627441 0.607434 776500.0 84.620003 NaN
2017-07-11 83.239998 2.570878 -1.257420 812600.0 83.989998 NaN
2017-07-12 85.019997 0.270528 1.069893 814900.0 84.900002 NaN
2017-07-13 84.620003 1.217205 -1.202567 527500.0 84.750000 NaN
2017-07-14 83.989998 1.154901 -0.896758 424200.0 86.769997 NaN
2017-07-17 84.900002 0.895173 1.035348 556900.0 85.000000 NaN
2017-07-18 84.750000 0.436582 -0.200186 554000.0 88.470001 NaN
2017-07-19 86.769997 0.806735 2.070339 559200.0 90.379997 NaN
2018-04-23 59.950001 5.054210 -4.018569 2061800.0 58.240002 NaN
2018-04-24 59.080002 4.807034 -2.039461 2178300.0 58.580002 NaN
2018-04-25 57.060001 8.499823 -7.789269 2951800.0 58.360001 NaN
2018-04-26 58.240002 2.970465 2.050117 1423100.0 57.830002 NaN
2018-04-27 58.580002 1.502214 0.652923 1150700.0 58.310001 NaN
2018-04-30 58.360001 0.822479 -0.273409 616500.0 57.180000 NaN
2018-05-01 57.830002 0.916478 -0.344644 709300.0 60.020000 NaN
2018-05-02 58.310001 0.840333 1.144838 549700.0 61.759998 NaN
2018-05-03 57.180000 2.168587 -1.464758 654200.0 62.270000 NaN
2018-05-04 60.020000 0.216598 5.520392 850600.0 62.060001 NaN
2018-05-07 61.759998 0.259067 2.404240 824500.0 61.930000 NaN
2018-05-08 62.270000 0.112414 0.630250 497600.0 62.360001 NaN
2018-05-09 62.060001 0.805672 -0.369241 517900.0 62.299999 NaN
2018-05-10 61.930000 1.356370 -0.625802 584900.0 61.259998 NaN
2018-05-11 62.360001 0.384857 0.694334 485200.0 62.310001 NaN
2018-05-14 62.299999 1.364371 -0.272134 509600.0 62.880001 NaN
2018-05-15 61.259998 1.142672 -0.969937 661000.0 62.430000 NaN
2018-05-16 62.310001 0.866630 1.070558 462000.0 62.889999 NaN
2018-05-17 62.880001 0.795165 0.930982 693100.0 63.160000 NaN
2018-05-18 62.430000 0.736824 -0.731434 464000.0 61.869999 NaN
2018-05-21 62.889999 0.079504 0.143312 633900.0 62.389999 NaN
2018-05-22 63.160000 2.184929 -0.158075 621600.0 62.730000 NaN
2018-05-23 61.869999 1.874899 -1.574932 654600.0 64.839996 NaN
2018-05-24 62.389999 0.112197 0.873082 434200.0 65.300003 NaN
2018-05-25 62.730000 0.031883 0.593327 380900.0 62.410000 NaN
2018-05-29 64.839996 0.046279 3.413070 739800.0 63.730000 NaN
2018-05-30 65.300003 0.459410 0.554359 644800.0 65.360001 NaN
2018-05-31 NaN NaN NaN NaN NaN 62.917186
2018-06-01 NaN NaN NaN NaN NaN 63.802457
2018-06-02 NaN NaN NaN NaN NaN 65.646480

250 rows × 6 columns