Numpy Basics

3 minute read

Numpy Basics

# import packages
import pandas as pd
import numpy as np

# Create a linearly spaced array: 20 evenly spaced values from 0 to 10 (both endpoints included)
np.linspace(0,10,20)
array([ 0.        ,  0.52631579,  1.05263158,  1.57894737,  2.10526316,
        2.63157895,  3.15789474,  3.68421053,  4.21052632,  4.73684211,
        5.26315789,  5.78947368,  6.31578947,  6.84210526,  7.36842105,
        7.89473684,  8.42105263,  8.94736842,  9.47368421, 10.        ])
# return an array with ones in the diagonal and zero otherwise
np.eye(5)
array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])
# create a 4x4 array of random numbers drawn uniformly from [0, 1)
np.random.rand(4,4)
array([[0.23912098, 0.26176023, 0.65033571, 0.26975701],
       [0.31568221, 0.89199535, 0.19985759, 0.15676016],
       [0.47552317, 0.00933197, 0.1337214 , 0.95415866],
       [0.42100403, 0.04881252, 0.96221299, 0.1989001 ]])
# random integers from 1 (inclusive) up to 100 (exclusive); the third parameter defines how many numbers are returned
np.random.randint(1,100,20)
array([13, 17, 22, 68,  1,  7, 55, 10, 89, 22, 28, 29, 57,  2, 73, 26, 54,
       16, 32,  9])
# setting the seed to generate random numbers
# good for reproducibility
np.random.seed(42)
np.random.rand(4)

array([0.37454012, 0.95071431, 0.73199394, 0.59865848])

# array attributes and methods
arr = np.arange(25)
arr

array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24])

ranarr = np.random.randint(0, 50, 10)
ranarr

array([38, 18, 22, 10, 10, 23, 35, 39, 23, 2])

arr.shape

(25,)

# reshape array
arr.reshape(5,5)
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])
# return max value
ranarr.max()
39

# return min value
ranarr.min()
2

# return the index location of the max value (argmax) and of the min value (argmin)
ranarr.argmax()
7

ranarr.argmin()
9


# Indexing and selection
arr1 = np.arange(0,11)
arr1

array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# return the value at index 8
arr1[8]

8

# select from index 1 up to (but not including) index 5
arr1[1:5]
array([1, 2, 3, 4])

arr1[:5]
array([0, 1, 2, 3, 4])

arr1[5:]
array([ 5,  6,  7,  8,  9, 10])

# Broadcasting -- applying an operation between the array and a single number, element by element
# e.g. adding a number to every element of the array
# the result is a new array; arr1 itself is not modified unless we assign the result back

arr1 + 100
array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110])

# divide every single number of the array
arr1 / 2
array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. ])

# square every single number of the array (raise each element to the power of 2)
arr1 ** 2
array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100])

# slice the original array -- note that a slice is a view onto the original data, not an independent copy
slice_of_array = arr1[0:6]
slice_of_array
array([0, 1, 2, 3, 4, 5])

# update array's values
slice_of_array[:] = 99
slice_of_array
array([99, 99, 99, 99, 99, 99])

# Because slice_of_array is a view created by slicing arr1, assigning values to slice_of_array
# also changes the corresponding elements of the original array.
arr1
array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

# To prevent this, we have to explicitly create a copy using the array's '.copy()' method
arr1_copy = arr1.copy()
arr1_copy[:] = 10000
arr1_copy

array([10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000,
       10000, 10000])
# Indexing on a 2D array
# create a 2D array
arr_2d = np.array([[5, 10, 15],[20, 25, 30],[35, 40, 45]])
arr_2d
array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])
arr_2d.shape
(3, 3)

# Slicing a 2D array, index of row & index of column
# slicing to get row in index 1 [20, 25, 30]
arr_2d[1]
array([20, 25, 30])

# to get an element by row and column
arr_2d[1][1]
25

# or
arr_2d[1,1]
25

# to get the '45'
arr_2d[2, 2]
45

# slicing subsets of the array 10 & 15 from the first row
# and 25, 30 from second row
arr_2d[:2,1:]

array([[10, 15],
       [25, 30]])

# Conditional Selection
# allows us to use conditional operators to grab elements
arr2 = np.arange(1,11)
arr2
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

# Return a boolean array marking which elements are greater than 4
arr2  > 4
array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

bool_arr = arr2 > 4
bool_arr
array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

arr2[bool_arr]
array([ 5,  6,  7,  8,  9, 10])

arr2[arr2<=6]
array([1, 2, 3, 4, 5, 6])
# Apply square root to all elements of the array
np.sqrt(arr2)
array([1.        , 1.41421356, 1.73205081, 2.        , 2.23606798,
       2.44948974, 2.64575131, 2.82842712, 3.        , 3.16227766])
# apply the log
np.log(arr2)
array([0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791,
       1.79175947, 1.94591015, 2.07944154, 2.19722458, 2.30258509])
# getting the sum of the array
arr2.sum()
55

# get the mean of the array
arr2.mean()
5.5

Tags:

Updated: