Sunday 19 July 2020

Machine Learning with Python note 2

array1 = np.array(list1)
array2 = np.array(list2)
array3 = array1/array2

np.arange(10)
generate an array of the integers 0 through 9

np.arange(5, 56, 10)
generate an array beginning at 5, incrementing in steps of 10, and stopping before 56 (56 is excluded)

np.linspace(10, 100, 5)
generate an array of 5 equally spaced elements from 10 to 100 (both endpoints included)

array1.reshape(8, 3)
return array1 reshaped into an 8-row x 3-column matrix (array1 must have exactly 24 elements; the original array is not modified)

result = array2.reshape(2,2,6)
result is a 3-D array of shape (2, 2, 6): 2 blocks, each block has 2 rows, each row has 6 elements

array3[array3 % 2 == 0]
filter array3, returning only the even elements

np.zeros(50)
create an array of 50 zeros

np.zeros((3, 5)) + 6
create 3 x 5 matrix with all values 6

np.ones((5, 9))
create 5 x 9 matrix with all values 1

np.eye(5)
identity matrix
1,0,0,0,0
0,1,0,0,0
0,0,1,0,0
0,0,0,1,0
0,0,0,0,1

np.eye(5,6)
1,0,0,0,0,0
0,1,0,0,0,0
0,0,1,0,0,0
0,0,0,1,0,0
0,0,0,0,1,0

import seaborn as sns
sns.distplot(pd['col'], kde=True)
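histogram with a KDE curve, used to analyze the distribution (spread) of col; note that distplot is deprecated in newer seaborn versions in favor of sns.histplot(..., kde=True)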

plot = sns.countplot(x = 'col', data = pd)
plot.set_xticklabels(plot.get_xticklabels(), rotation = 45)
bar plot of the counts of each value in col, with the x tick labels rotated 45 degrees

sns.regplot(x = 'col1', y = 'col2', data = pd)
scatter plot, then fit a line

pd['col'].quantile([0.5, 0.7, 0.9])
find the median (50th percentile) of col, plus its 70th and 90th percentile values

new_frame = pd.loc[pd['col1'] < pd['col1'].quantile(0.95)]
filter the frame, ignoring rows in the top 5% of col1

find_cols = [cols for cols in pd.columns if 'abc' in cols]
find columns that contain abc in their name

fig, axs = plt.subplots(nrows = 3, ncols = 3, figsize = (10, 10))
for i in range(0, 9):
   rows = i // 3
   cols = i % 3
   ax = axs[rows, cols]
   plot = sns.regplot(x = pd.columns[i], y = 'col2', data = pd, ax = ax)
create a 3 x 3 grid of subplots, each showing a scatter plot (with fitted line) of one of the first 9 columns against col2

plot = sns.boxplot(x = 'col1', y = 'col2', data = pd)
box plot shows the median and the interquartile range (the box, where the middle 50% of the data lies) of col2 for each value of col1

pd.describe(include='all')
shows mean, std, min, max, 25%...

pd.info()
shows column data types

import matplotlib.pyplot as plt
%matplotlib inline
pd.hist(figsize=(20,30))
histogram

pd.pivot_table(pd, index=['col1', 'col2'], columns=['col3'], aggfunc=len)
pivot table

sns.pairplot(pd)
grid of scatter plots for every pair of numeric columns in pd; the diagonal (a column paired with itself) shows a histogram of that column

No comments:

Post a Comment