array1 = np.array(list1)
array2 = np.array(list2)
array3 = array1/array2
np.arange(10)
generate array 0 - 9
np.arange(5, 56, 10)
generate array starting at 5, incrementing by 10, stopping before 56 (56 itself is excluded): 5, 15, 25, 35, 45, 55
np.linspace(10, 100, 5)
generate array with 5 evenly spaced elements from 10 to 100, both endpoints included: 10, 32.5, 55, 77.5, 100
array1.reshape(8, 3)
return array1 reshaped into an 8 rows x 3 columns matrix (array1 must have exactly 24 elements; array1 itself is not modified)
result = array2.reshape(2,2,6)
result is a 3-D array of shape (2, 2, 6): 2 blocks, each block has 2 rows, each row has 6 elements (array2 must have exactly 24 elements)
array3[array3 % 2 == 0]
filter array3 with a boolean mask, returning only the even values
np.zeros(50)
create array of 50 zeros
np.zeros((3, 5)) + 6
create 3 x 5 matrix with all values 6
np.ones((5, 9))
create 5 x 9 matrix with all values 1
np.eye(5)
identity matrix
1,0,0,0,0
0,1,0,0,0
0,0,1,0,0
0,0,0,1,0
0,0,0,0,1
np.eye(5,6)
1,0,0,0,0,0
0,1,0,0,0,0
0,0,1,0,0,0
0,0,0,1,0,0
0,0,0,0,1,0
import seaborn as sns
sns.distplot(pd['col'], kde=True)
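analyze the spread of col: histogram with a KDE curve overlaid (note: distplot is deprecated since seaborn 0.11; use sns.histplot(pd['col'], kde=True) in newer versions)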
plot = sns.countplot(x = 'col', data = pd)
plot.set_xticklabels(plot.get_xticklabels(), rotation = 45)
bar plot of the counts of each value in col, with the x tick labels rotated 45 degrees
sns.regplot(x = 'col1', y = 'col2', data = pd)
scatter plot, then fit a line
pd['col'].quantile([0.5, 0.7, 0.9])
find col's median (50th percentile) and its values at the 70th and 90th percentiles
new_frame = pd.loc[pd['col1'] < pd['col1'].quantile(0.95)]
filter the frame, ignoring the top 5% of values in col1
find_cols = [cols for cols in pd.columns if 'abc' in cols]
find columns whose names contain 'abc'
fig, axs = plt.subplots(nrows = 3, ncols = 3, figsize = (10, 10))
for i in range(0, 9):
    rows = i // 3
    cols = i % 3
    ax = axs[rows, cols]
    plot = sns.regplot(x = pd.columns[i], y = 'col2', data = pd, ax = ax)
create 3 x 3 subplots showing scatter plot for first 9 columns with relation to col2
plot = sns.boxplot(x = 'col1', y = 'col2', data = pd)
box plot shows the median and the interquartile range (25%-75%) where most of the data lies, for col2 grouped by col1
pd.describe(include='all')
shows mean, std, min, max, 25%...
pd.info()
shows column data types
import matplotlib.pyplot as plt
%matplotlib inline
pd.hist(figsize=(20,30))
histogram
pd.pivot_table(pd, index=['col1', 'col2'], columns=['col3'], aggfunc=len)
pivot table
sns.pairplot(pd)
grid of scatter plots for every pair of numeric columns in pd; on the diagonal (where both columns are the same) a histogram of that column is shown instead
No comments:
Post a Comment